libfoedus-core: FOEDUS Core Library
numa_node_memory.cpp
/*
 * Copyright (c) 2014-2015, Hewlett-Packard Development Company, LP.
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details. You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * HP designates this particular file as subject to the "Classpath" exception
 * as provided by HP in the LICENSE.txt file that accompanied this code.
 */
#include "foedus/memory/numa_node_memory.hpp"

#include <numa.h>
#include <glog/logging.h>

#include <iostream>
#include <sstream>
#include <string>

#include "foedus/assert_nd.hpp"
#include "foedus/engine.hpp"
#include "foedus/engine_options.hpp"
#include "foedus/error_stack_batch.hpp"
#include "foedus/cache/cache_hashtable.hpp"
#include "foedus/memory/memory_id.hpp"
#include "foedus/memory/numa_core_memory.hpp"
#include "foedus/memory/page_pool.hpp"
#include "foedus/soc/shared_memory_repo.hpp"
#include "foedus/soc/soc_manager.hpp"
#include "foedus/thread/thread_id.hpp"

namespace foedus {
namespace memory {
NumaNodeMemory::NumaNodeMemory(Engine* engine, thread::ThreadGroupId numa_node)
  : engine_(engine),
    numa_node_(numa_node),
    cores_(engine_->get_options().thread_.thread_count_per_group_),
    loggers_(engine_->get_options().log_.loggers_per_node_),
    snapshot_cache_table_(nullptr) {
}

int64_t get_numa_node_size(int node) {
  if (::numa_available() < 0) {
    return 0;
  } else {
    return ::numa_node_size(node, nullptr);
  }
}

ErrorStack NumaNodeMemory::initialize_once() {
  LOG(INFO) << "Initializing NumaNodeMemory for node " << static_cast<int>(numa_node_) << "."
    << " BEFORE: numa_node_size=" << get_numa_node_size(numa_node_);

  // volatile pool is placed on the shared memory
  soc::SharedMemoryRepo* memory_repo = engine_->get_soc_manager()->get_shared_memory_repo();
  uint64_t volatile_size =
    static_cast<uint64_t>(engine_->get_options().memory_.page_pool_size_mb_per_node_) << 20;
  volatile_pool_.attach(
    memory_repo->get_node_memory_anchors(numa_node_)->volatile_pool_status_,
    memory_repo->get_volatile_pool(numa_node_),
    volatile_size,
    true,
    engine_->get_options().memory_.rigorous_page_boundary_check_);
  volatile_pool_.set_debug_pool_name(
    std::string("VolatilePool-")
    + std::to_string(static_cast<int>(numa_node_)));

  // snapshot pool is SOC-local
  uint64_t snapshot_pool_bytes
    = static_cast<uint64_t>(engine_->get_options().cache_.snapshot_cache_size_mb_per_node_) << 20;
  if (engine_->get_options().memory_.rigorous_page_boundary_check_) {
    // mprotect raises EINVAL if the underlying pages are hugepages.
    LOG(INFO) << "rigorous_page_boundary_check_ is specified, so hugepages are disabled.";
    CHECK_ERROR(allocate_numa_memory(snapshot_pool_bytes, &snapshot_pool_memory_));
  } else {
    CHECK_ERROR(allocate_huge_numa_memory(snapshot_pool_bytes, &snapshot_pool_memory_));
  }
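  // (rigorous_page_boundary_check_ mprotect()s page boundaries to detect bogus memory
  //  accesses; mprotect() works only at normal-page granularity, hence no hugepages here.)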
  snapshot_pool_control_block_.alloc(1 << 12, 1 << 12, AlignedMemory::kNumaAllocOnnode, numa_node_);
  snapshot_pool_.attach(
    reinterpret_cast<PagePoolControlBlock*>(snapshot_pool_control_block_.get_block()),
    snapshot_pool_memory_.get_block(),
    snapshot_pool_memory_.get_size(),
    true,
    engine_->get_options().memory_.rigorous_page_boundary_check_);
  snapshot_pool_.set_debug_pool_name(
    std::string("SnapshotPool-")
    + std::to_string(static_cast<int>(numa_node_)));

  CHECK_ERROR(volatile_pool_.initialize());
  CHECK_ERROR(snapshot_pool_.initialize());

  // snapshot_pool_ consumes #pages * 4kb bytes of memory.
  // CacheBucket is 16 bytes, so even with 32-fold buckets (a ~3%-full hashtable), we spend only
  // #pages * 0.5kb for hash buckets. This is a negligible overhead.
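  // (e.g., a 1GiB snapshot pool = 256K pages -> 8M buckets -> 128MiB of bucket memory)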
  uint64_t cache_hashtable_buckets = (snapshot_pool_.get_memory_size() / storage::kPageSize) * 32;
  snapshot_cache_table_ = new cache::CacheHashtable(cache_hashtable_buckets, numa_node_);
  CHECK_ERROR(initialize_page_offset_chunk_memory());
  CHECK_ERROR(initialize_log_buffers_memory());
  for (auto ordinal = 0; ordinal < cores_; ++ordinal) {
    CHECK_ERROR(initialize_core_memory(ordinal));
  }
  ASSERT_ND(volatile_pool_.is_initialized());
  ASSERT_ND(snapshot_pool_.is_initialized());
  ASSERT_ND(core_memories_.size() == cores_);
  ASSERT_ND(volatile_offset_chunk_memory_pieces_.size() == cores_);
  ASSERT_ND(snapshot_offset_chunk_memory_pieces_.size() == cores_);
  ASSERT_ND(log_buffer_memory_pieces_.size() == cores_);

  LOG(INFO) << "Initialized NumaNodeMemory for node " << static_cast<int>(numa_node_) << "."
    << " AFTER: numa_node_size=" << get_numa_node_size(numa_node_);
  return kRetOk;
}
ErrorStack NumaNodeMemory::initialize_page_offset_chunk_memory() {
  size_t size_per_core = sizeof(PagePoolOffsetChunk) * 2;
  size_t total_size = size_per_core * cores_;
  LOG(INFO) << "Initializing page_offset_chunk_memory_. total_size=" << total_size << " bytes";
  if (total_size < kHugepageSize) {
    // Just one per NUMA node. Not a significant waste.
    total_size = kHugepageSize;
    LOG(INFO) << "Allocating extra space to utilize hugepage.";
  }
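  // kHugepageSize is 2MB, the transparent-hugepage page size; rounding up lets the
  // allocations below be hugepage-backed.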
  CHECK_ERROR(allocate_huge_numa_memory(total_size, &volatile_offset_chunk_memory_));
  CHECK_ERROR(allocate_huge_numa_memory(total_size, &snapshot_offset_chunk_memory_));
  for (auto ordinal = 0; ordinal < cores_; ++ordinal) {
    {
      PagePoolOffsetChunk* chunk = reinterpret_cast<PagePoolOffsetChunk*>(
        volatile_offset_chunk_memory_.get_block()) + ordinal;
      chunk->clear();
      volatile_offset_chunk_memory_pieces_.push_back(chunk);
    }
    {
      PagePoolOffsetChunk* chunk = reinterpret_cast<PagePoolOffsetChunk*>(
        snapshot_offset_chunk_memory_.get_block()) + ordinal;
      chunk->clear();
      snapshot_offset_chunk_memory_pieces_.push_back(chunk);
    }
  }

  return kRetOk;
}

ErrorStack NumaNodeMemory::initialize_log_buffers_memory() {
  uint64_t size_per_core_ = static_cast<uint64_t>(engine_->get_options().log_.log_buffer_kb_) << 10;
  uint64_t private_total = (cores_ * size_per_core_);
  LOG(INFO) << "Initializing log_buffer_memory_. total_size=" << private_total;
  CHECK_ERROR(allocate_huge_numa_memory(private_total, &log_buffer_memory_));
  LOG(INFO) << "log_buffer_memory_ allocated. addr=" << log_buffer_memory_.get_block();
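  // Slice the single contiguous block into per-core buffers. For example (hypothetical
  // numbers), 8 cores with log_buffer_kb_ = 1024 yield one 8MB block cut into eight 1MB pieces.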
  for (auto ordinal = 0; ordinal < cores_; ++ordinal) {
    AlignedMemorySlice piece(&log_buffer_memory_, size_per_core_ * ordinal, size_per_core_);
    LOG(INFO) << "log_buffer_piece[" << ordinal << "] addr=" << piece.get_block();
    log_buffer_memory_pieces_.push_back(piece);
  }

  return kRetOk;
}

ErrorStack NumaNodeMemory::initialize_core_memory(thread::ThreadLocalOrdinal ordinal) {
  auto core_id = thread::compose_thread_id(numa_node_, ordinal);
  NumaCoreMemory* core_memory = new NumaCoreMemory(engine_, this, core_id);
  core_memories_.push_back(core_memory);
  CHECK_ERROR(core_memory->initialize());
  return kRetOk;
}

ErrorStack NumaNodeMemory::uninitialize_once() {
  LOG(INFO) << "Uninitializing NumaNodeMemory for node " << static_cast<int>(numa_node_) << "."
    << " BEFORE: numa_node_size=" << get_numa_node_size(numa_node_);

  ErrorStackBatch batch;
  batch.uninitialize_and_delete_all(&core_memories_);
  volatile_offset_chunk_memory_pieces_.clear();
  volatile_offset_chunk_memory_.release_block();
  snapshot_offset_chunk_memory_pieces_.clear();
  snapshot_offset_chunk_memory_.release_block();
  log_buffer_memory_pieces_.clear();
  log_buffer_memory_.release_block();
  if (snapshot_cache_table_) {
    delete snapshot_cache_table_;
    snapshot_cache_table_ = nullptr;
  }
  batch.emprace_back(volatile_pool_.uninitialize());
  batch.emprace_back(snapshot_pool_.uninitialize());
  snapshot_pool_memory_.release_block();
  snapshot_pool_control_block_.release_block();

  LOG(INFO) << "Uninitialized NumaNodeMemory for node " << static_cast<int>(numa_node_) << "."
    << " AFTER: numa_node_size=" << get_numa_node_size(numa_node_);
  return SUMMARIZE_ERROR_BATCH(batch);
}

ErrorStack NumaNodeMemory::allocate_numa_memory_general(
  uint64_t size,
  uint64_t alignment,
  AlignedMemory *out) const {
  ASSERT_ND(out);
  if (engine_->get_options().memory_.use_mmap_hugepages_ &&
    alignment >= kHugepageSize
    && size >= (1ULL << 30) * 8 / 10) {
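    // (1ULL << 30) * 8 / 10 is ~819MiB, i.e. 80% of a 1GB page: only allocations that
    // would fill most of a 1GB hugepage are rounded up to kNumaMmapOneGbPages.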
    LOG(INFO) << "This is a big memory allocation. Let's use the mmap hugepage (1GB pages)";
    out->alloc(size, 1ULL << 30, AlignedMemory::kNumaMmapOneGbPages, numa_node_);
  } else {
    out->alloc(size, alignment, AlignedMemory::kNumaAllocOnnode, numa_node_);
  }
  if (out->is_null()) {
    return ERROR_STACK(kErrorCodeOutofmemory);
  }
  return kRetOk;
}

std::string NumaNodeMemory::dump_free_memory_stat() const {
  std::stringstream ret;
  PagePool::Stat volatile_stat = volatile_pool_.get_stat();
  ret << " Volatile-Pool: " << volatile_stat.allocated_pages_ << " allocated pages, "
    << volatile_stat.total_pages_ << " total pages, "
    << (volatile_stat.total_pages_ - volatile_stat.allocated_pages_) << " free pages"
    << std::endl;
  PagePool::Stat snapshot_stat = snapshot_pool_.get_stat();
  ret << " Snapshot-Pool: " << snapshot_stat.allocated_pages_ << " allocated pages, "
    << snapshot_stat.total_pages_ << " total pages, "
    << (snapshot_stat.total_pages_ - snapshot_stat.allocated_pages_) << " free pages"
    << std::endl;
  return ret.str();
}

NumaNodeMemoryRef::NumaNodeMemoryRef(Engine* engine, thread::ThreadGroupId numa_node)
  : engine_(engine), numa_node_(numa_node) {
  soc::SharedMemoryRepo* memory_repo = engine->get_soc_manager()->get_shared_memory_repo();
  volatile_pool_.attach(
    memory_repo->get_node_memory_anchors(numa_node)->volatile_pool_status_,
    memory_repo->get_volatile_pool(numa_node),
    static_cast<uint64_t>(engine->get_options().memory_.page_pool_size_mb_per_node_) << 20,
    false,
    false);
}

std::string NumaNodeMemoryRef::dump_free_memory_stat() const {
  std::stringstream ret;
  PagePool::Stat volatile_stat = volatile_pool_.get_stat();
  ret << " Volatile-Pool: " << volatile_stat.allocated_pages_ << " allocated pages, "
    << volatile_stat.total_pages_ << " total pages, "
    << (volatile_stat.total_pages_ - volatile_stat.allocated_pages_) << " free pages"
    << std::endl;
  return ret.str();
}

}  // namespace memory
}  // namespace foedus
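
Two OS facilities carry most of the work in this file: libnuma node-local allocation (allocate_numa_memory() wraps numa_alloc_onnode()/numa_free(), which is also what AlignedMemory::kNumaAllocOnnode boils down to) and mprotect()-based page-boundary checking (MemoryOptions::rigorous_page_boundary_check_). The standalone sketches below illustrate both; they are not part of the FOEDUS sources, and the file names, sizes, and node IDs are made-up assumptions.

// numa_alloc_sketch.cpp (hypothetical): node-local allocation with libnuma.
// Build with: g++ numa_alloc_sketch.cpp -lnuma
#include <numa.h>
#include <iostream>

int main() {
  if (::numa_available() < 0) {
    std::cerr << "NUMA is not available on this machine" << std::endl;
    return 1;
  }
  const int node = 0;  // assumption: node 0 exists
  // Same call the get_numa_node_size() helper above wraps; also reports free bytes.
  long free_bytes = 0;
  long total_bytes = ::numa_node_size(node, &free_bytes);
  std::cout << "node " << node << ": total=" << total_bytes
    << " free=" << free_bytes << std::endl;

  // Allocate 16MB physically placed on node 0.
  const size_t bytes = 16ULL << 20;
  void* block = ::numa_alloc_onnode(bytes, node);
  if (block == nullptr) {
    return 1;  // out of memory, analogous to the ERROR_STACK path above
  }
  // ... use the memory ...
  ::numa_free(block, bytes);  // numa_alloc_* memory must be released via numa_free()
  return 0;
}

The second sketch shows the guard-page idea behind rigorous_page_boundary_check_ and why it needs plain 4KB pages: mprotect() changes protection in normal-page units, and (per the comment in initialize_once() above) it fails with EINVAL on hugepage-backed memory.

// guard_page_sketch.cpp (hypothetical): detect out-of-bounds access with mprotect().
#include <sys/mman.h>
#include <cstdio>

int main() {
  const size_t kPage = 4096;
  // Two usable pages followed by one guard page. mmap() returns page-aligned,
  // normal (non-huge) pages, which mprotect() can re-protect individually.
  void* mem = ::mmap(
    nullptr, 3 * kPage, PROT_READ | PROT_WRITE,
    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (mem == MAP_FAILED) {
    return 1;
  }
  char* region = static_cast<char*>(mem);
  if (::mprotect(region + 2 * kPage, kPage, PROT_NONE) != 0) {
    std::perror("mprotect");  // e.g., EINVAL if the region were hugepage-backed
    return 1;
  }
  region[2 * kPage - 1] = 42;   // last valid byte: fine
  // region[2 * kPage] = 42;    // would SIGSEGV immediately: the bogus access is caught
  ::munmap(region, 3 * kPage);
  return 0;
}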