// Fragment of SharedMemoryRepo::get_self_path(uint64_t upid, Eid eid) — the
// signature line is missing from this extract; surviving lines are kept
// verbatim (including the stray original-file line numbers left by the
// extraction tool).
// Builds the shared-memory meta-file path for THIS process/engine pair:
//   "/tmp/libfoedus_shm_<upid>_<eid>"
// upid is the process ID and eid the engine ID, per the declared signature.
38 std::string pid_str = std::to_string(upid);
39 std::string eid_str = std::to_string(eid);
40 return std::string(
"/tmp/libfoedus_shm_") + pid_str + std::string(
"_") + eid_str;
// Fragment of SharedMemoryRepo::get_master_path(uint64_t master_upid,
// Eid master_eid) — signature line missing from this extract; surviving lines
// kept verbatim. Same path format as get_self_path, but keyed by the MASTER
// process's pid/eid so a child SOC can locate the master's shared memories:
//   "/tmp/libfoedus_shm_<master_upid>_<master_eid>"
43 std::string pid_str = std::to_string(master_upid);
44 std::string eid_str = std::to_string(master_eid);
45 return std::string(
"/tmp/libfoedus_shm_") + pid_str + std::string(
"_") + eid_str;
65 uint64_t
align_4kb(uint64_t value) {
return assorted::align< uint64_t, (1U << 12) >(value); }
66 uint64_t
align_2mb(uint64_t value) {
return assorted::align< uint64_t, (1U << 21) >(value); }
// Fragment of SharedMemoryRepo::allocate_one_node — many interior lines of
// the original (69-71, 75, 77, 82, 84, 86-90) are missing from this extract;
// surviving lines kept verbatim. Per the declaration seen elsewhere, this is
// the per-NUMA-node allocation routine run by one thread per node from
// allocate_shared_memories.
68 void SharedMemoryRepo::allocate_one_node(
72 uint64_t node_memory_size,
73 bool rigorous_memory_boundary_check,
74 bool rigorous_page_boundary_check,
76 SharedMemoryRepo* repo) {
// Path for this node's shared memory: "<self_path>_node_<node>".
78 std::string node_memory_path
79 =
get_self_path(upid, eid) + std::string(
"_node_") + std::to_string(node);
// Hugepages are the default ...
80 bool use_hugepages =
true;
81 if (rigorous_memory_boundary_check || rigorous_page_boundary_check) {
// ... but disabled when either rigorous boundary check is on —
// presumably because mprotect-based boundary checking needs 4-KiB page
// granularity rather than 2-MiB hugepages. TODO(review): confirm.
83 use_hugepages =
false;
// Result of the allocation is reported back through *alloc_result.
85 *alloc_result = repo->node_memories_[node].alloc(
// On the (missing) error path, the partially-allocated block is released.
91 repo->node_memories_[node].release_block();
// Fragment of SharedMemoryRepo::allocate_shared_memories (master-side setup;
// see its declaration: "Master process creates shared memories by calling
// this method"). Interior lines are missing from this extract; surviving
// lines kept verbatim.
// Serialize the EngineOptions to XML; the XML text is stored at the start of
// the global memory so that attaching children can recover the options.
104 std::stringstream options_stream;
106 std::string xml(options_stream.str());
107 uint64_t xml_size = xml.size();
// Global shared memory lives at "<self_path>_global".
111 std::string global_memory_path =
get_self_path(upid, eid) + std::string(
"_global");
113 CHECK_ERROR(global_memory_.
alloc(global_memory_path, global_memory_size, 0, global_hugepages));
// reset_boundaries=true: master initializes the protected boundaries.
117 set_global_memory_anchors(xml_size, options,
true);
// Layout at the head of global memory: [uint64_t xml_size][xml bytes ...].
121 std::memcpy(global_memory_.
get_block(), &xml_size,
sizeof(xml_size));
122 std::memcpy(global_memory_.
get_block() +
sizeof(xml_size), xml.data(), xml_size);
// Node memories are allocated in parallel, one thread per SOC/NUMA node.
127 std::vector< std::thread > alloc_threads;
128 for (uint16_t node = 0; node < soc_count_; ++node) {
129 alloc_threads.emplace_back(std::thread(
130 SharedMemoryRepo::allocate_one_node,
137 alloc_results + node,
// Join all workers and collect per-node results; the loop keeps joining even
// after an error so no thread is leaked.
143 for (uint16_t node = 0; node < soc_count_; ++node) {
144 alloc_threads[node].join();
145 if (alloc_results[node].is_error()) {
146 std::cerr <<
"[FOEDUS] Failed to allocate node shared memory for node-" << node
147 <<
". " << alloc_results[node] << std::endl;
148 last_error = alloc_results[node];
// With all node memories in place, set up per-node anchors
// (reset_boundaries=true: master initializes them).
158 for (uint16_t node = 0; node < soc_count_; ++node) {
159 set_node_memory_anchors(node, options,
true);
// Fragment of SharedMemoryRepo::attach_shared_memories (child-side; see its
// declaration: children "set a reference to shared memory and receive the
// EngineOptions values"). Interior lines are missing from this extract;
// surviving lines kept verbatim.
166 uint64_t master_upid,
// 'base' is presumably get_master_path(master_upid, master_eid) — the line
// computing it is missing from this extract. TODO(review): confirm.
173 std::string global_memory_path = base + std::string(
"_global");
175 global_memory_.
attach(global_memory_path, global_hugepages);
176 if (global_memory_.
is_null()) {
// Recover the master's EngineOptions XML from the head of global memory:
// [uint64_t xml_size][xml bytes ...], mirroring the memcpy layout written by
// allocate_shared_memories.
182 uint64_t xml_size = 0;
183 std::memcpy(&xml_size, global_memory_.
get_block(),
sizeof(xml_size));
185 std::string xml(global_memory_.
get_block() +
sizeof(xml_size), xml_size);
188 my_soc_id_ = my_soc_id;
189 init_empty(*options);
// reset_boundaries=false: the attaching child must NOT reinitialize
// boundaries the master already set up.
190 set_global_memory_anchors(xml_size, *options,
false);
// Attach every node's shared memory ("<base>_node_<node>") and wire anchors.
193 for (uint16_t node = 0; node < soc_count_; ++node) {
194 std::string node_memory_str = base + std::string(
"_node_") + std::to_string(node);
196 if (node_memories_[node].is_null()) {
199 set_node_memory_anchors(node, *options,
false);
// Extra handling for this child's own node memory (lines missing).
204 if (!node_memories_[my_soc_id].is_null()) {
// Fragments of (apparently) SharedMemoryRepo::mark_for_release and
// SharedMemoryRepo::deallocate_shared_memories — so many interior lines are
// missing from this extract that the exact function boundary between the two
// cannot be determined here. Surviving lines kept verbatim.
// Mark each node memory (then the global memory) for OS-level reclamation
// once all processes detach.
217 for (uint16_t i = 0; i < soc_count_; ++i) {
218 if (node_memories_) {
226 if (!global_memory_.
is_null()) {
235 global_memory_anchors_.
clear();
// Per-node teardown: if mprotect was applied to this node's protected
// boundaries (protected_boundaries_needs_release_), it must be released
// before freeing.
240 for (uint16_t i = 0; i < soc_count_; ++i) {
241 if (node_memories_) {
242 if (node_memory_anchors_[i].protected_boundaries_needs_release_) {
// Finally free the heap-side arrays and null the pointers so a double call
// is harmless.
254 if (node_memories_) {
255 delete[] node_memories_;
256 node_memories_ =
nullptr;
258 if (node_memory_anchors_) {
259 delete[] node_memory_anchors_;
260 node_memory_anchors_ =
nullptr;
// Fragment of SharedMemoryRepo::init_empty — body lines 266-268 and the loop
// body are missing from this extract; surviving lines kept verbatim.
// Presumably sizes internal arrays from 'options' and iterates per SOC to
// default-initialize per-node state. TODO(review): confirm against full file.
265 void SharedMemoryRepo::init_empty(
const EngineOptions& options) {
269 for (uint16_t node = 0; node < soc_count_; ++node) {
// Fragment of SharedMemoryRepo::set_global_memory_anchors — interior lines
// are missing from this extract; surviving lines kept verbatim.
// Walks the global shared-memory block, carving it into fixed regions in a
// fixed order and recording each region's address in global_memory_anchors_.
// 'total' (declared on a missing line) is the running byte offset from
// 'base'; after each region a 4-KiB ProtectedBoundary sentinel is placed by
// put_global_memory_boundary (reset_boundaries tells it whether to
// (re)initialize the sentinel — true for the allocating master, false for
// attaching children).
274 void SharedMemoryRepo::set_global_memory_anchors(
276 const EngineOptions& options,
277 bool reset_boundaries) {
// Region 1: the serialized EngineOptions XML, preceded by its uint64_t size.
281 global_memory_anchors_.
options_xml_ = base +
sizeof(uint64_t);
282 total +=
align_4kb(
sizeof(uint64_t) + xml_size);
283 put_global_memory_boundary(&total,
"options_xml_boundary", reset_boundaries);
// Then one control block per manager, each followed by its boundary.
286 =
reinterpret_cast<MasterEngineStatus*
>(base + total);
288 put_global_memory_boundary(&total,
"master_status_memory_boundary", reset_boundaries);
291 =
reinterpret_cast<log::LogManagerControlBlock*
>(base + total);
293 put_global_memory_boundary(&total,
"log_manager_memory_boundary", reset_boundaries);
296 =
reinterpret_cast<log::MetaLogControlBlock*
>(base + total);
298 put_global_memory_boundary(&total,
"meta_logger_memory_boundary", reset_boundaries);
301 =
reinterpret_cast<restart::RestartManagerControlBlock*
>(base + total);
303 put_global_memory_boundary(&total,
"restart_manager_memory_boundary", reset_boundaries);
306 =
reinterpret_cast<savepoint::SavepointManagerControlBlock*
>(base + total);
308 put_global_memory_boundary(&total,
"savepoint_manager_memory_boundary", reset_boundaries);
311 =
reinterpret_cast<snapshot::SnapshotManagerControlBlock*
>(base + total);
313 put_global_memory_boundary(&total,
"snapshot_manager_memory_boundary", reset_boundaries);
316 =
reinterpret_cast<storage::StorageManagerControlBlock*
>(base + total);
318 put_global_memory_boundary(&total,
"storage_manager_memory_boundary", reset_boundaries);
321 =
reinterpret_cast<xct::XctManagerControlBlock*
>(base + total);
323 put_global_memory_boundary(&total,
"xct_manager_memory_boundary", reset_boundaries);
// Partitioner metadata array: one entry per storage, 4-KiB aligned.
326 =
reinterpret_cast<storage::PartitionerMetadata*
>(base + total);
327 total +=
align_4kb(
sizeof(storage::PartitionerMetadata) * options.storage_.max_storages_);
328 put_global_memory_boundary(&total,
"partitioner_metadata_boundary", reset_boundaries);
// Partitioner data block, sized in MB by options (<< 20 converts MB→bytes).
330 total +=
static_cast<uint64_t
>(options.storage_.partitioner_data_memory_mb_) << 20;
331 put_global_memory_boundary(&total,
"partitioner_data_boundary", reset_boundaries);
// Storage-name sort memory (IDs of storages sorted by name).
336 put_global_memory_boundary(&total,
"storage_name_sort_memory_boundary", reset_boundaries);
// Storage control blocks: one per storage.
339 =
reinterpret_cast<storage::StorageControlBlock*
>(base + total);
341 * options.storage_.max_storages_;
342 put_global_memory_boundary(&total,
"storage_memories_boundary", reset_boundaries);
// Shared user memory, sized in KB by options (1024ULL converts KB→bytes).
345 total +=
align_4kb(1024ULL * options.soc_.shared_user_memory_size_kb_);
346 put_global_memory_boundary(&total,
"user_memory_boundary", reset_boundaries);
// Sanity check: the final offset must equal the allocated size; mismatch
// means the layout here diverged from calculate_global_memory_size.
350 std::cerr <<
"[FOEDUS] global memory size doesn't match. bug?"
352 <<
", expected=" << total << std::endl;
// Optionally mprotect every boundary page to trap overruns at runtime.
355 if (options.memory_.rigorous_memory_boundary_check_) {
359 boundary->acquire_protect();
// Fragment of SharedMemoryRepo::calculate_global_memory_size — only one
// statement survives in this extract; kept verbatim. It accounts for the
// options-XML region ([uint64_t size][xml bytes], 4-KiB aligned) plus one
// kBoundarySize sentinel, mirroring the first region laid out by
// set_global_memory_anchors.
370 total +=
align_4kb(
sizeof(xml_size) + xml_size) + kBoundarySize;
// Fragment of SharedMemoryRepo::set_node_memory_anchors — interior lines are
// missing from this extract; surviving lines kept verbatim.
// Per-node analog of set_global_memory_anchors: carves node_memories_[node]
// into fixed regions (child status, volatile-pool status, proc manager/list,
// log reducer, loggers, then per-thread regions), placing a ProtectedBoundary
// sentinel after each via put_node_memory_boundary. reset_boundaries has the
// same master-vs-child meaning as in set_global_memory_anchors.
395 void SharedMemoryRepo::set_node_memory_anchors(
398 bool reset_boundaries) {
399 char* base = node_memories_[node].
get_block();
404 put_node_memory_boundary(node, &total,
"node_child_status_memory_boundary", reset_boundaries);
408 put_node_memory_boundary(node, &total,
"node_volatile_pool_status_boundary", reset_boundaries);
412 put_node_memory_boundary(node, &total,
"node_proc_manager_memory_boundary", reset_boundaries);
416 put_node_memory_boundary(node, &total,
"node_proc_memory_boundary", reset_boundaries);
420 put_node_memory_boundary(node, &total,
"node_proc_name_sort_memory_boundary", reset_boundaries);
424 put_node_memory_boundary(node, &total,
"node_log_reducer_memory_boundary", reset_boundaries);
428 put_node_memory_boundary(
431 "node_log_reducer_root_info_pages_boundary",
437 put_node_memory_boundary(node, &total,
"node_logger_memories_boundary", reset_boundaries);
// Per-thread regions: thread control block, task input/output buffers, and
// the MCS lock memories (WW, RW-simple, RW-extended, RW-async mappings).
// 'thread_anchor' presumably references thread_anchors_[...] — the declaring
// line is missing from this extract. TODO(review): confirm.
444 put_node_memory_boundary(node, &total,
"thread_memory_boundary", reset_boundaries);
446 thread_anchor.task_input_memory_ = base + total;
448 put_node_memory_boundary(node, &total,
"thread_task_input_memory_boundary", reset_boundaries);
450 thread_anchor.task_output_memory_ = base + total;
452 put_node_memory_boundary(node, &total,
"thread_task_output_memory_boundary", reset_boundaries);
454 thread_anchor.mcs_ww_lock_memories_ =
reinterpret_cast<xct::McsWwBlock*
>(base + total);
456 put_node_memory_boundary(node, &total,
"thread_mcs_lock_memories_boundary", reset_boundaries);
458 thread_anchor.mcs_rw_simple_lock_memories_
461 put_node_memory_boundary(
462 node, &total,
"thread_mcs_rw_simple_lock_memories_boundary", reset_boundaries);
463 thread_anchor.mcs_rw_extended_lock_memories_
466 put_node_memory_boundary(
467 node, &total,
"thread_mcs_rw_extended_lock_memories_boundary", reset_boundaries);
469 thread_anchor.mcs_rw_async_mappings_memories_
472 put_node_memory_boundary(
473 node, &total,
"thread_mcs_rw_async_mappings_memories_boundary", reset_boundaries);
// Bulk regions last: the log-reducer buffers and the (largest) volatile
// page pool.
477 uint64_t reducer_buffer_size
481 total += reducer_buffer_size;
482 put_node_memory_boundary(node, &total,
"node_log_reducer_buffers_boundary", reset_boundaries);
487 put_node_memory_boundary(node, &total,
"volatile_pool_boundary", reset_boundaries);
// Sanity check mirroring calculate_node_memory_size, then optional mprotect
// of the boundary pages.
491 std::cerr <<
"[FOEDUS] node memory size doesn't match. bug?"
493 <<
", expected=" << total << std::endl;
499 boundary->acquire_protect();
ErrorStack load_from_string(const std::string &xml)
Load the content of this object from the given XML string.
uint32_t protected_boundaries_count_
To be a POD, we avoid vector and instead uses a fix-sized array.
ErrorStack allocate_shared_memories(uint64_t upid, Eid eid, const EngineOptions &options)
Master process creates shared memories by calling this method.
void save_to_stream(std::ostream *ptr) const
Invokes save() and directs the resulting XML text to the given stream.
0x0C02 : "SOC : Failed to attach a shared memory." .
char * options_xml_
The xml itself.
A 4kb dummy data placed between separate memory regions so that we can check if/where a bogus memory ...
void change_master_status(MasterEngineStatus::StatusCode new_status)
#define ERROR_STACK(e)
Instantiates ErrorStack with the given foedus::error_code, creating an error stack with the current f...
storage::StorageId * storage_name_sort_memory_
This memory stores the ID of storages sorted by their names.
uint32_t StorageId
Unique ID for storage.
uint64_t options_xml_length_
The beginning of global memory is an XML-serialized EngineOption.
Root package of FOEDUS (Fast Optimistic Engine for Data Unification Services).
void * partitioner_data_
Data block to place detailed information of partitioners.
proc::LocalProcId * proc_name_sort_memory_
This memory stores the ID of procedures sorted by their names.
log::LogManagerControlBlock * log_manager_memory_
Tiny memory for log manager.
StatusCode
These statuses represent each step described in SocManager comment.
log::LoggerControlBlock ** logger_memories_
Status and synchronization mechanism for loggers on this node.
void * volatile_page_pool_
By far the largest memory for volatile page pool on this node.
Shared data of ThreadPimpl.
void deallocate_shared_memories()
Detaches and releases the shared memories.
void change_status_atomic(StatusCode new_status)
Update the value of status_code_ with fence.
char * get_block() const
Returns the memory block.
void change_status_atomic(StatusCode new_status)
Update the value of status_code_ with fence.
std::string get_self_path(uint64_t upid, Eid eid)
uint64_t align_4kb(uint64_t value)
Brings error stacktrace information as return value of functions.
Reader-writer (RW) MCS lock classes.
void * user_memory_
This 'user memory' can be used for arbitrary purposes by the user to communicate between SOCs...
StatusCode read_status_atomic() const
Read status_code_ with fence.
Same as GlobalMemoryAnchors except this is for node_memories_.
void mark_for_release()
Marks shared memories as being removed so that it will be reclaimed when all processes detach it...
Part of NodeMemoryAnchors for each thread.
uint64_t align_2mb(uint64_t value)
log::MetaLogControlBlock * meta_logger_memory_
Tiny memory for metadata logger.
bool rigorous_memory_boundary_check_
Whether to use mprotect() for memory boundaries to detect bogus memory accesses.
uint32_t max_storages_
Maximum number of storages in this database.
ThreadLocalOrdinal thread_count_per_group_
Number of Thread in each ThreadGroup.
storage::StorageControlBlock * storage_memories_
Status of each storage instance is stored in this shared memory.
3 << 19 is for FixedSavepoint.
Represents memory shared between processes.
proc::ProcManagerControlBlock * proc_manager_memory_
ProcManagers's status and its synchronization mechanism on this node.
storage::StorageManagerControlBlock * storage_manager_memory_
Tiny memory for storage manager.
uint32_t partitioner_data_memory_mb_
Size in MB of a shared memory buffer allocated for all partitioners during log gleaning.
uint32_t protected_boundaries_count_
To be a POD, we avoid vector and instead uses a fix-sized array.
memory::MemoryOptions memory_
storage::PartitionerMetadata * partitioner_metadata_
Tiny metadata memory for partitioners.
Pre-allocated MCS block for WW-locks.
storage::StorageOptions storage_
ErrorStack attach_shared_memories(uint64_t master_upid, Eid master_eid, SocId my_soc_id, EngineOptions *options)
Child processes (emulated or not) set a reference to shared memory and receive the EngineOption value...
static uint64_t calculate_global_memory_size(uint64_t xml_size, const EngineOptions &options)
snapshot::SnapshotOptions snapshot_
Pre-allocated MCS block for extended version of RW-locks.
Shared data for LogReducer.
thread::ThreadControlBlock * thread_memory_
Status and synchronization mechanism for impersonation of this thread.
Just a marker to denote that the memory region represents a data page.
std::pair< ProcName, Proc > ProcAndName
Set of option values given to the engine at start-up.
snapshot::LogReducerControlBlock * log_reducer_memory_
Tiny control memory for LogReducer in this node.
Current status of a child SOC engine.
static uint64_t calculate_node_memory_size(const EngineOptions &options)
void * log_reducer_buffers_[2]
Actual buffers for LogReducer.
std::string get_master_path(uint64_t master_upid, Eid master_eid)
uint16_t group_count_
Number of ThreadGroup in the engine.
xct::XctManagerControlBlock * xct_manager_memory_
Tiny memory for xct manager.
void attach(const std::string &meta_path, bool use_hugepages)
Attach an already-allocated shared memory so that this object points to the memory.
snapshot::SnapshotManagerControlBlock * snapshot_manager_memory_
Tiny memory for snapshot manager.
uint32_t log_reducer_buffer_mb_
The size in MB of a buffer to store log entries in reducer (partition).
storage::Page * log_reducer_root_info_pages_
This is the 'output' of the reducer in this node.
Shared data in PagePoolPimpl.
uint16_t SocId
Represents an ID of an SOC, or NUMA node.
ThreadMemoryAnchors * thread_anchors_
Anchors for each thread.
void assert_boundary() const
Called at shutdown to check whether these boundaries were not accessed.
thread::ThreadOptions thread_
#define CHECK_ERROR(x)
This macro calls x and checks its returned value.
uint32_t page_pool_size_mb_per_node_
Size of the page pool in MB per each NUMA node.
proc::ProcAndName * proc_memory_
Procedure list on this node.
const ErrorStack kRetOk
Normal return value for no-error case.
MasterEngineStatus * master_status_memory_
This tiny piece of memory contains the current status of the master engine and its synchronization me...
This small control block is used to synchronize the access to the array.
void allocate_arrays(const EngineOptions &options)
ChildEngineStatus * child_status_memory_
This tiny piece of memory contains the current status of the child engine on this node...
savepoint::SavepointManagerControlBlock * savepoint_manager_memory_
Tiny memory for savepoint manager.
uint32_t LocalProcId
Represents a locally-unique ID of a procedure in one SOC.
uint64_t Eid
An Engine ID to differentiate two Engine objects instantiated in the same process.
uint32_t max_proc_count_
Maximum number of system/user procedures.
assorted::ProtectedBoundary * protected_boundaries_[kMaxBoundaries]
sanity check boundaries to detect bogus memory accesses that overrun a memory region ...
void change_child_status(SocId node, ChildEngineStatus::StatusCode new_status)
#define ASSERT_ND(x)
A warning-free wrapper macro of assert() that has no performance effect in release mode even when 'x'...
ErrorCode release_protect()
Removes all access restrictions via mprotect().
void mark_for_release()
Marks the shared memory as being removed so that it will be reclaimed when all processes detach it...
restart::RestartManagerControlBlock * restart_manager_memory_
Tiny memory for restart manager.
bool rigorous_page_boundary_check_
Whether to use mprotect() for page boundaries to detect bogus memory accesses.
bool is_null() const
Returns if this object doesn't hold a valid memory block.
bool protected_boundaries_needs_release_
whether we have invoked mprotect on them
StatusCode read_status_atomic() const
Read status_code_ with fence.
memory::PagePoolControlBlock * volatile_pool_status_
PagePool's status and its synchronization mechanism for the volatile pool on this node...
uint64_t shared_user_memory_size_kb_
As part of the global shared memory, we reserve this size of 'user memory' that can be used for arbit...
ErrorStack alloc(const std::string &meta_path, uint64_t size, int numa_node, bool use_hugepages)
Newly allocate a shared memory of given size on given NUMA node.
bool protected_boundaries_needs_release_
whether we have invoked mprotect on them
ChildEngineStatus::StatusCode get_child_status(SocId node) const
assorted::ProtectedBoundary * protected_boundaries_[kMaxBoundaries]
sanity check boundaries to detect bogus memory accesses that overrun a memory region ...
const uint16_t kMaxSocs
Maximum number of SOCs.
void release_block()
Releases the memory block IF this process has an ownership.
StatusCode
These statuses represent each step described in SocManager comment.
bool is_error() const
Returns if this return code is not kErrorCodeOk.
Master engine has just started.
The child engine observed some unrecoverable error and has exit.
uint16_t loggers_per_node_
Number of loggers per NUMA node.
MasterEngineStatus::StatusCode get_master_status() const