libfoedus-core
FOEDUS Core Library
numa_core_memory.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015, Hewlett-Packard Development Company, LP.
3  * This program is free software; you can redistribute it and/or modify it
4  * under the terms of the GNU General Public License as published by the Free
5  * Software Foundation; either version 2 of the License, or (at your option)
6  * any later version.
7  *
8  * This program is distributed in the hope that it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11  * more details. You should have received a copy of the GNU General Public
12  * License along with this program; if not, write to the Free Software
13  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14  *
15  * HP designates this particular file as subject to the "Classpath" exception
16  * as provided by HP in the LICENSE.txt file that accompanied this code.
17  */
19 
20 #include <glog/logging.h>
21 
22 #include <algorithm>
23 
24 #include "foedus/compiler.hpp"
25 #include "foedus/engine.hpp"
34 #include "foedus/xct/xct_id.hpp"
36 
37 namespace foedus {
38 namespace memory {
// NOTE(review): this is the NumaCoreMemory constructor; its opening line
// ("NumaCoreMemory::NumaCoreMemory(", original line 39) is missing from this
// extracted listing. The lines below are its parameter list, member
// initializers, and body. Confirm against the full source before editing.
40  Engine* engine,
41  NumaNodeMemory *node_memory,
42  thread::ThreadId core_id)
43  : engine_(engine),
44  node_memory_(node_memory),
45  core_id_(core_id),
// numa_node_/core_local_ordinal_ are derived by decomposing the global
// thread ID into its (NUMA node, local ordinal) parts (see thread_id.hpp).
46  numa_node_(thread::decompose_numa_node(core_id)),
47  core_local_ordinal_(thread::decompose_numa_local_ordinal(core_id)),
// Pool/chunk/lock-list pointers start null; they are wired up in
// initialize_once(), not here.
48  free_volatile_pool_chunk_(nullptr),
49  free_snapshot_pool_chunk_(nullptr),
50  retired_volatile_pool_chunks_(nullptr),
51  current_lock_list_memory_(nullptr),
52  current_lock_list_capacity_(0),
53  retrospective_lock_list_memory_(nullptr),
54  retrospective_lock_list_capacity_(0),
55  volatile_pool_(nullptr),
56  snapshot_pool_(nullptr) {
// Sanity checks: the decomposed node must match the node memory we were
// handed, and re-composing (node, ordinal) must reproduce the original ID.
57  ASSERT_ND(numa_node_ == node_memory->get_numa_node());
58  ASSERT_ND(core_id_ == thread::compose_thread_id(node_memory->get_numa_node(),
59  core_local_ordinal_));
60 }
61 
// NOTE(review): this is the body of
// NumaCoreMemory::calculate_local_small_memory_size(const EngineOptions&)
// (signature at original line 62, lost in extraction). It totals the bytes
// needed for all small per-core memories; initialize_once() carves one
// allocation of exactly this size into pieces in the same order.
63  uint64_t memory_size = 0;
64  // for the "shift" part, we calculate conservatively then skip it at the end.
65  // it's a wasted memory, but negligible.
// One 4kb bank-shift slot per thread on the node (threads << 12 bytes).
66  memory_size += static_cast<uint64_t>(options.thread_.thread_count_per_group_) << 12;
67  memory_size += sizeof(xct::SysxctWorkspace);
69  memory_size += sizeof(xct::PointerAccess) * xct::Xct::kMaxPointerSets;
70  const xct::XctOptions& xct_opt = options.xct_;
71  const uint16_t nodes = options.thread_.group_count_;
72  memory_size += sizeof(xct::ReadXctAccess) * xct_opt.max_read_set_size_;
73  memory_size += sizeof(xct::WriteXctAccess) * xct_opt.max_write_set_size_;
// NOTE(review): the continuation lines (original 75 and 77, presumably
// "* xct_opt.max_lock_free_read_set_size_;" and
// "* xct_opt.max_lock_free_write_set_size_;" matching initialize_once())
// are missing from this extracted listing — confirm against the repository.
74  memory_size += sizeof(xct::LockFreeReadXctAccess)
76  memory_size += sizeof(xct::LockFreeWriteXctAccess)
// One retired-page chunk per NUMA node in the engine.
78  memory_size += sizeof(memory::PagePoolOffsetAndEpochChunk) * nodes;
79 
80  // In reality almost no chance we take as many locks as all read/write-sets,
81  // but let's simplify that. Not much memory anyways.
82  const uint64_t total_access_sets = xct_opt.max_read_set_size_ + xct_opt.max_write_set_size_;
// Counted twice: one lock list for "current" (CLL) and one for
// "retrospective" (RLL); see the matching carving in initialize_once().
83  memory_size += sizeof(xct::LockEntry) * total_access_sets;
84  memory_size += sizeof(xct::LockEntry) * total_access_sets;
85  return memory_size;
86 }
87 
// NOTE(review): this is the body of ErrorStack NumaCoreMemory::initialize_once()
// (signature at original line 88, lost in extraction). It wires this core's
// pointers to pre-carved pieces of the node-wide memories, then carves one
// small allocation into per-purpose pieces mirroring
// calculate_local_small_memory_size(), and finally pre-grabs free pages.
89  LOG(INFO) << "Initializing NumaCoreMemory for core " << core_id_;
// Each core gets its own piece of the node-wide chunk/log-buffer memories,
// indexed by its local ordinal within the node.
90  free_volatile_pool_chunk_ = node_memory_->get_volatile_offset_chunk_memory_piece(
91  core_local_ordinal_);
92  free_snapshot_pool_chunk_ = node_memory_->get_snapshot_offset_chunk_memory_piece(
93  core_local_ordinal_);
94  volatile_pool_ = node_memory_->get_volatile_pool();
95  snapshot_pool_ = node_memory_->get_snapshot_pool();
96  log_buffer_memory_ = node_memory_->get_log_buffer_memory_piece(core_local_ordinal_);
97 
98  // allocate small_thread_local_memory_. it's a collection of small memories
99  uint64_t memory_size = calculate_local_small_memory_size(engine_->get_options());
100  if (memory_size > (1U << 21)) {
101  VLOG(1) << "mm, small_local_memory_size is more than 2MB(" << memory_size << ")."
102  " not a big issue, but consumes one more TLB entry...";
103  }
104  CHECK_ERROR(node_memory_->allocate_numa_memory(memory_size, &small_thread_local_memory_));
105 
106  const xct::XctOptions& xct_opt = engine_->get_options().xct_;
107  const uint16_t nodes = engine_->get_options().thread_.group_count_;
108  const uint16_t thread_per_group = engine_->get_options().thread_.thread_count_per_group_;
109  char* memory = reinterpret_cast<char*>(small_thread_local_memory_.get_block());
110  // "shift" 4kb for each thread on this node so that memory banks are evenly used.
111  // in many architecture, 13th- or 14th- bits are memory banks (see [JEONG11])
112  memory += static_cast<uint64_t>(core_local_ordinal_) << 12;
// Carve the allocation into pieces. Order and sizes must stay in sync with
// calculate_local_small_memory_size(); the ASSERT_ND below verifies that.
113  small_thread_local_memory_pieces_.sysxct_workspace_memory_ = memory;
114  memory += sizeof(xct::SysxctWorkspace);
115  small_thread_local_memory_pieces_.xct_page_version_memory_ = memory;
// NOTE(review): the "memory += ..." advance for the page-version piece
// (original line 116) is missing from this extracted listing — confirm
// against the full source.
117  small_thread_local_memory_pieces_.xct_pointer_access_memory_ = memory;
118  memory += sizeof(xct::PointerAccess) * xct::Xct::kMaxPointerSets;
119  small_thread_local_memory_pieces_.xct_read_access_memory_ = memory;
120  memory += sizeof(xct::ReadXctAccess) * xct_opt.max_read_set_size_;
121  small_thread_local_memory_pieces_.xct_write_access_memory_ = memory;
122  memory += sizeof(xct::WriteXctAccess) * xct_opt.max_write_set_size_;
123  small_thread_local_memory_pieces_.xct_lock_free_read_access_memory_ = memory;
124  memory += sizeof(xct::LockFreeReadXctAccess) * xct_opt.max_lock_free_read_set_size_;
125  small_thread_local_memory_pieces_.xct_lock_free_write_access_memory_ = memory;
126  memory += sizeof(xct::LockFreeWriteXctAccess) * xct_opt.max_lock_free_write_set_size_;
// One retired-page chunk per NUMA node in the engine.
127  retired_volatile_pool_chunks_ = reinterpret_cast<PagePoolOffsetAndEpochChunk*>(memory);
128  memory += sizeof(memory::PagePoolOffsetAndEpochChunk) * nodes;
129 
// Current and retrospective lock lists, each able to hold one entry per
// read+write set record.
130  const uint64_t total_access_sets = xct_opt.max_read_set_size_ + xct_opt.max_write_set_size_;
131  current_lock_list_memory_ = reinterpret_cast<xct::LockEntry*>(memory);
132  current_lock_list_capacity_ = total_access_sets;
133  memory += sizeof(xct::LockEntry) * total_access_sets;
134  retrospective_lock_list_memory_ = reinterpret_cast<xct::LockEntry*>(memory);
135  retrospective_lock_list_capacity_ = total_access_sets;
136  memory += sizeof(xct::LockEntry) * total_access_sets;
137 
// Skip the unused remainder of the conservative bank-shift slots so the
// cursor lands exactly at the end of the allocation (verified below).
138  memory += static_cast<uint64_t>(thread_per_group - core_local_ordinal_) << 12;
139  ASSERT_ND(reinterpret_cast<char*>(small_thread_local_memory_.get_block())
140  + memory_size == memory);
141 
142  for (uint16_t node = 0; node < nodes; ++node) {
143  retired_volatile_pool_chunks_[node].clear();
144  }
145 
146  CHECK_ERROR(node_memory_->allocate_numa_memory(
147  xct_opt.local_work_memory_size_mb_ * (1ULL << 20),
148  &local_work_memory_));
149 
150  // Each core starts from 50%-full free pool chunk (configurable)
// Initial grab is capped by the pool's recommended batch size and by a fair
// share: half the free pool capacity split evenly among this node's threads.
151  uint32_t initial_pages = engine_->get_options().memory_.private_page_pool_initial_grab_;
152  {
153  uint32_t grab_count = std::min<uint32_t>(
154  volatile_pool_->get_recommended_pages_per_grab(),
155  std::min<uint32_t>(
156  initial_pages,
157  volatile_pool_->get_free_pool_capacity() / (2U * thread_per_group)));
158  WRAP_ERROR_CODE(volatile_pool_->grab(grab_count, free_volatile_pool_chunk_));
159  }
160  {
161  uint32_t grab_count = std::min<uint32_t>(
162  snapshot_pool_->get_recommended_pages_per_grab(),
163  std::min<uint32_t>(
164  initial_pages,
165  snapshot_pool_->get_free_pool_capacity() / (2U * thread_per_group)));
166  WRAP_ERROR_CODE(snapshot_pool_->grab(grab_count, free_snapshot_pool_chunk_));
167  }
168  return kRetOk;
169 }
// NOTE(review): this is the body of ErrorStack NumaCoreMemory::uninitialize_once()
// (signature at original line 170, lost in extraction). Releases this core's
// locally-held pages and memory blocks in reverse order of initialize_once().
171  LOG(INFO) << "Releasing NumaCoreMemory for core " << core_id_;
172  ErrorStackBatch batch;
173  // return all free pages
174  if (retired_volatile_pool_chunks_) {
175  // this should be already released in ThreadPimpl's uninitialize.
176  // we can't do it here because uninitialization of node/core memories are parallelized
177  for (uint16_t node = 0; node < engine_->get_soc_count(); ++node) {
178  PagePoolOffsetAndEpochChunk* chunk = retired_volatile_pool_chunks_ + node;
179  ASSERT_ND(chunk->empty()); // just sanity check
180  }
// The chunks live inside small_thread_local_memory_ (released below), so we
// only forget the pointer here.
181  retired_volatile_pool_chunks_ = nullptr;
182  }
// Hand every page still held in the core-local chunks back to the node-wide
// pools, then drop our pool pointers.
183  if (free_volatile_pool_chunk_) {
184  volatile_pool_->release(free_volatile_pool_chunk_->size(), free_volatile_pool_chunk_);
185  free_volatile_pool_chunk_ = nullptr;
186  volatile_pool_ = nullptr;
187  }
188  if (free_snapshot_pool_chunk_) {
189  snapshot_pool_->release(free_snapshot_pool_chunk_->size(), free_snapshot_pool_chunk_);
190  free_snapshot_pool_chunk_ = nullptr;
191  snapshot_pool_ = nullptr;
192  }
193  log_buffer_memory_.clear();
194  local_work_memory_.release_block();
195  small_thread_local_memory_.release_block();
196  return SUMMARIZE_ERROR_BATCH(batch);
197 }
198 
// NOTE(review): body of PagePoolOffset NumaCoreMemory::grab_free_volatile_page()
// (signature at original line 199, lost in extraction).
// Pops one page offset from the core-local chunk, refilling the chunk from
// the node-wide volatile pool when empty. Returns 0 when the pool cannot
// supply more pages (presumably treated as an invalid offset by callers).
200  if (UNLIKELY(free_volatile_pool_chunk_->empty())) {
201  if (grab_free_pages_from_node(free_volatile_pool_chunk_, volatile_pool_) != kErrorCodeOk) {
202  return 0;
203  }
204  }
205  ASSERT_ND(!free_volatile_pool_chunk_->empty());
206  return free_volatile_pool_chunk_->pop_back();
207 }
// NOTE(review): body of NumaCoreMemory::grab_free_volatile_page_pointer()
// (signature and the local declaration of `ret`, original lines 208-209,
// are missing from this extracted listing).
// Wraps grab_free_volatile_page(): packages the grabbed offset together with
// this core's NUMA node into a VolatilePagePointer.
210  ret.set(numa_node_, grab_free_volatile_page());
211  return ret;
212 }
// NOTE(review): body of NumaCoreMemory::release_free_volatile_page(PagePoolOffset offset)
// (signature at original line 213, lost in extraction).
// Pushes the offset back to the core-local chunk, first spilling half of the
// chunk to the node-wide pool if the chunk is full.
214  if (UNLIKELY(free_volatile_pool_chunk_->full())) {
215  release_free_pages_to_node(free_volatile_pool_chunk_, volatile_pool_);
216  }
217  ASSERT_ND(!free_volatile_pool_chunk_->full());
218  free_volatile_pool_chunk_->push_back(offset);
219 }
220 
// NOTE(review): body of PagePoolOffset NumaCoreMemory::grab_free_snapshot_page()
// (signature at original line 221, lost in extraction).
// Snapshot-pool counterpart of grab_free_volatile_page(): pops one offset
// from the core-local chunk, refilling from the node-wide snapshot pool when
// empty; returns 0 when the pool is exhausted.
222  if (UNLIKELY(free_snapshot_pool_chunk_->empty())) {
223  if (grab_free_pages_from_node(free_snapshot_pool_chunk_, snapshot_pool_) != kErrorCodeOk) {
224  return 0;
225  }
226  }
227  ASSERT_ND(!free_snapshot_pool_chunk_->empty());
228  return free_snapshot_pool_chunk_->pop_back();
229 }
// NOTE(review): body of NumaCoreMemory::release_free_snapshot_page(PagePoolOffset offset)
// (signature at original line 230, lost in extraction).
// Snapshot-pool counterpart of release_free_volatile_page(): pushes the
// offset back to the core-local chunk, spilling half to the node-wide pool
// first if the chunk is full.
231  if (UNLIKELY(free_snapshot_pool_chunk_->full())) {
232  release_free_pages_to_node(free_snapshot_pool_chunk_, snapshot_pool_);
233  }
234  ASSERT_ND(!free_snapshot_pool_chunk_->full());
235  free_snapshot_pool_chunk_->push_back(offset);
236 }
237 
238 ErrorCode NumaCoreMemory::grab_free_pages_from_node(
239  PagePoolOffsetChunk* free_chunk,
240  memory::PagePool* pool) {
241  uint32_t desired = (free_chunk->capacity() - free_chunk->size()) / 2;
242  desired = std::min<uint32_t>(desired, pool->get_recommended_pages_per_grab());
243  return pool->grab(desired, free_chunk);
244 }
245 
246 void NumaCoreMemory::release_free_pages_to_node(
247  PagePoolOffsetChunk* free_chunk,
248  memory::PagePool *pool) {
249  uint32_t desired = free_chunk->size() / 2;
250  pool->release(desired, free_chunk);
251 }
252 
// NOTE(review): body of
// NumaCoreMemory::get_retired_volatile_pool_chunk(uint16_t node)
// (signature at original line 253, lost in extraction).
// Returns the retired-page chunk slot for the given NUMA node. No bounds
// check here; `node` must be below the engine's node count, matching the
// array carved out in initialize_once().
254  return retired_volatile_pool_chunks_ + node;
255 }
256 
257 } // namespace memory
258 } // namespace foedus
ErrorStack uninitialize_once() override
ErrorStack allocate_numa_memory(uint64_t size, AlignedMemory *out) const
ErrorStack initialize_once() override
Represents a record of special read-access during a transaction without any need for locking...
Definition: xct_access.hpp:200
void release_free_volatile_page(PagePoolOffset offset)
Returns one free volatile page to local page pool.
PagePoolOffset grab_free_volatile_page()
Acquires one free volatile page from local page pool.
uint32_t max_lock_free_read_set_size_
The maximum number of lock-free read-set one transaction can have.
Definition: xct_options.hpp:84
Page pool for volatile read/write store (VolatilePage) and the read-only bufferpool (SnapshotPage)...
Definition: page_pool.hpp:173
uint32_t private_page_pool_initial_grab_
How many pages each NumaCoreMemory initially grabs when it is initialized.
void release_block()
Releases the memory block.
Root package of FOEDUS (Fast Optimistic Engine for Data Unification Services).
Definition: assert_nd.hpp:44
Represents a record of write-access during a transaction.
Definition: xct_access.hpp:168
uint32_t PagePoolOffset
Offset in PagePool that compactly represents the page address (unlike 8 bytes pointer).
Definition: memory_id.hpp:44
ThreadLocalOrdinal decompose_numa_local_ordinal(ThreadId global_id)
Extracts local ordinal from the given globally unique ID of Thread (core).
Definition: thread_id.hpp:139
Represents a pointer to a volatile page with modification count for preventing ABA.
Definition: storage_id.hpp:194
PagePoolOffsetChunk * get_volatile_offset_chunk_memory_piece(foedus::thread::ThreadLocalOrdinal core_ordinal)
Brings error stacktrace information as return value of functions.
Definition: error_stack.hpp:81
Represents a record of read-access during a transaction.
Definition: xct_access.hpp:139
An entry in CLL and RLL, representing a lock that is taken or will be taken.
ErrorCode grab(uint32_t desired_grab_count, PagePoolOffsetChunk *chunk)
Adds the specified number of free pages to the chunk.
Definition: page_pool.cpp:129
uint32_t get_recommended_pages_per_grab() const
Definition: page_pool.cpp:124
storage::VolatilePagePointer grab_free_volatile_page_pointer()
Wrapper for grab_free_volatile_page().
Represents a record of special write-access during a transaction without any need for locking...
Definition: xct_access.hpp:228
const EngineOptions & get_options() const
Definition: engine.cpp:39
ThreadLocalOrdinal thread_count_per_group_
Number of Thread in each ThreadGroup.
soc::SocId get_soc_count() const
Shorthand for get_options().thread_.group_count_.
Definition: engine.cpp:74
Set of options for xct manager.
Definition: xct_options.hpp:35
foedus::thread::ThreadGroupId get_numa_node() const
Batches zero or more ErrorStack objects to represent in one ErrorStack.
memory::MemoryOptions memory_
0 means no-error.
Definition: error_code.hpp:87
Definitions of IDs in this package and a few related constant values.
uint32_t max_lock_free_write_set_size_
The maximum number of lock-free write-set one transaction can have.
Definition: xct_options.hpp:92
void release_free_snapshot_page(PagePoolOffset offset)
Same, except it's for snapshot page.
Database engine object that holds all resources and provides APIs.
Definition: engine.hpp:109
Repository of memories dynamically acquired and shared within one NUMA node (socket).
void set(uint8_t numa_node, memory::PagePoolOffset offset)
Definition: storage_id.hpp:212
Set of option values given to the engine at start-up.
ThreadGroupId decompose_numa_node(ThreadId global_id)
Extracts NUMA node ID from the given globally unique ID of Thread (core).
Definition: thread_id.hpp:131
void * get_block() const
Returns the memory block.
uint16_t group_count_
Number of ThreadGroup in the engine.
void release(uint32_t desired_release_count, PagePoolOffsetChunk *chunk)
Returns the specified number of free pages from the chunk.
Definition: page_pool.cpp:134
uint32_t max_write_set_size_
The maximum number of write-set one transaction can have.
Definition: xct_options.hpp:76
ThreadId compose_thread_id(ThreadGroupId node, ThreadLocalOrdinal local_core)
Returns a globally unique ID of Thread (core) for the given node and ordinal in the node...
Definition: thread_id.hpp:123
#define SUMMARIZE_ERROR_BATCH(x)
This macro calls ErrorStackBatch::summarize() with automatically provided parameters.
PagePoolOffsetAndEpochChunk * get_retired_volatile_pool_chunk(uint16_t node)
PagePoolOffset grab_free_snapshot_page()
Same, except it's for snapshot page.
void push_back(PagePoolOffset pointer)
Definition: page_pool.hpp:68
thread::ThreadOptions thread_
static uint64_t calculate_local_small_memory_size(const EngineOptions &options)
To reduce the overhead of grabbing/releasing pages from the pool, we pack this many pointers for each grab/release.
Definition: page_pool.hpp:47
#define CHECK_ERROR(x)
This macro calls x and checks its returned value.
uint16_t ThreadId
Typedef for a global ID of Thread (core), which is unique across NUMA nodes.
Definition: thread_id.hpp:80
const ErrorStack kRetOk
Normal return value for no-error case.
PagePoolOffsetChunk * get_snapshot_offset_chunk_memory_piece(foedus::thread::ThreadLocalOrdinal core_ordinal)
Used to store an epoch value with each entry in PagePoolOffsetChunk.
Definition: page_pool.hpp:108
#define UNLIKELY(x)
Hints that x is highly likely false.
Definition: compiler.hpp:104
uint64_t get_free_pool_capacity() const
Definition: page_pool.cpp:120
#define ASSERT_ND(x)
A warning-free wrapper macro of assert() that has no performance effect in release mode even when 'x'...
Definition: assert_nd.hpp:72
Represents a record of following a page pointer during a transaction.
Definition: xct_access.hpp:48
#define WRAP_ERROR_CODE(x)
Same as CHECK_ERROR(x) except it receives only an error code, thus more efficient.
uint32_t max_read_set_size_
The maximum number of read-set one transaction can have.
Definition: xct_options.hpp:60
Represents a record of reading a page during a transaction.
Definition: xct_access.hpp:72
ErrorCode
Enum of error codes defined in error_code.xmacro.
Definition: error_code.hpp:85
Per-thread reused work memory for system transactions.
AlignedMemorySlice get_log_buffer_memory_piece(log::LoggerId logger)