libfoedus-core
FOEDUS Core Library
engine_options.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015, Hewlett-Packard Development Company, LP.
3  * This program is free software; you can redistribute it and/or modify it
4  * under the terms of the GNU General Public License as published by the Free
5  * Software Foundation; either version 2 of the License, or (at your option)
6  * any later version.
7  *
8  * This program is distributed in the hope that it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11  * more details. You should have received a copy of the GNU General Public
12  * License along with this program; if not, write to the Free Software
13  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14  *
15  * HP designates this particular file as subject to the "Classpath" exception
16  * as provided by HP in the LICENSE.txt file that accompanied this code.
17  */
19 
#include <tinyxml2.h>
#include <valgrind.h>
#include <sys/resource.h>
#include <unistd.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "foedus/assert_nd.hpp"
36 
37 namespace foedus {
39 }
41  operator=(other);
42 }
43 
44 // template-ing just for const/non-const
/**
 * Lists the addresses of all child option objects held by the given options object.
 * The const-ness of the returned pointers follows the template arguments, which is the
 * only reason this is a template.
 * @param[in] option options object whose members are enumerated
 * @return pointers to cache_, debugging_, log_, memory_, proc_, restart_, savepoint_,
 * snapshot_, soc_, storage_, thread_, and xct_, in this order.
 */
template <typename ENGINE_OPTION_PTR, typename CHILD_PTR>
std::vector< CHILD_PTR > get_children_impl(ENGINE_OPTION_PTR option) {
  // The order here is the order callers observe (e.g. pairing elements in operator=).
  const CHILD_PTR members[] = {
    &option->cache_,
    &option->debugging_,
    &option->log_,
    &option->memory_,
    &option->proc_,
    &option->restart_,
    &option->savepoint_,
    &option->snapshot_,
    &option->soc_,
    &option->storage_,
    &option->thread_,
    &option->xct_,
  };
  const size_t member_count = sizeof(members) / sizeof(members[0]);
  return std::vector< CHILD_PTR >(members, members + member_count);
}
62 std::vector< externalize::Externalizable* > get_children(EngineOptions* option) {
63  return get_children_impl<EngineOptions*, externalize::Externalizable*>(option);
64 }
65 std::vector< const externalize::Externalizable* > get_children(const EngineOptions* option) {
66  return get_children_impl<const EngineOptions*, const externalize::Externalizable*>(option);
67 }
68 
70  auto mine = get_children(this);
71  auto others = get_children(&other);
72  ASSERT_ND(mine.size() == others.size());
73  for (size_t i = 0; i < mine.size(); ++i) {
74  mine[i]->assign(others[i]);
75  }
76  return *this;
77 }
78 
79 ErrorStack EngineOptions::load(tinyxml2::XMLElement* element) {
80  *this = EngineOptions(); // This guarantees default values for optional XML elements.
81  for (externalize::Externalizable* child : get_children(this)) {
82  CHECK_ERROR(get_child_element(element, child->get_tag_name(), child));
83  }
84  return kRetOk;
85 }
86 
87 ErrorStack EngineOptions::save(tinyxml2::XMLElement* element) const {
88  CHECK_ERROR(insert_comment(element, "Set of options given to the engine at start-up"));
89  for (const externalize::Externalizable* child : get_children(this)) {
90  CHECK_ERROR(add_child_element(element, child->get_tag_name(), "", *child));
91  }
92  return kRetOk;
93 }
94 
96 uint64_t EngineOptions::get_available_hugepage_memory(std::ostream* details_out) {
97  std::ifstream file("/proc/meminfo");
98  if (!file.is_open()) {
99  *details_out << "[FOEDUS] Failed to open /proc/meminfo. Cannot check available hugepages."
100  << std::endl;
101  return 0;
102  }
103 
104  std::string line;
105  uint64_t hugepage_size = 1ULL << 21;
106  uint64_t hugepage_count = 0;
107  while (std::getline(file, line)) {
108  if (line.find("Hugepagesize:") != std::string::npos) {
109  // /proc/meminfo should have "Hugepagesize: 1048576 kB" for 1GB hugepages
110  if (line.find("1048576 kB") != std::string::npos) {
111  hugepage_size = 1ULL << 30;
112  } else {
113  ASSERT_ND(line.find("2048 kB") != std::string::npos);
114  }
115  } else if (line.find("HugePages_Free:") != std::string::npos) {
116  ASSERT_ND(hugepage_count == 0);
117  // And "HugePages_Free: 12345" for the number of available hugepages.
118  std::string pages_str = line.substr(std::string("HugePages_Free:").length());
119  hugepage_count = std::stoull(pages_str);
120  }
121  }
122  file.close();
123  return hugepage_count * hugepage_size;
124 }
125 
126 ErrorStack EngineOptions::prescreen(std::ostream* details_out) const {
127  ASSERT_ND(details_out);
128  std::stringstream out_buffer;
129  const uint64_t kMarginRatio = 4; // Add 1/4 to be safe
130 
131  // we don't stop prescreening on individual errors so that
132  // the user can see all issues at once.
133  bool has_any_error = false;
134 
135  if (RUNNING_ON_VALGRIND && memory_.rigorous_page_boundary_check_) {
136  out_buffer
137  << "[FOEDUS] WARNING. We strongly discourage rigorous_page_boundary_check_ on valgrind."
138  << " If you are sure what you are doing, consider increasing VG_N_SEGMENTS and recompile"
139  << " valgrind."
140  << std::endl;
141  }
142 
143  // Check available hugepages
144  uint64_t available_hugepage_bytes = get_available_hugepage_memory(&out_buffer);
145  uint64_t required_shared_bytes;
146  uint64_t required_local_bytes;
147  calculate_required_memory(&required_shared_bytes, &required_local_bytes);
148 
149  uint64_t required_total_bytes = required_shared_bytes + required_local_bytes;
150  uint64_t required_total_safe_bytes = required_total_bytes + required_total_bytes / kMarginRatio;
151  if (available_hugepage_bytes < required_total_safe_bytes) {
152  has_any_error = true;
153 
154  out_buffer
155  << "[FOEDUS] There are not enough hugepages available."
156  << " Based on the values in EngineOptions, the machine should have at least "
157  << required_total_safe_bytes << " bytes ("
158  << assorted::int_div_ceil(required_total_safe_bytes, 1ULL << 21) << " 2MB pages, or "
159  << assorted::int_div_ceil(required_total_safe_bytes, 1ULL << 30) << " 1GB pages)"
160  << " of hugepages, but there are only " << available_hugepage_bytes << " bytes available."
161  << " eg: sudo sh -c 'echo xyz > /proc/sys/vm/nr_hugepages' "
162  << std::endl;
163  }
164 
165  // Check ulimit values
166  prescreen_ulimits(required_total_safe_bytes, &has_any_error, &out_buffer);
167 
168  // Check sysctl values
169  uint64_t required_shared_safe_bytes
170  = required_shared_bytes + required_shared_bytes / kMarginRatio;
171  prescreen_sysctl(required_shared_safe_bytes, &has_any_error, &out_buffer);
172 
173  std::string error_messages = out_buffer.str();
174  *details_out << error_messages;
175 
176  if (has_any_error) {
178  *details_out
179  << "**********************************************************" << std::endl
180  << "**** ENVIRONMENT PRESCREENING DETECTED SOME ISSUES." << std::endl
181  << "**** HOWEVER, suppress_memory_prescreening option was specified." << std::endl
182  << "**** FOEDUS will start up." << std::endl
183  << "**********************************************************" << std::endl;
184  return kRetOk;
185  } else {
186  *details_out
187  << "**********************************************************" << std::endl
188  << "**** ENVIRONMENT PRESCREENING FAILED." << std::endl
189  << "**** FOEDUS does not start up because of issues listed above." << std::endl
190  << "**********************************************************" << std::endl;
191  return ERROR_STACK_MSG(kErrorCodeEnvPrescreenFailed, error_messages.c_str());
192  }
193  } else {
194  return kRetOk;
195  }
196 }
197 
199  uint64_t required_total_safe_bytes,
200  bool* has_any_error,
201  std::ostream* details_out) const {
202  // nofile (number of file/socket that can be opened)
203  ::rlimit nofile_limit;
204  ::getrlimit(RLIMIT_NOFILE, &nofile_limit);
205  const uint64_t kMinNoFile = std::max(1U << 13, thread_.get_total_thread_count() * 16U);
206  if (nofile_limit.rlim_cur < kMinNoFile) {
207  *has_any_error = true;
208 
209  *details_out
210  << "[FOEDUS] ulimit -n is too small (" << nofile_limit.rlim_cur
211  << "). You must have at least " << kMinNoFile << std::endl;
212  }
213  // Record of a struggle: WTF,, no idea why, but I'm seeing an weird behavior only on Ubuntu.
214  // I did set limits.conf, and ulimit -n is saying 100000, but the above code returns "8192"
215  // on Ubuntu. As a tentative solution, reduced the min value to 8192.
216  // This happens only when I run the code as jenkins user from jenkins service.
217  // If I run it as myself, or "sudo su jenkins" then run it, it runs fine. WWWTTTTFFF.
218 
219  // 2015 Jun: Ahhh, I got it. It's because jenkins service is started by a daemon script:
220  // http://blog.mindfab.net/2013/12/changing-ulimits-for-jenkins-daemons.html
221  // "The important part is that you have to specify the ulimits, e.g., for the number
222  // of open files before start-stop-daemon is called. The reason is that
223  // **start-stop-daemon doesn't consider pam**
224  // and hence will not find the limits which have been specified in /etc/security/limits.conf."
225 
226  // Note that proc means threads in linux.
227  ::rlimit proc_limit;
228  ::getrlimit(RLIMIT_NPROC, &proc_limit);
229  const uint64_t kMinProc = std::max(1U << 12, thread_.get_total_thread_count() * 2U);
230  if (proc_limit.rlim_cur < kMinProc) {
231  *has_any_error = true;
232 
233  *details_out
234  << "[FOEDUS] ulimit -u is too small(" << proc_limit.rlim_cur
235  << "). You must have at least " << kMinProc << std::endl;
236  }
237 
238  // memlock
239  ::rlimit memlock_limit;
240  ::getrlimit(RLIMIT_MEMLOCK, &memlock_limit);
241  if (memlock_limit.rlim_cur * (1ULL << 10) < required_total_safe_bytes) {
242  *has_any_error = true;
243 
244  *details_out
245  << "[FOEDUS] ulimit -l is too small(" << memlock_limit.rlim_cur
246  << "). You must have at least "
247  << (required_total_safe_bytes >> 10) << std::endl;
248  }
249 
250  // Should also check: RLIMIT_AS, RLIMIT_DATA, RLIMIT_FSIZE, RLIMIT_LOCKS
251  // but it's rarely an issue in typical setup.
252 }
253 
254 uint64_t EngineOptions::read_int_from_proc_fs(const char* path, std::ostream* details_out) {
255  // _sysctl() is now strongly discouraged, so let's simlpy read as a file.
256  std::ifstream file(path);
257  if (!file.is_open()) {
258  *details_out << "[FOEDUS] Fails to read " << path;
259  return 0;
260  }
261 
262  std::string line;
263  std::getline(file, line);
264  file.close();
265 
266  return std::stoull(line);
267 }
268 
270  uint64_t required_shared_safe_bytes,
271  bool* has_any_error,
272  std::ostream* details_out) const {
273  uint64_t shmall = read_int_from_proc_fs("/proc/sys/kernel/shmall", details_out);
274  if (shmall < required_shared_safe_bytes) {
275  *has_any_error = true;
276 
277  *details_out
278  << "[FOEDUS] /proc/sys/kernel/shmall is too small (" << shmall << ".)"
279  << " It must be at least " << required_shared_safe_bytes
280  << ". We recommend to simply set semi-inifinite value: "
281  << " sudo sysctl -w kernel.shmall=1152921504606846720"
282  << " and adding an entry 'kernel.shmall = 1152921504606846720' to /etc/sysctl.conf"
283  << " then sudo sysctl -p"
284  << std::endl;
285  }
286 
287  uint64_t shmmax = read_int_from_proc_fs("/proc/sys/kernel/shmmax", details_out);
288  if (shmmax < required_shared_safe_bytes) {
289  *has_any_error = true;
290 
291  *details_out
292  << "[FOEDUS] /proc/sys/kernel/shmmax is too small(" << shmmax << ")."
293  << " It must be at least " << required_shared_safe_bytes
294  << ". We recommend to simply set semi-inifinite value: "
295  << " sudo sysctl -w kernel.shmmax=9223372036854775807"
296  << " and adding an entry 'kernel.shmmax = 9223372036854775807' to /etc/sysctl.conf"
297  << " then sudo sysctl -p"
298  << std::endl;
299  }
300 
301  uint64_t shmmni = read_int_from_proc_fs("/proc/sys/kernel/shmmni", details_out);
302  const uint64_t kMinShmmni = 4096;
303  if (shmmni < kMinShmmni) {
304  *has_any_error = true;
305 
306  *details_out
307  << "[FOEDUS] /proc/sys/kernel/shmmni is too small(" << shmmni << ")."
308  << " It must be at least " << kMinShmmni
309  << ". We recommend to set : "
310  << " sudo sysctl -w kernel.shmmni=" << kMinShmmni
311  << " and adding an entry 'kernel.shmmni = " << kMinShmmni << "' to /etc/sysctl.conf"
312  << " then sudo sysctl -p"
313  << std::endl;
314  }
315 
316  uint64_t shm_group = read_int_from_proc_fs("/proc/sys/vm/hugetlb_shm_group", details_out);
317  // This one is not an error. It works in some environment even without this parameter.
318  // So, we only warn about it so far. Also, we don't even check if the user is in this group.
319  if (shm_group == 0) {
320  *details_out
321  << "[FOEDUS] Warning: /proc/sys/vm/hugetlb_shm_group is not set."
322  << " In some environment, this is fine: FOEDUS can allocate shared memory backed by hugepages"
323  << " without configuring it, but some environment might fail without it"
324  << std::endl;
325  }
326 
327  uint64_t map_count = read_int_from_proc_fs("/proc/sys/vm/max_map_count", details_out);
328  if (map_count <= 65530U) {
329  *details_out
330  << "[FOEDUS] /proc/sys/vm/max_map_count is only " << map_count
331  << " When rigorous_memory_boundary_check or rigorous_page_boundary_check features"
332  << " are specified, you must set a large number to it."
333  << ". We recommend to set : "
334  << " sudo sysctl -w vm.max_map_count=2147483647"
335  << " and adding an entry 'vm.max_map_count=2147483647' to /etc/sysctl.conf"
336  << " then sudo sysctl -p"
337  << std::endl;
338  }
339 }
340 
342  uint64_t* shared_bytes,
343  uint64_t* local_bytes) const {
344  const uint32_t nodes = thread_.group_count_;
345  const uint32_t total_threads = thread_.get_total_thread_count();
346 
347  // First, shared memories. soc::SharedMemoryRepo has exact methods for that.
348  *shared_bytes = 0;
349 
350  // No idea how big the XML representation would be, but surely within 4MB.
351  uint64_t kMaxXmlSize = 1ULL << 22;
352  *shared_bytes += soc::SharedMemoryRepo::calculate_global_memory_size(kMaxXmlSize, *this);
353  *shared_bytes += soc::SharedMemoryRepo::calculate_node_memory_size(*this) * nodes;
354  *shared_bytes += (static_cast<uint64_t>(memory_.page_pool_size_mb_per_node_) << 20) * nodes;
355 
356  // Then, local memories, which are allocated in various places, so
357  // we need to list up each of them.. maybe missing something.
358  *local_bytes = 0;
359 
360  // snapshot cache pool
361  *local_bytes += cache_.snapshot_cache_size_mb_per_node_ * (1ULL << 20) * nodes;
362 
363  // logger buffer
364  *local_bytes += log_.log_buffer_kb_ * (1ULL << 10) * log_.loggers_per_node_ * nodes;
365 
366  // misc memory in NumaNodeMemory. for volatile pool and snapshot cache pool
367  *local_bytes += sizeof(memory::PagePoolOffsetChunk) * 2ULL * total_threads * 2;
368 
369  // core-local memories in NumaCoreMemory. work_memory and "small_memory" (terrible name, yes)
370  *local_bytes += xct_.local_work_memory_size_mb_ * (1ULL << 20) * total_threads;
371  *local_bytes += memory::NumaCoreMemory::calculate_local_small_memory_size(*this) * total_threads;
372 }
373 
374 
375 
376 } // namespace foedus
std::vector< externalize::Externalizable * > get_children(EngineOptions *option)
static ErrorStack get_child_element(tinyxml2::XMLElement *parent, const std::string &tag, Externalizable *child, bool optional=false)
child Externalizable version
Represents an object that can be written to and read from files/bytes in XML format.
Root package of FOEDUS (Fast Optimistic Engine for Data Unification Services).
Definition: assert_nd.hpp:44
static ErrorStack add_child_element(tinyxml2::XMLElement *parent, const std::string &tag, const std::string &comment, const Externalizable &child)
child Externalizable version
void prescreen_ulimits(uint64_t required_total_safe_bytes, bool *has_any_error, std::ostream *details_out) const
Subroutine of prescreen to check ulimit values.
Brings error stacktrace information as return value of functions.
Definition: error_stack.hpp:81
EngineOptions()
Constructs option values with default values.
static ErrorStack insert_comment(tinyxml2::XMLElement *element, const std::string &comment)
ErrorStack prescreen(std::ostream *details_out) const
Checks the machine environment and raises as many errors as possible before the engine starts up...
memory::MemoryOptions memory_
ErrorStack save(tinyxml2::XMLElement *element) const override
Writes the content of this object to the given XML element.
0x000E : "GENERAL: Pre-screening of the environment detected" " an issue before start up...
Definition: error_code.hpp:122
uint32_t log_buffer_kb_
Size in KB of log buffer for each worker thread.
Definition: log_options.hpp:83
void prescreen_sysctl(uint64_t required_shared_safe_bytes, bool *has_any_error, std::ostream *details_out) const
Subroutine of prescreen to check sysctl values.
static uint64_t calculate_global_memory_size(uint64_t xml_size, const EngineOptions &options)
ErrorStack load(tinyxml2::XMLElement *element) override
Reads the content of this object from the given XML element.
static uint64_t get_available_hugepage_memory(std::ostream *details_out)
A utility method to check how much hugepage memory is currently available.
Set of option values given to the engine at start-up.
static uint64_t calculate_node_memory_size(const EngineOptions &options)
uint16_t group_count_
Number of ThreadGroup in the engine.
static uint64_t read_int_from_proc_fs(const char *path, std::ostream *details_out)
A utility method to read /proc/ value in linux.
std::vector< CHILD_PTR > get_children_impl(ENGINE_OPTION_PTR option)
thread::ThreadOptions thread_
static uint64_t calculate_local_small_memory_size(const EngineOptions &options)
To reduce the overhead of grabbing/releasing pages from pool, we pack this many pointers for each gra...
Definition: page_pool.hpp:47
#define CHECK_ERROR(x)
This macro calls x and checks its returned value.
uint32_t page_pool_size_mb_per_node_
Size of the page pool in MB per each NUMA node.
const ErrorStack kRetOk
Normal return value for no-error case.
void calculate_required_memory(uint64_t *shared_bytes, uint64_t *local_bytes) const
int64_t int_div_ceil(int64_t dividee, int64_t dividor)
Efficient ceil(dividee/dividor) for integer.
ThreadId get_total_thread_count() const
#define ERROR_STACK_MSG(e, m)
Overload of ERROR_STACK(e) to receive a custom error message.
uint32_t snapshot_cache_size_mb_per_node_
Size of the snapshot cache in MB per each NUMA node.
bool suppress_memory_prescreening_
Whether to tolerate insufficient hugepages etc in the prescreen check.
#define ASSERT_ND(x)
A warning-free wrapper macro of assert() that has no performance effect in release mode even when 'x'...
Definition: assert_nd.hpp:72
bool rigorous_page_boundary_check_
Whether to use mprotect() for page boundaries to detect bogus memory accesses.
uint32_t local_work_memory_size_mb_
Size of local and temporary work memory one transaction can use during transaction.
EngineOptions & operator=(const EngineOptions &other)
cache::CacheOptions cache_
uint16_t loggers_per_node_
Number of loggers per NUMA node.
Definition: log_options.hpp:80