libfoedus-core
FOEDUS Core Library
shared_memory.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015, Hewlett-Packard Development Company, LP.
3  * This program is free software; you can redistribute it and/or modify it
4  * under the terms of the GNU General Public License as published by the Free
5  * Software Foundation; either version 2 of the License, or (at your option)
6  * any later version.
7  *
8  * This program is distributed in the hope that it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11  * more details. You should have received a copy of the GNU General Public
12  * License along with this program; if not, write to the Free Software
13  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14  *
15  * HP designates this particular file as subject to the "Classpath" exception
16  * as provided by HP in the LICENSE.txt file that accompanied this code.
17  */
19 
20 #include <fcntl.h>
21 #include <numa.h>
22 #include <numaif.h>
23 #include <unistd.h>
24 #include <valgrind.h> // just for RUNNING_ON_VALGRIND macro.
25 #include <sys/ipc.h>
26 #include <sys/shm.h>
27 #include <sys/types.h>
28 
29 #include <cstdio>
30 #include <cstring>
31 #include <fstream>
32 #include <iostream>
33 #include <sstream>
34 #include <string>
35 
36 #include "foedus/assert_nd.hpp"
39 #include "foedus/fs/filesystem.hpp"
41 
42 namespace foedus {
43 namespace memory {
44 
45 // Note, we can't use glog in this file because shared memory is used before glog is initialized.
46 
47 SharedMemory::SharedMemory(SharedMemory &&other) noexcept : block_(nullptr) {
48  *this = std::move(other);
49 }
51  release_block();
52  meta_path_ = other.meta_path_;
53  size_ = other.size_;
54  numa_node_ = other.numa_node_;
55  shmid_ = other.shmid_;
56  shmkey_ = other.shmkey_;
57  owner_pid_ = other.owner_pid_;
58  block_ = other.block_;
59  other.block_ = nullptr;
60  return *this;
61 }
62 
63 bool SharedMemory::is_owned() const {
64  return owner_pid_ != 0 && owner_pid_ == ::getpid();
65 }
66 
68  const std::string& meta_path,
69  uint64_t size,
70  int numa_node,
71  bool use_hugepages) {
72  release_block();
73 
74  if (size % (1ULL << 21) != 0) {
75  size = ((size >> 21) + 1ULL) << 21;
76  }
77 
78  // create a meta file. we must first create it then generate key.
79  // shmkey will change whenever we modify the file.
80  if (fs::exists(fs::Path(meta_path))) {
81  std::string msg = std::string("Shared memory meta file already exists:") + meta_path;
82  return ERROR_STACK_MSG(kErrorCodeSocShmAllocFailed, msg.c_str());
83  }
84  std::ofstream file(meta_path, std::ofstream::binary);
85  if (!file.is_open()) {
86  std::string msg = std::string("Failed to create shared memory meta file:") + meta_path;
87  return ERROR_STACK_MSG(kErrorCodeSocShmAllocFailed, msg.c_str());
88  }
89 
90  // randomly generate shmkey. We initially used ftok(), but it occasionally gives lots of
91  // conflicts for some reason, esp on aarch64. we just need some random number, so here
92  // we use pid and CPU cycle.
93  pid_t the_pid = ::getpid();
94  uint64_t key64 = debugging::get_rdtsc() ^ the_pid;
95  key_t the_key = (key64 >> 32) ^ key64;
96 
97  if (the_key == 0) {
98  // rdtsc and getpid not working??
99  std::string msg = std::string("Dubious shmkey");
100  return ERROR_STACK_MSG(kErrorCodeSocShmAllocFailed, msg.c_str());
101  }
102 
103  // Write out the size/node/shmkey of the shared memory in the meta file
104  file.write(reinterpret_cast<char*>(&size), sizeof(size));
105  file.write(reinterpret_cast<char*>(&numa_node), sizeof(numa_node));
106  file.write(reinterpret_cast<char*>(&the_key), sizeof(key_t));
107  file.flush();
108  file.close();
109 
110  size_ = size;
111  numa_node_ = numa_node;
112  owner_pid_ = the_pid;
113  meta_path_ = meta_path;
114  shmkey_ = the_key;
115 
116  // if this is running under valgrind, we have to avoid using hugepages due to a bug in valgrind.
117  // When we are running on valgrind, we don't care performance anyway. So shouldn't matter.
118  if (RUNNING_ON_VALGRIND) {
119  use_hugepages = false;
120  }
121  // see https://bugs.kde.org/show_bug.cgi?id=338995
122 
123  // Use libnuma's numa_set_preferred to initialize the NUMA node of the memory.
124  // This is the only way to control numa allocation for shared memory.
125  // mbind does nothing for shared memory.
126  ScopedNumaPreferred numa_scope(numa_node, true);
127 
128  shmid_ = ::shmget(
129  shmkey_,
130  size_,
131  IPC_CREAT | IPC_EXCL | S_IRUSR | S_IWUSR | (use_hugepages ? SHM_HUGETLB : 0));
132  if (shmid_ == -1) {
133  std::string msg = std::string("shmget() failed! size=") + std::to_string(size_)
134  + std::string(", os_error=") + assorted::os_error() + std::string(", meta_path=") + meta_path;
135  return ERROR_STACK_MSG(kErrorCodeSocShmAllocFailed, msg.c_str());
136  }
137 
138  block_ = reinterpret_cast<char*>(::shmat(shmid_, nullptr, 0));
139 
140  if (block_ == reinterpret_cast<void*>(-1)) {
141  ::shmctl(shmid_, IPC_RMID, nullptr); // first thing. release it! before everything else.
142  block_ = nullptr;
143  std::stringstream msg;
144  msg << "shmat alloc failed!" << *this << ", error=" << assorted::os_error();
145  release_block();
146  std::string str = msg.str();
147  return ERROR_STACK_MSG(kErrorCodeSocShmAllocFailed, str.c_str());
148  }
149 
150  std::memset(block_, 0, size_); // see class comment for why we do this immediately
151  // This memset takes a very long time due to the issue in linux kernel:
152  // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=8382d914ebf72092aa15cdc2a5dcedb2daa0209d
153  // In linux 3.15 and later, this problem gets resolved and highly parallelizable.
154  return kRetOk;
155 }
156 
157 void SharedMemory::attach(const std::string& meta_path, bool use_hugepages) {
158  release_block();
159  if (!fs::exists(fs::Path(meta_path))) {
160  std::cerr << "Shared memory meta file does not exist:" << meta_path << std::endl;
161  return;
162  }
163  // the meta file contains the size of the shared memory
164  std::ifstream file(meta_path, std::ifstream::binary);
165  if (!file.is_open()) {
166  std::cerr << "Failed to open shared memory meta file:" << meta_path << std::endl;
167  return;
168  }
169  uint64_t shared_size = 0;
170  int numa_node = 0;
171  key_t the_key = 0;
172  file.read(reinterpret_cast<char*>(&shared_size), sizeof(shared_size));
173  file.read(reinterpret_cast<char*>(&numa_node), sizeof(numa_node));
174  file.read(reinterpret_cast<char*>(&the_key), sizeof(key_t));
175  file.close();
176 
177  // we always use hugepages, so it's at least 2MB
178  if (shared_size < (1ULL << 21)) {
179  std::cerr << "Failed to read size of shared memory from meta file:" << meta_path
180  << ". It looks like:" << shared_size << std::endl;
181  return;
182  }
183  if (the_key == 0) {
184  std::cerr << "Failed to read shmkey from meta file:" << meta_path << std::endl;
185  return;
186  }
187 
188  size_ = shared_size;
189  numa_node_ = numa_node;
190  meta_path_ = meta_path;
191  shmkey_ = the_key;
192  owner_pid_ = 0;
193 
194  if (RUNNING_ON_VALGRIND) {
195  use_hugepages = false;
196  }
197  shmid_ = ::shmget(shmkey_, size_, use_hugepages ? SHM_HUGETLB : 0);
198  if (shmid_ == -1) {
199  std::cerr << "shmget() attach failed! size=" << size_ << ", error=" << assorted::os_error()
200  << std::endl;
201  return;
202  }
203 
204  block_ = reinterpret_cast<char*>(::shmat(shmid_, nullptr, 0));
205  if (block_ == reinterpret_cast<void*>(-1)) {
206  block_ = nullptr;
207  std::cerr << "shmat attach failed!" << *this << ", error=" << assorted::os_error() << std::endl;
208  release_block();
209  return;
210  }
211 }
212 
214  if (block_ != nullptr && shmid_ != 0) {
215  // Some material says that Linux allows shmget even after shmctl(IPC_RMID), but it doesn't.
216  // It allows shmat() after shmctl(IPC_RMID), but not shmget().
217  // So we have to invoke IPC_RMID after all child processes acked.
218  ::shmctl(shmid_, IPC_RMID, nullptr);
219  }
220 }
221 
223  if (block_ != nullptr) {
224  // mark the memory to be reclaimed
225  if (is_owned()) {
227  }
228 
229  // Just detach it. as we already invoked shmctl(IPC_RMID) at beginning, linux will
230  // automatically release it once the reference count reaches zero.
231  int dt_ret = ::shmdt(block_);
232  if (dt_ret == -1) {
233  std::cerr << "shmdt() failed." << *this << ", error=" << assorted::os_error() << std::endl;
234  }
235 
236  block_ = nullptr;
237 
238  // clean up meta file.
239  if (is_owned()) {
240  std::remove(meta_path_.c_str());
241  }
242  }
243 }
244 
245 std::ostream& operator<<(std::ostream& o, const SharedMemory& v) {
246  o << "<SharedMemory>";
247  o << "<meta_path>" << v.get_meta_path() << "</meta_path>";
248  o << "<size>" << v.get_size() << "</size>";
249  o << "<owned>" << v.is_owned() << "</owned>";
250  o << "<owner_pid>" << v.get_owner_pid() << "</owner_pid>";
251  o << "<numa_node>" << v.get_numa_node() << "</numa_node>";
252  o << "<shmid>" << v.get_shmid() << "</shmid>";
253  o << "<shmkey>" << v.get_shmkey() << "</shmkey>";
254  o << "<address>" << reinterpret_cast<uintptr_t>(v.get_block()) << "</address>";
255  o << "</SharedMemory>";
256  return o;
257 }
258 
259 } // namespace memory
260 } // namespace foedus
261 
bool remove(const Path &p)
Deletes a regular file or an empty directory.
Definition: filesystem.cpp:132
bool is_owned() const
Returns if this process owns this memory and is responsible to delete it.
Automatically sets and resets numa_set_preferred().
Definition: memory_id.hpp:58
key_t get_shmkey() const
Returns the key of this shared memory.
Root package of FOEDUS (Fast Optimistic Engine for Data Unification Services).
Definition: assert_nd.hpp:44
char * get_block() const
Returns the memory block.
Brings error stacktrace information as return value of functions.
Definition: error_stack.hpp:81
Definitions of IDs in this package and a few related constant values.
Represents memory shared between processes.
int get_numa_node() const
Where the physical memory is allocated.
Analogue of boost::filesystem::path.
Definition: path.hpp:37
SharedMemory() noexcept
Empty constructor which allocates nothing.
uint64_t get_rdtsc()
Returns the current CPU cycle via x86 RDTSC.
Definition: rdtsc.hpp:35
bool exists(const Path &p)
Returns if the file exists.
Definition: filesystem.hpp:128
std::ostream & operator<<(std::ostream &o, const AlignedMemory &v)
void attach(const std::string &meta_path, bool use_hugepages)
Attach an already-allocated shared memory so that this object points to the memory.
std::string os_error()
Thread-safe strerror(errno).
const ErrorStack kRetOk
Normal return value for no-error case.
const std::string & get_meta_path() const
Returns the path of the meta file.
Implements an RDTSC (Real-time time stamp counter) wait to emulate latency on slower devices...
int get_shmid() const
Returns the ID of this shared memory.
#define ERROR_STACK_MSG(e, m)
Overload of ERROR_STACK(e) to receive a custom error message.
0x0C01 : "SOC : Failed to allocate a shared memory. This is usually caused by a misconfigured envi...
Definition: error_code.hpp:220
void mark_for_release()
Marks the shared memory as being removed so that it will be reclaimed when all processes detach it...
SharedMemory & operator=(const SharedMemory &other)=delete
ErrorStack alloc(const std::string &meta_path, uint64_t size, int numa_node, bool use_hugepages)
Newly allocate a shared memory of given size on given NUMA node.
pid_t get_owner_pid() const
If non-zero, it means the ID of the process that allocated the shared memory.
uint64_t get_size() const
Returns the byte size of the memory block.
void release_block()
Releases the memory block IF this process has an ownership.