libfoedus-core
FOEDUS Core Library
masstree_adopt_impl.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015, Hewlett-Packard Development Company, LP.
3  * This program is free software; you can redistribute it and/or modify it
4  * under the terms of the GNU General Public License as published by the Free
5  * Software Foundation; either version 2 of the License, or (at your option)
6  * any later version.
7  *
8  * This program is distributed in the hope that it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11  * more details. You should have received a copy of the GNU General Public
12  * License along with this program; if not, write to the Free Software
13  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14  *
15  * HP designates this particular file as subject to the "Classpath" exception
16  * as provided by HP in the LICENSE.txt file that accompanied this code.
17  */
19 
20 #include <glog/logging.h>
21 
22 #include "foedus/assert_nd.hpp"
26 #include "foedus/thread/thread.hpp"
27 
28 namespace foedus {
29 namespace storage {
30 namespace masstree {
31 
35  ASSERT_ND(old_->is_moved()); // this is guaranteed because these flag are immutable once set.
36 
37  // Lock pages in one shot.
38  Page* pages[2];
39  pages[0] = reinterpret_cast<Page*>(parent_);
40  pages[1] = reinterpret_cast<Page*>(old_);
41  CHECK_ERROR_CODE(context_->sysxct_batch_page_locks(sysxct_workspace, 2, pages));
42 
43  // After the above lock, we check status of the pages
44  if (parent_->is_moved()) {
45  VLOG(0) << "Interesting. concurrent thread has already split this node?";
46  return kErrorCodeOk;
47  } else if (old_->is_retired()) {
48  VLOG(0) << "Interesting. concurrent thread already adopted.";
49  return kErrorCodeOk;
50  }
51 
52  const KeySlice searching_slice = old_->get_low_fence();
53  const auto minipage_index = parent_->find_minipage(searching_slice);
54  auto& minipage = parent_->get_minipage(minipage_index);
55  const auto pointer_index = minipage.find_pointer(searching_slice);
56  ASSERT_ND(minipage.key_count_ <= kMaxIntermediateMiniSeparators);
57  ASSERT_ND(old_->get_volatile_page_id() == minipage.pointers_[pointer_index].volatile_pointer_);
58 
59  // Now, how do we accommodate the new pointer?
62  // Case A. One of the grandchildrens is empty-range
63  CHECK_ERROR_CODE(adopt_case_a(minipage_index, pointer_index));
64  } else {
65  // Case B. More complex, normal case
66  CHECK_ERROR_CODE(adopt_case_b(minipage_index, pointer_index));
67  }
68 
70  return kErrorCodeOk;
71 }
72 
74  uint16_t minipage_index,
75  uint16_t pointer_index) {
76  VLOG(0) << "Adopting from a child page that contains an empty-range page. This happens when"
77  << " record compaction/expansion created a page without a record.";
78 
80  ASSERT_ND(grandchild_minor->get_low_fence() == old_->get_low_fence());
81  ASSERT_ND(grandchild_minor->get_high_fence() == old_->get_foster_fence());
83  ASSERT_ND(grandchild_major->get_low_fence() == old_->get_foster_fence());
84  ASSERT_ND(grandchild_major->get_high_fence() == old_->get_high_fence());
85  ASSERT_ND(!grandchild_minor->header().snapshot_);
86  ASSERT_ND(!grandchild_major->header().snapshot_);
87 
88  ASSERT_ND(grandchild_minor->is_empty_range() != grandchild_major->is_empty_range());
89 
90  VolatilePagePointer nonempty_grandchild_pointer;
91  MasstreePage* empty_grandchild = nullptr;
92  MasstreePage* nonempty_grandchild = nullptr;
93  if (grandchild_minor->is_empty_range()) {
94  empty_grandchild = grandchild_minor;
95  nonempty_grandchild = grandchild_major;
96  } else {
97  empty_grandchild = grandchild_major;
98  nonempty_grandchild = grandchild_minor;
99  }
100 
101  auto& minipage = parent_->get_minipage(minipage_index);
102  ASSERT_ND(old_->get_volatile_page_id() == minipage.pointers_[pointer_index].volatile_pointer_);
103  minipage.pointers_[pointer_index].snapshot_pointer_ = 0;
104  minipage.pointers_[pointer_index].volatile_pointer_ = nonempty_grandchild->get_volatile_page_id();
105 
106  // The only thread that might be retiring this empty page must be in this function,
107  // holding a page-lock in scope_child. Thus we don't need a lock in empty_grandchild.
108  ASSERT_ND(!empty_grandchild->is_locked()); // none else holding lock on it
109  // and we can safely retire the page. We do not use set_retired because is_moved() is false
110  // It's a special retirement path.
113  old_->set_retired();
115  return kErrorCodeOk;
116 }
117 
119  uint16_t minipage_index,
120  uint16_t pointer_index) {
121  const auto key_count = parent_->get_key_count();
122  const KeySlice new_separator = old_->get_foster_fence();
123  const VolatilePagePointer minor_pointer = old_->get_foster_minor();
124  const VolatilePagePointer major_pointer = old_->get_foster_major();
125  auto& minipage = parent_->get_minipage(minipage_index);
126  ASSERT_ND(old_->get_volatile_page_id() == minipage.pointers_[pointer_index].volatile_pointer_);
127 
128  // Can we simply append the new pointer either as a new minipage at the end of this page
129  // or as a new separator at the end of a minipage? Then we don't need major change.
130  // We simply append without any change, which is easy to make right.
131  if (minipage.key_count_ == pointer_index) {
132  if (minipage.key_count_ < kMaxIntermediateMiniSeparators) {
133  // We can append a new separator at the end of the minipage.
134  ASSERT_ND(!minipage.pointers_[pointer_index].is_both_null());
135  ASSERT_ND(pointer_index == 0 || minipage.separators_[pointer_index - 1] < new_separator);
136  minipage.separators_[pointer_index] = new_separator;
137  minipage.pointers_[pointer_index + 1].snapshot_pointer_ = 0;
138  minipage.pointers_[pointer_index + 1].volatile_pointer_ = major_pointer;
139 
140  // we don't have to adopt the foster-minor because that's the child page itself,
141  // but we have to switch the pointer
142  minipage.pointers_[pointer_index].snapshot_pointer_ = 0;
143  minipage.pointers_[pointer_index].volatile_pointer_ = minor_pointer;
144 
145  // we increase key count after above, with fence, so that concurrent transactions
146  // never see an empty slot.
148  ++minipage.key_count_;
149  ASSERT_ND(minipage.key_count_ <= kMaxIntermediateMiniSeparators);
150 
151  old_->set_retired();
153  return kErrorCodeOk;
154  } else if (key_count == minipage_index && key_count < kMaxIntermediateSeparators) {
155  // The minipage is full.. and the minipage is the last one!
156  // We can add it as a new minipage
157  auto& new_minipage = parent_->get_minipage(minipage_index + 1);
158  new_minipage.key_count_ = 0;
159 
160 #ifndef NDEBUG
161  // for ease of debugging zero-out the page first (only data part). only for debug build.
162  for (uint8_t j = 0; j <= kMaxIntermediateMiniSeparators; ++j) {
164  new_minipage.separators_[j] = 0;
165  }
166  new_minipage.pointers_[j].snapshot_pointer_ = 0;
167  new_minipage.pointers_[j].volatile_pointer_.word = 0;
168  }
169 #endif // NDEBUG
170 
171  ASSERT_ND(new_minipage.key_count_ == 0);
172  new_minipage.pointers_[0].snapshot_pointer_ = 0;
173  new_minipage.pointers_[0].volatile_pointer_ = major_pointer;
174 
175  // also handle foster-twin if it's border page
176  DualPagePointer& old_pointer = minipage.pointers_[minipage.key_count_];
177  old_pointer.snapshot_pointer_ = 0;
178  old_pointer.volatile_pointer_ = minor_pointer;
179 
180  parent_->set_separator(minipage_index, new_separator);
181 
182  // increment key count after all with fence so that concurrent transactions never see
183  // a minipage that is not ready for read
186  ASSERT_ND(parent_->get_key_count() == minipage_index + 1);
187 
188  old_->set_retired();
190  return kErrorCodeOk;
191  }
192  }
193 
194  // In all other cases, we split this page.
195  // We initially had more complex code to do in-page rebalance and
196  // in-minipage "shifting", but got some bugs. Pulled out too many hairs.
197  // Let's keep it simple. If we really observe bottleneck here, we can reconsider.
198  // Splitting is way more robust because it changes nothing in this existing page.
199  // It just places new foster-twin pointers.
200 
201  // Reuse SplitIntermediate. We are directly invoking the sysxct's internal logic
202  // rather than nesting sysxct.
203  memory::PagePoolOffset offsets[2];
204  thread::GrabFreeVolatilePagesScope free_pages_scope(context_, offsets);
205  CHECK_ERROR_CODE(free_pages_scope.grab(2));
207  split.split_impl_no_error(&free_pages_scope);
208  ASSERT_ND(old_->is_retired()); // the above internally retires old page
209  return kErrorCodeOk;
210 }
211 
212 } // namespace masstree
213 } // namespace storage
214 } // namespace foedus
SlotIndex get_key_count() const __attribute__((always_inline))
physical key count (those keys might be deleted) in this page.
Represents a pointer to another page (usually a child page).
Definition: storage_id.hpp:271
ErrorCode adopt_case_b(uint16_t minipage_index, uint16_t pointer_index)
bool is_empty_range() const __attribute__((always_inline))
An empty-range page, either intermediate or border, never has any entries.
ErrorCode sysxct_batch_page_locks(xct::SysxctWorkspace *sysxct_workspace, uint32_t lock_count, storage::Page **pages)
Takes a bunch of page locks for a sysxct running under this thread.
bool is_locked() const __attribute__((always_inline))
Root package of FOEDUS (Fast Optimistic Engine for Data Unification Services).
Definition: assert_nd.hpp:44
bool is_moved() const __attribute__((always_inline))
uint32_t PagePoolOffset
Offset in PagePool that compactly represents the page address (unlike 8 bytes pointer).
Definition: memory_id.hpp:44
void collect_retired_volatile_page(storage::VolatilePagePointer ptr)
Keeps the specified volatile page as retired as of the current epoch.
Definition: thread.cpp:113
MasstreePage *const old_
The old page that was split, whose foster-twins are being adopted.
Represents a pointer to a volatile page with modification count for preventing ABA.
Definition: storage_id.hpp:194
MiniPage & get_minipage(uint8_t index) __attribute__((always_inline))
uint64_t KeySlice
Each key slice is an 8-byte integer.
const uint16_t kMaxIntermediateMiniSeparators
Max number of separators stored in the second level of intermediate pages.
Common base of MasstreeIntermediatePage and MasstreeBorderPage.
VolatilePagePointer get_foster_minor() const __attribute__((always_inline))
bool is_retired() const __attribute__((always_inline))
MasstreeIntermediatePage *const parent_
The parent page that currently points to the old page.
VolatilePagePointer get_foster_major() const __attribute__((always_inline))
ErrorCode grab(uint32_t count)
If this thread doesn't have enough free pages, no page is obtained, returning kErrorCodeMemoryNoFreePages.
Definition: thread.cpp:147
VolatilePagePointer volatile_pointer_
Definition: storage_id.hpp:308
0 means no-error.
Definition: error_code.hpp:87
ErrorCode adopt_case_a(uint16_t minipage_index, uint16_t pointer_index)
const uint16_t kMaxIntermediateSeparators
Max number of separators stored in the first level of intermediate pages.
thread::Thread *const context_
Thread context.
SnapshotPagePointer snapshot_pointer_
Definition: storage_id.hpp:307
void increment_key_count() __attribute__((always_inline))
A system transaction to split an intermediate page in Master-Tree.
Just a marker to denote that the memory region represents a data page.
Definition: page.hpp:334
void set_retired() __attribute__((always_inline))
KeySlice get_high_fence() const __attribute__((always_inline))
P * resolve_cast(storage::VolatilePagePointer ptr) const
resolve() plus reinterpret_cast
Definition: thread.hpp:110
void set_separator(uint8_t minipage_index, KeySlice new_separator)
Place a new separator for a new minipage.
KeySlice get_low_fence() const __attribute__((always_inline))
KeySlice get_foster_fence() const __attribute__((always_inline))
#define CHECK_ERROR_CODE(x)
This macro calls x and checks its returned error code.
Definition: error_code.hpp:155
const PageVersion * get_version_address() const __attribute__((always_inline))
VolatilePagePointer get_volatile_page_id() const
PageVersionStatus status_
Definition: page.hpp:172
bool snapshot_
Whether this page image is of a snapshot page.
Definition: page.hpp:211
#define ASSERT_ND(x)
A warning-free wrapper macro of assert() that has no performance effect in release mode even when 'x' is not a simple variable.
Definition: assert_nd.hpp:72
void memory_fence_release()
Equivalent to std::atomic_thread_fence(std::memory_order_release).
Obtains multiple free volatile pages at once and releases them automatically when this object gets ou...
Definition: thread.hpp:391
ErrorCode
Enum of error codes defined in error_code.xmacro.
Definition: error_code.hpp:85
uint8_t find_minipage(KeySlice slice) const __attribute__((always_inline))
Navigates a searching key-slice to one of the mini pages in this page.
Per-thread reused work memory for system transactions.
uint8_t find_pointer(KeySlice slice) const __attribute__((always_inline))
Navigates a searching key-slice to one of pointers in this mini-page.
virtual ErrorCode run(xct::SysxctWorkspace *sysxct_workspace) override
Execute the system transaction.