masstree_page_impl.hpp
/*
 * Copyright (c) 2014-2015, Hewlett-Packard Development Company, LP.
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details. You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * HP designates this particular file as subject to the "Classpath" exception
 * as provided by HP in the LICENSE.txt file that accompanied this code.
 */
#ifndef FOEDUS_STORAGE_MASSTREE_MASSTREE_PAGE_IMPL_HPP_
#define FOEDUS_STORAGE_MASSTREE_MASSTREE_PAGE_IMPL_HPP_

#include <stdint.h>

#include <algorithm>
#include <cstring>
#include <iosfwd>

#include "foedus/assert_nd.hpp"
#include "foedus/compiler.hpp"
#include "foedus/epoch.hpp"
#include "foedus/error_code.hpp"
#include "foedus/fwd.hpp"
#include "foedus/assorted/assorted_func.hpp"
#include "foedus/assorted/cacheline.hpp"
#include "foedus/memory/fwd.hpp"
#include "foedus/storage/page.hpp"
#include "foedus/storage/record.hpp"
#include "foedus/storage/storage_id.hpp"
#include "foedus/storage/masstree/fwd.hpp"
#include "foedus/storage/masstree/masstree_id.hpp"
#include "foedus/thread/fwd.hpp"
#include "foedus/xct/fwd.hpp"
#include "foedus/xct/xct_id.hpp"

namespace foedus {
namespace storage {
namespace masstree {

/** Used only for debugging as this is not space efficient. */
struct HighFence {
  HighFence(KeySlice slice, bool supremum) : slice_(slice), supremum_(supremum) {}
  KeySlice slice_;
  bool supremum_;
};

/**
 * Common base of MasstreeIntermediatePage and MasstreeBorderPage.
 */
class MasstreePage {
 public:
  MasstreePage() = delete;
  MasstreePage(const MasstreePage& other) = delete;
  MasstreePage& operator=(const MasstreePage& other) = delete;

  // simple accessors
  PageHeader& header() { return header_; }
  const PageHeader& header() const { return header_; }
  VolatilePagePointer get_volatile_page_id() const {
    ASSERT_ND(!header_.snapshot_);
    return construct_volatile_page_pointer(header_.page_id_);
  }
  SnapshotPagePointer get_snapshot_page_id() const {
    ASSERT_ND(header_.snapshot_);
    return static_cast<SnapshotPagePointer>(header_.page_id_);
  }

  bool is_border() const ALWAYS_INLINE {
    ASSERT_ND(header_.get_page_type() == kMasstreeBorderPageType
      || header_.get_page_type() == kMasstreeIntermediatePageType);
    return header_.get_page_type() == kMasstreeBorderPageType;
  }
  /** An empty-range page, either intermediate or border, never has any entries. */
  bool is_empty_range() const ALWAYS_INLINE { return low_fence_ == high_fence_; }
  KeySlice get_low_fence() const ALWAYS_INLINE { return low_fence_; }
  KeySlice get_high_fence() const ALWAYS_INLINE { return high_fence_; }
  bool is_high_fence_supremum() const ALWAYS_INLINE {
    return high_fence_ == kSupremumSlice;
  }
  bool is_low_fence_infimum() const ALWAYS_INLINE {
    return low_fence_ == kInfimumSlice;
  }
  bool is_layer_root() const ALWAYS_INLINE {
    return is_low_fence_infimum() && is_high_fence_supremum();
  }
  KeySlice get_foster_fence() const ALWAYS_INLINE { return foster_fence_; }
  bool is_foster_minor_null() const ALWAYS_INLINE { return foster_twin_[0].is_null(); }
  bool is_foster_major_null() const ALWAYS_INLINE { return foster_twin_[1].is_null(); }
  VolatilePagePointer get_foster_minor() const ALWAYS_INLINE { return foster_twin_[0]; }
  VolatilePagePointer get_foster_major() const ALWAYS_INLINE { return foster_twin_[1]; }
  void set_foster_twin(VolatilePagePointer minor, VolatilePagePointer major) {
    foster_twin_[0] = minor;
    foster_twin_[1] = major;
  }
  void install_foster_twin(
    VolatilePagePointer minor,
    VolatilePagePointer major,
    KeySlice foster_fence) {
    set_foster_twin(minor, major);
    foster_fence_ = foster_fence;
  }

  bool within_fences(KeySlice slice) const ALWAYS_INLINE {
    return slice >= low_fence_ && (is_high_fence_supremum() || slice < high_fence_);
  }
  bool within_foster_minor(KeySlice slice) const ALWAYS_INLINE {
    ASSERT_ND(within_fences(slice));
    ASSERT_ND(has_foster_child());
    return slice < foster_fence_;
  }
  bool within_foster_major(KeySlice slice) const ALWAYS_INLINE {
    ASSERT_ND(within_fences(slice));
    ASSERT_ND(has_foster_child());
    return slice >= foster_fence_;
  }
  bool has_foster_child() const ALWAYS_INLINE {
    return header_.page_version_.is_moved();
  }
  /** Layer-0 stores the first 8-byte slice, Layer-1 the next 8 bytes, and so on. */
  uint8_t get_layer() const ALWAYS_INLINE { return header_.masstree_layer_; }
  /** Height of this page within its B-tree layer. Used only in masstree. */
  uint8_t get_btree_level() const ALWAYS_INLINE { return header_.masstree_in_layer_level_; }
  /** Physical key count (those keys might be deleted) in this page. */
  SlotIndex get_key_count() const ALWAYS_INLINE { return header_.key_count_; }
  void set_key_count(SlotIndex count) ALWAYS_INLINE { header_.set_key_count(count); }
  void increment_key_count() ALWAYS_INLINE { header_.increment_key_count(); }

  /** Prefetch up to keys/separators, whether this page is border or interior. */
  void prefetch_general() const ALWAYS_INLINE {
    assorted::prefetch_cachelines(this, 4);  // max(border's prefetch, interior's prefetch)
  }

  const PageVersion& get_version() const ALWAYS_INLINE { return header_.page_version_; }
  PageVersion& get_version() ALWAYS_INLINE { return header_.page_version_; }
  const PageVersion* get_version_address() const ALWAYS_INLINE { return &header_.page_version_; }
  PageVersion* get_version_address() ALWAYS_INLINE { return &header_.page_version_; }
  xct::McsWwLock* get_lock_address() ALWAYS_INLINE { return &header_.page_version_.lock_; }

  bool is_locked() const ALWAYS_INLINE { return header_.page_version_.is_locked(); }
  bool is_moved() const ALWAYS_INLINE { return header_.page_version_.is_moved(); }
  bool is_retired() const ALWAYS_INLINE { return header_.page_version_.is_retired(); }
  void set_moved() ALWAYS_INLINE { header_.page_version_.set_moved(); }
  void set_retired() ALWAYS_INLINE { header_.page_version_.set_retired(); }

  void release_pages_recursive_common(
    const memory::GlobalVolatilePageResolver& page_resolver,
    memory::PageReleaseBatch* batch);


  /** As the name suggests, this should be used only by composer. Foster twin should be immutable. */
  void set_foster_major_offset_unsafe(memory::PagePoolOffset offset) ALWAYS_INLINE {
    ASSERT_ND(header_.snapshot_);
    foster_twin_[1].set_offset_unsafe(offset);
  }
  /** As the name suggests, this should be used only by composer. Fence should be immutable. */
  void set_high_fence_unsafe(KeySlice high_fence) ALWAYS_INLINE {
    ASSERT_ND(header_.snapshot_);
    high_fence_ = high_fence;
  }

  /** Defined in masstree_page_debug.cpp. */
  friend std::ostream& operator<<(std::ostream& o, const MasstreePage& v);

 protected:
  PageHeader header_;  // +40 -> 40

  /** Inclusive low fence of this page. */
  KeySlice low_fence_;  // +8 -> 48
  /** Inclusive high fence of this page. */
  KeySlice high_fence_;  // +8 -> 56
  /** Inclusive low fence of foster child. */
  KeySlice foster_fence_;  // +8 -> 64

  /** Points to foster children, or tentative child pages. */
  VolatilePagePointer foster_twin_[2];  // +16 -> 80

  void initialize_volatile_common(
    StorageId storage_id,
    VolatilePagePointer page_id,
    PageType page_type,
    uint8_t layer,
    uint8_t level,
    KeySlice low_fence,
    KeySlice high_fence);

  void initialize_snapshot_common(
    StorageId storage_id,
    SnapshotPagePointer page_id,
    PageType page_type,
    uint8_t layer,
    uint8_t level,
    KeySlice low_fence,
    KeySlice high_fence);
};

/**
 * Max number of separators stored in the first level of intermediate pages.
 */
const uint16_t kMaxIntermediateSeparators = 9U;

/**
 * Max number of separators stored in the second level of intermediate pages.
 */
const uint16_t kMaxIntermediateMiniSeparators = 15U;

/**
 * Max number of pointers (if completely filled) stored in an intermediate page.
 */
const uint32_t kMaxIntermediatePointers
  = (kMaxIntermediateSeparators + 1U) * (kMaxIntermediateMiniSeparators + 1U);

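// Illustration (added; not in the original header): the two limits above give
// a completely filled intermediate page a fan-out of (9 + 1) * (15 + 1) = 160
// child pointers, which the following compile-time check spells out.
static_assert(
  (kMaxIntermediateSeparators + 1U) * (kMaxIntermediateMiniSeparators + 1U) == 160U,
  "a full intermediate page routes to 160 children");
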
/**
 * Represents one intermediate page in Masstree Storage.
 * An intermediate page has a two-level structure: up to 9 first-level
 * separators, each pointing to a mini page with up to 15 second-level
 * separators and 16 pointers.
 */
class MasstreeIntermediatePage final : public MasstreePage {
 public:
  friend struct SplitIntermediate;
  struct MiniPage {
    MiniPage() = delete;
    MiniPage(const MiniPage& other) = delete;
    MiniPage& operator=(const MiniPage& other) = delete;

    // +8 -> 8
    uint8_t key_count_;
    uint8_t reserved_[7];

    // +8*15 -> 128
    KeySlice separators_[kMaxIntermediateMiniSeparators];
    // +16*16 -> 384
    DualPagePointer pointers_[kMaxIntermediateMiniSeparators + 1];

    /** Prefetch up to separators. */
    void prefetch() const {
      assorted::prefetch_cachelines(this, 2);
    }
    /** Navigates a searching key-slice to one of the pointers in this mini page. */
    uint8_t find_pointer(KeySlice slice) const ALWAYS_INLINE {
      uint8_t key_count = key_count_;
      ASSERT_ND(key_count <= kMaxIntermediateMiniSeparators);
      for (uint8_t i = 0; i < key_count; ++i) {
        if (slice < separators_[i]) {
          return i;
        }
      }
      return key_count;
    }
  };

  // A page object is never explicitly instantiated. You must reinterpret_cast.
  MasstreeIntermediatePage() = delete;
  MasstreeIntermediatePage(const MasstreeIntermediatePage& other) = delete;
  MasstreeIntermediatePage& operator=(const MasstreeIntermediatePage& other) = delete;

  /** Prefetch up to separators. */
  void prefetch() const {
    assorted::prefetch_cachelines(this, 3);
  }

  /** Navigates a searching key-slice to one of the mini pages in this page. */
  uint8_t find_minipage(KeySlice slice) const ALWAYS_INLINE {
    uint8_t key_count = get_key_count();
    ASSERT_ND(key_count <= kMaxIntermediateSeparators);
    for (uint8_t i = 0; i < key_count; ++i) {
      if (slice < separators_[i]) {
        return i;
      }
    }
    return key_count;
  }
  MiniPage& get_minipage(uint8_t index) ALWAYS_INLINE { return mini_pages_[index]; }
  const MiniPage& get_minipage(uint8_t index) const ALWAYS_INLINE { return mini_pages_[index]; }
  KeySlice get_separator(uint8_t index) const ALWAYS_INLINE { return separators_[index]; }

  /**
   * This method is used when we release a large number of volatile pages,
   * most likely when we drop a storage.
   */
  void release_pages_recursive_parallel(Engine* engine);
  void release_pages_recursive(
    const memory::GlobalVolatilePageResolver& page_resolver,
    memory::PageReleaseBatch* batch);

  void initialize_volatile_page(
    StorageId storage_id,
    VolatilePagePointer page_id,
    uint8_t layer,
    uint8_t level,
    KeySlice low_fence,
    KeySlice high_fence);
  void initialize_snapshot_page(
    StorageId storage_id,
    SnapshotPagePointer page_id,
    uint8_t layer,
    uint8_t level,
    KeySlice low_fence,
    KeySlice high_fence);

  /**
   * Appends a new pointer and separator in an existing mini page,
   * used only by snapshot composer.
   */
  void append_pointer_snapshot(KeySlice low_fence, SnapshotPagePointer pointer);
  /**
   * Appends a new separator and the initial pointer in a new mini page,
   * used only by snapshot composer.
   */
  void append_minipage_snapshot(KeySlice low_fence, SnapshotPagePointer pointer);
  /** Whether this page is full of separators, used only by snapshot composer. */
  bool is_full_snapshot() const;
  /** Retrieves the separators defining the index, used only by snapshot composer, thus no race. */
  void extract_separators_snapshot(
    uint8_t index,
    uint8_t index_mini,
    KeySlice* separator_low,
    KeySlice* separator_high) const {
    ASSERT_ND(header_.snapshot_);
    extract_separators_common(index, index_mini, separator_low, separator_high);
  }
  /**
   * Retrieves the separators defining the index, used for volatile pages.
   * The caller must make the read safe, e.g., by locking the page or retrying.
   */
  void extract_separators_volatile(
    uint8_t index,
    uint8_t index_mini,
    KeySlice* separator_low,
    KeySlice* separator_high) const {
    ASSERT_ND(!header_.snapshot_);
    extract_separators_common(index, index_mini, separator_low, separator_high);
  }
  /** Common implementation of extract_separators_snapshot/volatile. */
  void extract_separators_common(
    uint8_t index,
    uint8_t index_mini,
    KeySlice* separator_low,
    KeySlice* separator_high) const;

  void verify_separators() const;

  /** Place a new separator for a new minipage. */
  void set_separator(uint8_t minipage_index, KeySlice new_separator) {
    separators_[minipage_index] = new_separator;
  }

  /** Defined in masstree_page_debug.cpp. */
  friend std::ostream& operator<<(std::ostream& o, const MasstreeIntermediatePage& v);

 private:
  // 80

  /**
   * Separators to navigate search to mini pages in this page.
   * Iff separators_[i - 1] <= slice < separators_[i], the search is navigated to mini_pages_[i].
   */
  KeySlice separators_[kMaxIntermediateSeparators];  // +72 -> 152

  char reserved_[104];  // -> 256

  MiniPage mini_pages_[10];  // +384 * 10 -> 4096
};

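// Sketch (added; "route_in_intermediate" is a hypothetical helper, not part of
// this header): a point query descends an intermediate page in two linear-search
// steps, first across mini pages, then within the chosen mini page.
inline const DualPagePointer& route_in_intermediate(
  const MasstreeIntermediatePage& page,
  KeySlice slice) {
  uint8_t minipage_index = page.find_minipage(slice);        // first level, <= 9 separators
  const MasstreeIntermediatePage::MiniPage& mini = page.get_minipage(minipage_index);
  uint8_t pointer_index = mini.find_pointer(slice);          // second level, <= 15 separators
  return mini.pointers_[pointer_index];
}
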
/**
 * Represents one border page in Masstree Storage.
 */
class MasstreeBorderPage final : public MasstreePage {
 public:
  friend struct SplitBorder;
  /**
   * A piece of Slot object that must be read/written in one-shot, meaning no
   * one reads half-written values.
   */
  struct SlotLengthPart {
    /** Byte offset in data_ where this record starts. */
    DataOffset offset_;  // +2 -> 2
    /** Byte count this record occupies. */
    DataOffset physical_record_length_;  // +2 -> 4
    /** Unused so far. */
    uint16_t unused_;  // +2 -> 6
    /** Byte length of the payload. */
    PayloadLength payload_length_;  // +2 -> 8
  };
  union SlotLengthUnion {
    uint64_t word;
    SlotLengthPart components;
  };

  /**
   * Fix-sized slot for each record, which is placed at the end of data region.
   */
  struct Slot {
    /** TID of the record. */
    xct::RwLockableXctId tid_;  // +16 -> 16
    /** Stores mutable length information of the record. */
    SlotLengthUnion lengthes_;  // +8 -> 24
    /** The value of physical_record_length_ as of the creation of this record. */
    DataOffset original_physical_record_length_;  // +2 -> 26
    /** The value of offset_ as of the creation of this record. */
    DataOffset original_offset_;  // +2 -> 28
    /**
     * Length of the key remainder in this layer.
     * kInitiallyNextLayer means the record was initially created as a next-layer pointer.
     */
    KeyLength remainder_length_;  // +2 -> 30
    char filler_[2];  // +2 -> 32

    Slot() = delete;
    Slot(const Slot&) = delete;
    Slot& operator=(const Slot&) = delete;
    ~Slot() = delete;

    inline KeyLength get_aligned_suffix_length() const ALWAYS_INLINE {
      return calculate_suffix_length_aligned(remainder_length_);
    }
    inline KeyLength get_suffix_length() const ALWAYS_INLINE {
      return calculate_suffix_length(remainder_length_);
    }
    /** This might be affected by concurrent threads. */
    inline PayloadLength get_max_payload_peek() const ALWAYS_INLINE {
      return lengthes_.components.physical_record_length_ - get_aligned_suffix_length();
    }
    /** This is not affected by concurrent threads. */
    inline PayloadLength get_max_payload_stable(SlotLengthPart stable_length) const ALWAYS_INLINE {
      return stable_length.physical_record_length_ - get_aligned_suffix_length();
    }
    inline bool does_point_to_layer() const ALWAYS_INLINE {
      return tid_.xct_id_.is_next_layer();
    }

    /**
     * Reads lengthes_ of this slot in one-shot.
     */
    inline SlotLengthPart read_lengthes_oneshot() const ALWAYS_INLINE {
      SlotLengthUnion ret;
      ret.word = lengthes_.word;
      return ret.components;
    }
    /**
     * Writes lengthes_ in one-shot.
     */
    inline void write_lengthes_oneshot(SlotLengthPart new_value) ALWAYS_INLINE {
      SlotLengthUnion tmp;
      tmp.components = new_value;
      lengthes_.word = tmp.word;
    }
  };
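  // Usage sketch (added comment, not in the original header): a reader takes
  // one consistent 8-byte snapshot of the mutable length fields, then derives
  // everything from that local copy instead of re-reading the live slot:
  //   SlotLengthPart lengths = slot->read_lengthes_oneshot();
  //   PayloadLength max_payload = slot->get_max_payload_stable(lengths);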

  /** Used in FindKeyForReserveResult. */
  enum MatchType {
    kNotFound = 0,
    kExactMatchLocalRecord = 1,
    kExactMatchLayerPointer = 2,
    kConflictingLocalRecord = 3,
  };

  /** Return value of find_key_for_reserve(). POD. */
  struct FindKeyForReserveResult {
    FindKeyForReserveResult(SlotIndex index, MatchType match_type)
      : index_(index), match_type_(match_type) {}
    SlotIndex index_;
    MatchType match_type_;
  };

  // A page object is never explicitly instantiated. You must reinterpret_cast.
  MasstreeBorderPage() = delete;
  MasstreeBorderPage(const MasstreeBorderPage& other) = delete;
  MasstreeBorderPage& operator=(const MasstreeBorderPage& other) = delete;

  void initialize_volatile_page(
    StorageId storage_id,
    VolatilePagePointer page_id,
    uint8_t layer,
    KeySlice low_fence,
    KeySlice high_fence);
  void initialize_snapshot_page(
    StorageId storage_id,
    SnapshotPagePointer page_id,
    uint8_t layer,
    KeySlice low_fence,
    KeySlice high_fence);

  /**
   * Whether this page is receiving only sequential inserts.
   * If so, the cursor can skip its sorting phase.
   */
  bool is_consecutive_inserts() const { return consecutive_inserts_; }

  DataOffset get_next_offset() const { return next_offset_; }
  void increase_next_offset(DataOffset length) {
    next_offset_ += length;
    ASSERT_ND(next_offset_ <= sizeof(data_));
  }

  inline const Slot* get_slot(SlotIndex index) const ALWAYS_INLINE {
    ASSERT_ND(index < get_key_count());
    ASSERT_ND(index < kBorderPageMaxSlots);
    return reinterpret_cast<const Slot*>(this + 1) - index - 1;
  }

  inline Slot* get_slot(SlotIndex index) ALWAYS_INLINE {
    ASSERT_ND(index < get_key_count());
    ASSERT_ND(index < kBorderPageMaxSlots);
    return reinterpret_cast<Slot*>(this + 1) - index - 1;
  }

  inline Slot* get_new_slot(SlotIndex index) ALWAYS_INLINE {
    ASSERT_ND(index == get_key_count());
    ASSERT_ND(index < kBorderPageMaxSlots);
    return reinterpret_cast<Slot*>(this + 1) - index - 1;
  }

  inline SlotIndex to_slot_index(const Slot* slot) const ALWAYS_INLINE {
    ASSERT_ND(slot);
    int64_t index = reinterpret_cast<const Slot*>(this + 1) - slot - 1;
    ASSERT_ND(index >= 0);
    ASSERT_ND(index < static_cast<int64_t>(sizeof(data_) / sizeof(Slot)));
    return static_cast<SlotIndex>(index);
  }

  // methods about available/required spaces

  /** Returns usable data space in bytes. */
  inline DataOffset available_space() const {
    uint16_t consumed = next_offset_ + get_key_count() * sizeof(Slot);
    ASSERT_ND(consumed <= sizeof(data_));
    if (consumed > sizeof(data_)) {  // just to be conservative on release build
      return 0;
    }
    return sizeof(data_) - consumed;
  }
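  // Example (added comment, not in the original header): records grow forward
  // from data_[0] while the 32-byte slots grow backward from the end of the
  // page, so with next_offset_ == 1000 and 10 existing slots:
  //   consumed  == 1000 + 10 * 32 == 1320
  //   available == sizeof(data_) - 1320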

  /**
   * Returns the byte size of a record in data_ for the given remainder/payload
   * lengths: the 8-byte-aligned suffix followed by the 8-byte-aligned payload.
   */
  static DataOffset to_record_length(
    KeyLength remainder_length,
    PayloadLength payload_length) {
    KeyLength suffix_length_aligned = calculate_suffix_length_aligned(remainder_length);
    return suffix_length_aligned + assorted::align8(payload_length);
  }

  /**
   * Returns the byte size of required contiguous space in data_ to insert a new
   * record of the given remainder/payload length, including the slot itself.
   */
  static DataOffset required_data_space(
    KeyLength remainder_length,
    PayloadLength payload_length) {
    return to_record_length(remainder_length, payload_length) + sizeof(Slot);
  }
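  // Worked example (added comment, not in the original header): a key whose
  // remainder in this layer is 12 bytes (8-byte slice + 4-byte suffix) with a
  // 30-byte payload:
  //   calculate_suffix_length_aligned(12) == align8(12 - 8) == 8
  //   to_record_length(12, 30)            == 8 + align8(30) == 8 + 32 == 40
  //   required_data_space(12, 30)         == 40 + sizeof(Slot) == 40 + 32 == 72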

  /**
   * Navigates a searching key-slice to one of the records in this page.
   * Returns kBorderPageMaxSlots if not found.
   */
  SlotIndex find_key(
    KeySlice slice,
    const void* suffix,
    KeyLength remainder) const ALWAYS_INLINE;

  /**
   * Specialized version of find_key() for 8-byte native integer search.
   * Because such a key never points to a next layer, this is much simpler.
   */
  SlotIndex find_key_normalized(
    SlotIndex from_index,
    SlotIndex to_index,
    KeySlice slice) const ALWAYS_INLINE;


  /**
   * This is for the case we are looking for either the matching slot or the slot we will modify.
   */
  FindKeyForReserveResult find_key_for_reserve(
    SlotIndex from_index,
    SlotIndex to_index,
    KeySlice slice,
    const void* suffix,
    KeyLength remainder) const ALWAYS_INLINE;
  /**
   * This one is used for snapshot pages, so there is no race.
   */
  FindKeyForReserveResult find_key_for_snapshot(
    KeySlice slice,
    const void* suffix,
    KeyLength remainder) const ALWAYS_INLINE;

  char* get_record(SlotIndex index) ALWAYS_INLINE {
    ASSERT_ND(index < kBorderPageMaxSlots);
    return get_record_from_offset(get_offset_in_bytes(index));
  }
  const char* get_record(SlotIndex index) const ALWAYS_INLINE {
    ASSERT_ND(index < kBorderPageMaxSlots);
    return get_record_from_offset(get_offset_in_bytes(index));
  }
  char* get_record_payload(SlotIndex index) ALWAYS_INLINE {
    char* record = get_record(index);
    KeyLength skipped = get_suffix_length_aligned(index);
    return record + skipped;
  }
  const char* get_record_payload(SlotIndex index) const ALWAYS_INLINE {
    const char* record = get_record(index);
    KeyLength skipped = get_suffix_length_aligned(index);
    return record + skipped;
  }
  DualPagePointer* get_next_layer(SlotIndex index) ALWAYS_INLINE {
    return reinterpret_cast<DualPagePointer*>(get_record_payload(index));
  }
  const DualPagePointer* get_next_layer(SlotIndex index) const ALWAYS_INLINE {
    return reinterpret_cast<const DualPagePointer*>(get_record_payload(index));
  }

  /** Offset versions of the record accessors. */
  char* get_record_from_offset(DataOffset record_offset) ALWAYS_INLINE {
    ASSERT_ND(record_offset % 8 == 0);
    ASSERT_ND(record_offset < sizeof(data_));
    return data_ + record_offset;
  }
  const char* get_record_from_offset(DataOffset record_offset) const ALWAYS_INLINE {
    ASSERT_ND(record_offset % 8 == 0);
    ASSERT_ND(record_offset < sizeof(data_));
    return data_ + record_offset;
  }
  const char* get_record_payload_from_offsets(
    DataOffset record_offset,
    KeyLength remainder_length) const ALWAYS_INLINE {
    const char* record = get_record_from_offset(record_offset);
    KeyLength skipped = calculate_suffix_length_aligned(remainder_length);
    return record + skipped;
  }
  char* get_record_payload_from_offsets(
    DataOffset record_offset,
    KeyLength remainder_length) ALWAYS_INLINE {
    char* record = get_record_from_offset(record_offset);
    KeyLength skipped = calculate_suffix_length_aligned(remainder_length);
    return record + skipped;
  }
  DualPagePointer* get_next_layer_from_offsets(
    DataOffset record_offset,
    KeyLength remainder_length) ALWAYS_INLINE {
    char* payload = get_record_payload_from_offsets(record_offset, remainder_length);
    return reinterpret_cast<DualPagePointer*>(payload);
  }
  const DualPagePointer* get_next_layer_from_offsets(
    DataOffset record_offset,
    KeyLength remainder_length) const ALWAYS_INLINE {
    const char* payload = get_record_payload_from_offsets(record_offset, remainder_length);
    return reinterpret_cast<const DualPagePointer*>(payload);
  }

  bool does_point_to_layer(SlotIndex index) const ALWAYS_INLINE {
    ASSERT_ND(index < get_key_count());
    return get_slot(index)->tid_.xct_id_.is_next_layer();
  }

  KeySlice get_slice(SlotIndex index) const ALWAYS_INLINE {
    ASSERT_ND(index < kBorderPageMaxSlots);
    return slices_[index];
  }
  void set_slice(SlotIndex index, KeySlice slice) ALWAYS_INLINE {
    ASSERT_ND(index < kBorderPageMaxSlots);
    slices_[index] = slice;
  }
  DataOffset get_offset_in_bytes(SlotIndex index) const ALWAYS_INLINE {
    return get_slot(index)->lengthes_.components.offset_;
  }

  xct::RwLockableXctId* get_owner_id(SlotIndex index) ALWAYS_INLINE {
    return &get_slot(index)->tid_;
  }
  const xct::RwLockableXctId* get_owner_id(SlotIndex index) const ALWAYS_INLINE {
    return &get_slot(index)->tid_;
  }

  KeyLength get_remainder_length(SlotIndex index) const ALWAYS_INLINE {
    return get_slot(index)->remainder_length_;
  }
  KeyLength get_suffix_length(SlotIndex index) const ALWAYS_INLINE {
    const KeyLength remainder_length = get_remainder_length(index);
    return calculate_suffix_length(remainder_length);
  }
  KeyLength get_suffix_length_aligned(SlotIndex index) const ALWAYS_INLINE {
    const KeyLength remainder_length = get_remainder_length(index);
    return calculate_suffix_length_aligned(remainder_length);
  }
  /** This might be affected by concurrent threads. */
  PayloadLength get_payload_length(SlotIndex index) const ALWAYS_INLINE {
    return get_slot(index)->lengthes_.components.payload_length_;
  }
  /**
   * This is a "peek" version; the result might be affected by concurrent threads.
   */
  PayloadLength get_max_payload_length(SlotIndex index) const ALWAYS_INLINE {
    return get_slot(index)->get_max_payload_peek();
  }

  bool can_accomodate(
    SlotIndex new_index,
    KeyLength remainder_length,
    PayloadLength payload_count) const ALWAYS_INLINE;
  /**
   * Snapshot version of can_accomodate(); additionally guarantees that the
   * record can later be replaced with a next-layer pointer.
   */
  bool can_accomodate_snapshot(
    KeyLength remainder_length,
    PayloadLength payload_count) const ALWAYS_INLINE;
  /** Actually this method should be renamed to equal_key... */
  bool compare_key(
    SlotIndex index,
    const void* be_key,
    KeyLength key_length) const ALWAYS_INLINE;
  /** Same as compare_key() with a more accurate name. */
  bool equal_key(SlotIndex index, const void* be_key, KeyLength key_length) const ALWAYS_INLINE;

  /** Compares the key against the record; returns -1, 0, or 1. */
  int ltgt_key(SlotIndex index, const char* be_key, KeyLength key_length) const ALWAYS_INLINE;
  /** Overload that receives slice/suffix/remainder instead of a whole key. */
  int ltgt_key(
    SlotIndex index,
    KeySlice slice,
    const char* suffix,
    KeyLength remainder) const ALWAYS_INLINE;
  /**
   * Whether the given key would conflict with the existing record: either it is
   * exactly the same key, or both keys need a next layer for the same slice.
   */
  bool will_conflict(SlotIndex index, const char* be_key, KeyLength key_length) const ALWAYS_INLINE;
  /** Overload that receives slice/remainder instead of a whole key. */
  bool will_conflict(SlotIndex index, KeySlice slice, KeyLength remainder) const ALWAYS_INLINE;
  /** Whether the record at the index is a next-layer pointer that would contain the key. */
  bool will_contain_next_layer(
    SlotIndex index,
    const char* be_key,
    KeyLength key_length) const ALWAYS_INLINE;
  bool will_contain_next_layer(
    SlotIndex index,
    KeySlice slice,
    KeyLength remainder) const ALWAYS_INLINE;

  /**
   * Installs a new physical record that doesn't exist logically (delete bit on).
   */
  void reserve_record_space(
    SlotIndex index,
    xct::XctId initial_owner_id,
    KeySlice slice,
    const void* suffix,
    KeyLength remainder_length,
    PayloadLength payload_count);

  /** Creates a record that is initially a next-layer pointer. */
  void reserve_initially_next_layer(
    SlotIndex index,
    xct::XctId initial_owner_id,
    KeySlice slice,
    const DualPagePointer& pointer);

  /**
   * Installs a next-layer pointer. Used only from the snapshot composer, so no race.
   */
  void append_next_layer_snapshot(
    xct::XctId initial_owner_id,
    KeySlice slice,
    SnapshotPagePointer pointer);
  /**
   * Same as above, except this turns the existing last record into a next-layer pointer.
   */
  void replace_next_layer_snapshot(SnapshotPagePointer pointer);

  /**
   * Copy the initial record that will be the only record for a new root page.
   */
  void initialize_layer_root(const MasstreeBorderPage* copy_from, SlotIndex copy_index);

  void release_pages_recursive(
    const memory::GlobalVolatilePageResolver& page_resolver,
    memory::PageReleaseBatch* batch);

  /** Prefetch up to the 256th byte. */
  void prefetch() const ALWAYS_INLINE {
    assorted::prefetch_cachelines(this, 4);
  }
  void prefetch_additional_if_needed(SlotIndex key_count) const ALWAYS_INLINE {
    const uint32_t kInitialPrefetchBytes = 4U * assorted::kCachelineSize;  // 256 bytes
    const SlotIndex kInitiallyFetched
      = (kInitialPrefetchBytes - kCommonPageHeaderSize - kBorderPageAdditionalHeaderSize)
      / sizeof(KeySlice);
    if (key_count > kInitiallyFetched) {
      // we initially prefetched 64*4 = 256 bytes: header and 22 key slices.
      // if we have more, prefetch now while we are still searching.
      uint16_t cachelines = ((key_count - kInitiallyFetched) / sizeof(KeySlice)) + 1;
      assorted::prefetch_cachelines(
        reinterpret_cast<const char*>(this) + kInitialPrefetchBytes,
        cachelines);
    }
  }

  /**
   * A physical-only method to expand the record in this page without any logical change.
   */
  bool try_expand_record_in_page_physical(PayloadLength payload_count, SlotIndex record_index);
  /**
   * A physical-only method to initialize this page as a volatile page of a
   * layer-root pointed from the given parent record.
   */
  void initialize_as_layer_root_physical(
    VolatilePagePointer page_id,
    MasstreeBorderPage* parent,
    SlotIndex parent_index);

  /** Tracks a moved record; see xct::TrackMovedRecordResult. */
  xct::TrackMovedRecordResult track_moved_record(
    Engine* engine,
    xct::RwLockableXctId* old_address,
    xct::WriteXctAccess* write_set);
  /** This one further tracks it to the next layer. */
  xct::TrackMovedRecordResult track_moved_record_next_layer(
    Engine* engine,
    xct::RwLockableXctId* old_address);

  bool verify_slot_lengthes(SlotIndex index) const;

  void assert_entries() ALWAYS_INLINE {
#ifndef NDEBUG
    assert_entries_impl();
#endif  // NDEBUG
  }
  /** Defined in masstree_page_debug.cpp. */
  void assert_entries_impl() const;
  /** Defined in masstree_page_debug.cpp. */
  friend std::ostream& operator<<(std::ostream& o, const MasstreeBorderPage& v);

 private:
  // 80

  /** Byte offset in data_ where the next record can be placed. */
  DataOffset next_offset_;  // +2 -> 82
  /** Whether this page is receiving only sequential inserts. */
  bool consecutive_inserts_;  // +1 -> 83

  /** To make the following part 8-byte aligned. */
  char dummy_[5];  // +5 -> 88

  /**
   * Key slice of each record. Placed close to the header so that the initial
   * prefetch also fetches the first key slices.
   */
  KeySlice slices_[kBorderPageMaxSlots];

  /** Data region that stores records from the head and slots from the tail. */
  char data_[kBorderPageDataPartSize];

  MasstreeBorderPage* track_foster_child(
    KeySlice slice,
    const memory::GlobalVolatilePageResolver& resolver);
};
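
// Sketch (added comment, not in the original header): a short-key point read
// in a border page combines the pieces above; "page" is a hypothetical
// MasstreeBorderPage* the caller already navigated to:
//   SlotIndex i = page->find_key(slice, suffix, remainder);
//   if (i < kBorderPageMaxSlots && !page->does_point_to_layer(i)) {
//     const char* payload = page->get_record_payload(i);
//     PayloadLength length = page->get_payload_length(i);  // a peek; verify TID at commit
//   }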

/**
 * Handy iterator for MasstreeIntermediatePage. Note that this object is not
 * thread safe. Use it only where it's safe (e.g., snapshot pages).
 */
struct MasstreeIntermediatePointerIterator final {
  explicit MasstreeIntermediatePointerIterator(const MasstreeIntermediatePage* page)
    : page_(page), index_(0), index_mini_(0) {
    if (page->is_empty_range()) {
      // Empty-range page has zero pointers, which is special.
      ASSERT_ND(!page->header().snapshot_);
      index_ = 1;  // so that initial is_valid returns false.
    }
  }

  void next() {
    if (!is_valid()) {
      return;
    }
    ++index_mini_;
    if (index_mini_ > page_->get_minipage(index_).key_count_) {
      ++index_;
      index_mini_ = 0;
    }
  }
  bool is_valid() const {
    return index_ <= page_->get_key_count()
      && index_mini_ <= page_->get_minipage(index_).key_count_;
  }
  KeySlice get_low_key() const {
    ASSERT_ND(is_valid());
    const MasstreeIntermediatePage::MiniPage& minipage = page_->get_minipage(index_);
    if (index_mini_ > 0) {
      return minipage.separators_[index_mini_ - 1];
    } else if (index_ > 0) {
      return page_->get_separator(index_ - 1);
    } else {
      return page_->get_low_fence();
    }
  }
  KeySlice get_high_key() const {
    ASSERT_ND(is_valid());
    const MasstreeIntermediatePage::MiniPage& minipage = page_->get_minipage(index_);
    if (index_mini_ < minipage.key_count_) {
      return minipage.separators_[index_mini_];
    } else if (index_ < page_->get_key_count()) {
      return page_->get_separator(index_);
    } else {
      return page_->get_high_fence();
    }
  }
  const DualPagePointer& get_pointer() const {
    ASSERT_ND(is_valid());
    const MasstreeIntermediatePage::MiniPage& minipage = page_->get_minipage(index_);
    return minipage.pointers_[index_mini_];
  }

  const MasstreeIntermediatePage* const page_;
  uint16_t index_;
  uint16_t index_mini_;
};
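
// Usage sketch (added comment, not in the original header): enumerating every
// child pointer of a snapshot intermediate page together with its key range;
// "visit" is a hypothetical callback:
//   for (MasstreeIntermediatePointerIterator it(page); it.is_valid(); it.next()) {
//     visit(it.get_low_key(), it.get_high_key(), it.get_pointer());
//   }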
inline KeyLength calculate_suffix_length(KeyLength remainder_length) {
  if (remainder_length == kInitiallyNextLayer) {
    return 0;
  }
  ASSERT_ND(remainder_length <= kMaxKeyLength);
  if (remainder_length >= sizeof(KeySlice)) {
    return remainder_length - sizeof(KeySlice);
  } else {
    return 0;
  }
}
inline KeyLength calculate_suffix_length_aligned(KeyLength remainder_length) {
  return assorted::align8(calculate_suffix_length(remainder_length));
}
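
// Worked example (added comment, not in the original header): a 20-byte key
// handled in layer 1 has remainder 20 - 1 * 8 = 12 bytes; the first 8 of those
// form the slice stored in slices_, so:
//   calculate_suffix_length(12)         == 12 - 8 == 4
//   calculate_suffix_length_aligned(12) == align8(4) == 8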

inline SlotIndex MasstreeBorderPage::find_key(
  KeySlice slice,
  const void* suffix,
  KeyLength remainder) const {
  SlotIndex key_count = get_key_count();
  ASSERT_ND(remainder <= kMaxKeyLength);
  ASSERT_ND(key_count <= kBorderPageMaxSlots);
  prefetch_additional_if_needed(key_count);

  // one slice might be used for up to 10 keys, length 0 to 8 and pointer to next layer.
  if (remainder <= sizeof(KeySlice)) {
    // then we are looking for length 0-8 only.
    for (SlotIndex i = 0; i < key_count; ++i) {
      const KeySlice rec_slice = get_slice(i);
      if (LIKELY(slice != rec_slice)) {
        continue;
      }
      // no suffix nor next layer, so just compare length. if not match, continue
      const KeyLength klen = get_remainder_length(i);
      if (klen == remainder) {
        return i;
      }
    }
  } else {
    // then we are only looking for length>8.
    for (SlotIndex i = 0; i < key_count; ++i) {
      const KeySlice rec_slice = get_slice(i);
      if (LIKELY(slice != rec_slice)) {
        continue;
      }

      if (does_point_to_layer(i)) {
        // as it points to next layer, no need to check suffix. We are sure this is it.
        // so far we don't delete layers, so in this case the record is always valid.
        return i;
      }

      // now, our key is > 8 bytes and we found some local record.
      const KeyLength klen = get_remainder_length(i);
      if (klen <= sizeof(KeySlice)) {
        continue;  // length 0-8, surely not a match. skip.
      }

      if (klen == remainder) {
        // compare suffix.
        const char* record_suffix = get_record(i);
        if (std::memcmp(record_suffix, suffix, remainder - sizeof(KeySlice)) == 0) {
          return i;
        }
      }

      // The record has > 8 bytes key of the same slice. it must be the only such record in this
      // page because otherwise we must have created a next layer!
      break;  // no more check needed
    }
  }
  return kBorderPageMaxSlots;
}

inline SlotIndex MasstreeBorderPage::find_key_normalized(
  SlotIndex from_index,
  SlotIndex to_index,
  KeySlice slice) const {
  ASSERT_ND(to_index <= kBorderPageMaxSlots);
  ASSERT_ND(from_index <= to_index);
  if (from_index == 0) {  // we don't need prefetching in second time
    prefetch_additional_if_needed(to_index);
  }
  for (SlotIndex i = from_index; i < to_index; ++i) {
    const KeySlice rec_slice = get_slice(i);
    const KeyLength klen = get_remainder_length(i);
    if (UNLIKELY(slice == rec_slice && klen == sizeof(KeySlice))) {
      return i;
    }
  }
  return kBorderPageMaxSlots;
}

inline MasstreeBorderPage::FindKeyForReserveResult MasstreeBorderPage::find_key_for_reserve(
  SlotIndex from_index,
  SlotIndex to_index,
  KeySlice slice,
  const void* suffix,
  KeyLength remainder) const {
  ASSERT_ND(to_index <= kBorderPageMaxSlots);
  ASSERT_ND(from_index <= to_index);
  ASSERT_ND(remainder <= kMaxKeyLength);
  if (from_index == 0) {
    prefetch_additional_if_needed(to_index);
  }
  if (remainder <= sizeof(KeySlice)) {
    for (SlotIndex i = from_index; i < to_index; ++i) {
      const KeySlice rec_slice = get_slice(i);
      if (LIKELY(slice != rec_slice)) {
        continue;
      }
      const KeyLength klen = get_remainder_length(i);
      if (klen == remainder) {
        // exactly the same key. found.
        return FindKeyForReserveResult(i, kExactMatchLocalRecord);
      }
    }
  } else {
    for (SlotIndex i = from_index; i < to_index; ++i) {
      const KeySlice rec_slice = get_slice(i);
      if (LIKELY(slice != rec_slice)) {
        continue;
      }

      const bool next_layer = does_point_to_layer(i);
      const KeyLength klen = get_remainder_length(i);

      if (next_layer) {
        ASSERT_ND(klen > sizeof(KeySlice));
        return FindKeyForReserveResult(i, kExactMatchLayerPointer);
      } else if (klen <= sizeof(KeySlice)) {
        continue;
      }

      // now, both the searching key and this key are more than 8 bytes.
      // whether the key really matches or not, this IS the slot we are looking for.
      // Either 1) the keys really match, or 2) we will make this record point to next layer.
      const char* record_suffix = get_record(i);
      if (klen == remainder &&
        std::memcmp(record_suffix, suffix, remainder - sizeof(KeySlice)) == 0) {
        // case 1)
        return FindKeyForReserveResult(i, kExactMatchLocalRecord);
      } else {
        // case 2)
        return FindKeyForReserveResult(i, kConflictingLocalRecord);
      }
    }
  }
  return FindKeyForReserveResult(kBorderPageMaxSlots, kNotFound);
}

inline MasstreeBorderPage::FindKeyForReserveResult MasstreeBorderPage::find_key_for_snapshot(
  KeySlice slice,
  const void* suffix,
  KeyLength remainder) const {
  ASSERT_ND(header().snapshot_);
  ASSERT_ND(remainder <= kMaxKeyLength);
  // Remember, unlike other cases above, there are no worries on concurrency.
  const SlotIndex key_count = get_key_count();
  if (remainder <= sizeof(KeySlice)) {
    for (SlotIndex i = 0; i < key_count; ++i) {
      const KeySlice rec_slice = get_slice(i);
      if (rec_slice < slice) {
        continue;
      } else if (rec_slice > slice) {
        // as keys are sorted in snapshot page, "i" is the place the slice would be inserted.
        return FindKeyForReserveResult(i, kNotFound);
      }
      ASSERT_ND(rec_slice == slice);
      const KeyLength klen = get_remainder_length(i);
      if (klen == remainder) {
        // exactly the same key. found.
        return FindKeyForReserveResult(i, kExactMatchLocalRecord);
      } else if (klen < remainder) {
        continue;  // same as "rec_slice < slice" case
      } else {
        return FindKeyForReserveResult(i, kNotFound);  // same as "rec_slice > slice" case
      }
    }
  } else {
    for (SlotIndex i = 0; i < key_count; ++i) {
      const KeySlice rec_slice = get_slice(i);
      if (rec_slice < slice) {
        continue;
      } else if (rec_slice > slice) {
        // as keys are sorted in snapshot page, "i" is the place the slice would be inserted.
        return FindKeyForReserveResult(i, kNotFound);
      }
      ASSERT_ND(rec_slice == slice);
      const KeyLength klen = get_remainder_length(i);

      if (does_point_to_layer(i)) {
        return FindKeyForReserveResult(i, kExactMatchLayerPointer);
      }

      ASSERT_ND(!does_point_to_layer(i));

      if (klen <= sizeof(KeySlice)) {
        continue;  // same as "rec_slice < slice" case
      }

      // see the comment in MasstreeComposerContext::PathLevel.
      // Even if the record is larger than the current key, we consider it "matched" and cause
      // next layer creation, but keeping the larger record in a dummy original page in next layer.
      const char* record_suffix = get_record(i);
      if (klen == remainder &&
        std::memcmp(record_suffix, suffix, remainder - sizeof(KeySlice)) == 0) {
        return FindKeyForReserveResult(i, kExactMatchLocalRecord);
      } else {
        return FindKeyForReserveResult(i, kConflictingLocalRecord);
      }
    }
  }
  return FindKeyForReserveResult(key_count, kNotFound);
}

inline void MasstreeBorderPage::reserve_record_space(
  SlotIndex index,
  xct::XctId initial_owner_id,
  KeySlice slice,
  const void* suffix,
  KeyLength remainder_length,
  PayloadLength payload_count) {
  ASSERT_ND(header().snapshot_ || initial_owner_id.is_deleted());
  ASSERT_ND(index < kBorderPageMaxSlots);
  ASSERT_ND(remainder_length <= kMaxKeyLength || remainder_length == kInitiallyNextLayer);
  ASSERT_ND(remainder_length != kInitiallyNextLayer || payload_count == sizeof(DualPagePointer));
  ASSERT_ND(header().snapshot_ || is_locked());
  ASSERT_ND(get_key_count() == index);
  ASSERT_ND(can_accomodate(index, remainder_length, payload_count));
  ASSERT_ND(next_offset_ % 8 == 0);
  const KeyLength suffix_length = calculate_suffix_length(remainder_length);
  const DataOffset record_size = to_record_length(remainder_length, payload_count);
  ASSERT_ND(record_size % 8 == 0);
  const DataOffset new_offset = next_offset_;
  set_slice(index, slice);
  // This is a new slot, so no worry on race.
  Slot* slot = get_new_slot(index);
  slot->lengthes_.components.offset_ = new_offset;
  slot->lengthes_.components.unused_ = 0;
  slot->lengthes_.components.physical_record_length_ = record_size;
  slot->lengthes_.components.payload_length_ = payload_count;
  slot->original_physical_record_length_ = record_size;
  slot->remainder_length_ = remainder_length;
  slot->original_offset_ = new_offset;
  next_offset_ += record_size;
  if (index == 0) {
    consecutive_inserts_ = true;
  } else if (consecutive_inserts_) {
    // do we have to turn off consecutive_inserts_ now?
    const KeySlice prev_slice = get_slice(index - 1);
    const KeyLength prev_klen = get_remainder_length(index - 1);
    if (prev_slice > slice || (prev_slice == slice && prev_klen > remainder_length)) {
      consecutive_inserts_ = false;
    }
  }
  slot->tid_.lock_.reset();
  slot->tid_.xct_id_ = initial_owner_id;
  if (suffix_length > 0) {
    char* record = get_record_from_offset(new_offset);
    std::memcpy(record, suffix, suffix_length);
    KeyLength suffix_length_aligned = assorted::align8(suffix_length);
    // zero-padding
    if (suffix_length_aligned > suffix_length) {
      std::memset(record + suffix_length, 0, suffix_length_aligned - suffix_length);
    }
  }
}
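
// Sketch (added comment, not in the original header): a typical volatile-page
// insert pairs the capacity check with the reservation while the page is locked;
// "deleted_xct_id" is a hypothetical XctId with the delete bit on, as the
// precondition above requires:
//   if (page->can_accomodate(page->get_key_count(), remainder, payload)) {
//     page->reserve_record_space(page->get_key_count(), deleted_xct_id,
//                                slice, suffix, remainder, payload);
//     page->increment_key_count();
//   }  // otherwise the caller splits the page first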

inline void MasstreeBorderPage::reserve_initially_next_layer(
  SlotIndex index,
  xct::XctId initial_owner_id,
  KeySlice slice,
  const DualPagePointer& pointer) {
  ASSERT_ND(index < kBorderPageMaxSlots);
  ASSERT_ND(header().snapshot_ || is_locked());
  ASSERT_ND(get_key_count() == index);
  ASSERT_ND(can_accomodate(index, sizeof(KeySlice), sizeof(DualPagePointer)));
  ASSERT_ND(next_offset_ % 8 == 0);
  ASSERT_ND(initial_owner_id.is_next_layer());
  ASSERT_ND(!initial_owner_id.is_deleted());
  const KeyLength remainder = kInitiallyNextLayer;
  const DataOffset record_size = to_record_length(remainder, sizeof(DualPagePointer));
  ASSERT_ND(record_size % 8 == 0);
  const DataOffset new_offset = next_offset_;
  set_slice(index, slice);
  // This is a new slot, so no worry on race.
  Slot* slot = get_new_slot(index);
  slot->lengthes_.components.offset_ = new_offset;
  slot->lengthes_.components.unused_ = 0;
  slot->lengthes_.components.physical_record_length_ = record_size;
  slot->lengthes_.components.payload_length_ = sizeof(DualPagePointer);
  slot->original_physical_record_length_ = record_size;
  slot->remainder_length_ = remainder;
  slot->original_offset_ = new_offset;
  next_offset_ += record_size;
  if (index == 0) {
    consecutive_inserts_ = true;
  } else if (consecutive_inserts_) {
    // This record must be the only full-length slice, so the check is simpler
    const KeySlice prev_slice = get_slice(index - 1);
    if (prev_slice > slice) {
      consecutive_inserts_ = false;
    }
  }
  slot->tid_.lock_.reset();
  slot->tid_.xct_id_ = initial_owner_id;
  *get_next_layer_from_offsets(new_offset, remainder) = pointer;
}

inline void MasstreeBorderPage::append_next_layer_snapshot(
  xct::XctId initial_owner_id,
  KeySlice slice,
  SnapshotPagePointer pointer) {
  ASSERT_ND(header().snapshot_);  // this is used only from snapshot composer
  const SlotIndex index = get_key_count();
  const KeyLength kRemainder = kInitiallyNextLayer;
  ASSERT_ND(can_accomodate(index, kRemainder, sizeof(DualPagePointer)));
  const DataOffset record_size = to_record_length(kRemainder, sizeof(DualPagePointer));
  const DataOffset offset = next_offset_;
  set_slice(index, slice);
  // This is in snapshot page, so no worry on race.
  Slot* slot = get_new_slot(index);
  slot->lengthes_.components.offset_ = offset;
  slot->lengthes_.components.unused_ = 0;
  slot->lengthes_.components.physical_record_length_ = record_size;
  slot->lengthes_.components.payload_length_ = sizeof(DualPagePointer);
  slot->original_physical_record_length_ = record_size;
  slot->remainder_length_ = kRemainder;
  slot->original_offset_ = offset;
  next_offset_ += record_size;

  slot->tid_.xct_id_ = initial_owner_id;
  slot->tid_.xct_id_.set_next_layer();
  DualPagePointer* dual_pointer = get_next_layer_from_offsets(offset, kRemainder);
  dual_pointer->volatile_pointer_.clear();
  dual_pointer->snapshot_pointer_ = pointer;
  set_key_count(index + 1U);
}

inline void MasstreeBorderPage::replace_next_layer_snapshot(SnapshotPagePointer pointer) {
  ASSERT_ND(header().snapshot_);  // this is used only from snapshot composer
  ASSERT_ND(get_key_count() > 0);
  const SlotIndex index = get_key_count() - 1U;

  ASSERT_ND(!does_point_to_layer(index));
  ASSERT_ND(!get_owner_id(index)->xct_id_.is_next_layer());

  // Overwriting an existing slot with kInitiallyNextLayer is not generally safe,
  // but this is in snapshot page, so no worry on race.
  const KeyLength kRemainder = kInitiallyNextLayer;
  const DataOffset new_record_size = sizeof(DualPagePointer);
  ASSERT_ND(new_record_size == to_record_length(kRemainder, sizeof(DualPagePointer)));

  // This is in snapshot page, so no worry on race.
  Slot* slot = get_slot(index);

  // This is the last record in this page, right?
  ASSERT_ND(next_offset_
    == slot->lengthes_.components.offset_ + slot->lengthes_.components.physical_record_length_);

  // Here, we assume the snapshot border page always leaves sizeof(DualPagePointer) whenever
  // it creates a record. Otherwise we might not have enough space!
  // For this reason, we use can_accomodate_snapshot() rather than can_accomodate().
  ASSERT_ND(slot->lengthes_.components.offset_ + new_record_size + sizeof(Slot) * get_key_count()
    <= sizeof(data_));
  slot->lengthes_.components.physical_record_length_ = new_record_size;
  slot->lengthes_.components.payload_length_ = sizeof(DualPagePointer);
  slot->original_physical_record_length_ = new_record_size;
  slot->remainder_length_ = kRemainder;
  next_offset_ = slot->lengthes_.components.offset_ + new_record_size;

  slot->tid_.xct_id_.set_next_layer();
  DualPagePointer* dual_pointer = get_next_layer(index);
  dual_pointer->volatile_pointer_.clear();
  dual_pointer->snapshot_pointer_ = pointer;
}

inline bool MasstreeIntermediatePage::is_full_snapshot() const {
  uint16_t keys = get_key_count();
  ASSERT_ND(keys <= kMaxIntermediateSeparators);
  const MiniPage& minipage = mini_pages_[keys];
  uint16_t keys_mini = minipage.key_count_;
  ASSERT_ND(keys_mini <= kMaxIntermediateMiniSeparators);
  return keys == kMaxIntermediateSeparators && keys_mini == kMaxIntermediateMiniSeparators;
}

inline void MasstreeIntermediatePage::append_pointer_snapshot(
  KeySlice low_fence,
  SnapshotPagePointer pointer) {
  ASSERT_ND(header_.snapshot_);
  ASSERT_ND(!is_full_snapshot());
  uint16_t index = get_key_count();
  MiniPage& mini = mini_pages_[index];
  uint16_t index_mini = mini.key_count_;
  ASSERT_ND(low_fence > get_low_fence());
  ASSERT_ND(is_high_fence_supremum() || low_fence < get_high_fence());
  ASSERT_ND(pointer == 0 || extract_snapshot_id_from_snapshot_pointer(pointer) != 0);
  if (index_mini < kMaxIntermediateMiniSeparators) {
    // p0 s0 p1 + "s p" -> p0 s0 p1 s1 p2
    ASSERT_ND(pointer == 0  // composer init_root uses this method to set null pointers..
      || mini.pointers_[index_mini].snapshot_pointer_ != pointer);  // otherwise dup.
    ASSERT_ND(index_mini == 0 || low_fence > mini.separators_[index_mini - 1]);
    ASSERT_ND(index == 0 || low_fence > separators_[index - 1]);
    mini.separators_[index_mini] = low_fence;
    mini.pointers_[index_mini + 1].volatile_pointer_.clear();
    mini.pointers_[index_mini + 1].snapshot_pointer_ = pointer;
    ++mini.key_count_;
  } else {
    append_minipage_snapshot(low_fence, pointer);
  }
}

inline void MasstreeIntermediatePage::append_minipage_snapshot(
  KeySlice low_fence,
  SnapshotPagePointer pointer) {
  ASSERT_ND(header_.snapshot_);
  ASSERT_ND(!is_full_snapshot());
  uint16_t key_count = get_key_count();
  ASSERT_ND(key_count < kMaxIntermediateSeparators);
  ASSERT_ND(mini_pages_[key_count].key_count_ == kMaxIntermediateMiniSeparators);
  ASSERT_ND(low_fence > get_minipage(key_count).separators_[kMaxIntermediateMiniSeparators - 1]);
  separators_[key_count] = low_fence;
  MiniPage& new_minipage = mini_pages_[key_count + 1];
  new_minipage.key_count_ = 0;
  new_minipage.pointers_[0].volatile_pointer_.clear();
  new_minipage.pointers_[0].snapshot_pointer_ = pointer;
  set_key_count(key_count + 1);
}

inline void MasstreeIntermediatePage::extract_separators_common(
  uint8_t index,
  uint8_t index_mini,
  KeySlice* separator_low,
  KeySlice* separator_high) const {
  ASSERT_ND(!is_border());
  uint8_t key_count = get_key_count();
  ASSERT_ND(index <= key_count);
  const MasstreeIntermediatePage::MiniPage& minipage = get_minipage(index);
  ASSERT_ND(index_mini <= minipage.key_count_);
  if (index_mini == 0) {
    if (index == 0) {
      *separator_low = get_low_fence();
    } else {
      *separator_low = get_separator(index - 1U);
    }
  } else {
    *separator_low = minipage.separators_[index_mini - 1U];
  }
  if (index_mini == minipage.key_count_) {
    if (index == key_count) {
      *separator_high = get_high_fence();
    } else {
      *separator_high = get_separator(index);
    }
  } else {
    *separator_high = minipage.separators_[index_mini];
  }
}

inline bool MasstreeBorderPage::can_accomodate(
  SlotIndex new_index,
  KeyLength remainder_length,
  PayloadLength payload_count) const {
  if (new_index == 0) {
    ASSERT_ND(remainder_length <= kMaxKeyLength || remainder_length == kInitiallyNextLayer);
    ASSERT_ND(payload_count <= kMaxPayloadLength);
    return true;
  } else if (new_index >= kBorderPageMaxSlots) {
    return false;
  }
  const DataOffset required = required_data_space(remainder_length, payload_count);
  const DataOffset available = available_space();
  return required <= available;
}
inline bool MasstreeBorderPage::can_accomodate_snapshot(
  KeyLength remainder_length,
  PayloadLength payload_count) const {
  ASSERT_ND(header_.snapshot_);
  SlotIndex new_index = get_key_count();
  if (new_index == 0) {
    return true;
  } else if (new_index >= kBorderPageMaxSlots) {
    return false;
  }
  PayloadLength adjusted_payload = std::max<PayloadLength>(payload_count, sizeof(DualPagePointer));
  const DataOffset required = required_data_space(remainder_length, adjusted_payload);
  const DataOffset available = available_space();
  return required <= available;
}
inline bool MasstreeBorderPage::compare_key(
  SlotIndex index,
  const void* be_key,
  KeyLength key_length) const {
  ASSERT_ND(index < kBorderPageMaxSlots);
  KeyLength remainder = key_length - get_layer() * sizeof(KeySlice);
  const KeyLength klen = get_remainder_length(index);
  if (remainder != klen) {
    return false;
  }
  KeySlice slice = slice_layer(be_key, key_length, get_layer());
  const KeySlice rec_slice = get_slice(index);
  if (slice != rec_slice) {
    return false;
  }
  if (remainder > sizeof(KeySlice)) {
    return std::memcmp(
      reinterpret_cast<const char*>(be_key) + (get_layer() + 1) * sizeof(KeySlice),
      get_record(index),
      remainder - sizeof(KeySlice)) == 0;
  } else {
    return true;
  }
}
inline bool MasstreeBorderPage::equal_key(
  SlotIndex index,
  const void* be_key,
  KeyLength key_length) const {
  return compare_key(index, be_key, key_length);
}

inline int MasstreeBorderPage::ltgt_key(
  SlotIndex index,
  const char* be_key,
  KeyLength key_length) const {
  ASSERT_ND(key_length > get_layer() * kSliceLen);
  KeyLength remainder = key_length - get_layer() * kSliceLen;
  KeySlice slice = slice_layer(be_key, key_length, get_layer());
  const char* suffix = be_key + (get_layer() + 1) * kSliceLen;
  return ltgt_key(index, slice, suffix, remainder);
}
inline int MasstreeBorderPage::ltgt_key(
  SlotIndex index,
  KeySlice slice,
  const char* suffix,
  KeyLength remainder) const {
  ASSERT_ND(index < kBorderPageMaxSlots);
  ASSERT_ND(remainder > 0);
  ASSERT_ND(remainder <= kMaxKeyLength);
  KeyLength rec_remainder = get_remainder_length(index);
  KeySlice rec_slice = get_slice(index);
  if (slice < rec_slice) {
    return -1;
  } else if (slice > rec_slice) {
    return 1;
  }

  if (remainder <= kSliceLen) {
    if (remainder == rec_remainder) {
      return 0;
    } else if (remainder < rec_remainder) {
      return -1;
    } else {
      return 1;
    }
  }

  ASSERT_ND(remainder > kSliceLen);
  if (rec_remainder <= kSliceLen) {
    return 1;
  }
  if (does_point_to_layer(index)) {
    return 0;
  }
  ASSERT_ND(rec_remainder <= kMaxKeyLength);
  // Both suffixes are (remainder - kSliceLen) bytes long; compare only that part.
  KeyLength min_remainder = std::min(remainder, rec_remainder);
  const char* rec_suffix = get_record(index);
  int cmp = std::memcmp(suffix, rec_suffix, min_remainder - kSliceLen);
  if (cmp != 0) {
    return cmp;
  }

  if (remainder == rec_remainder) {
    return 0;
  } else if (remainder < rec_remainder) {
    return -1;
  } else {
    return 1;
  }
}
inline bool MasstreeBorderPage::will_conflict(
  SlotIndex index,
  const char* be_key,
  KeyLength key_length) const {
  ASSERT_ND(key_length > get_layer() * kSliceLen);
  KeyLength remainder = key_length - get_layer() * kSliceLen;
  KeySlice slice = slice_layer(be_key, key_length, get_layer());
  return will_conflict(index, slice, remainder);
}
inline bool MasstreeBorderPage::will_conflict(
  SlotIndex index,
  KeySlice slice,
  KeyLength remainder) const {
  ASSERT_ND(index < kBorderPageMaxSlots);
  ASSERT_ND(remainder > 0);
  if (slice != get_slice(index)) {
    return false;
  }
  KeyLength rec_remainder = get_remainder_length(index);
  if (remainder > kSliceLen && rec_remainder > kSliceLen) {
    return true;  // need to create next layer
  }
  return (remainder == rec_remainder);  // if same, it's exactly the same key
}
inline bool MasstreeBorderPage::will_contain_next_layer(
  SlotIndex index,
  const char* be_key,
  KeyLength key_length) const {
  ASSERT_ND(key_length > get_layer() * kSliceLen);
  KeyLength remainder = key_length - get_layer() * kSliceLen;
  KeySlice slice = slice_layer(be_key, key_length, get_layer());
  return will_contain_next_layer(index, slice, remainder);
}
inline bool MasstreeBorderPage::will_contain_next_layer(
  SlotIndex index,
  KeySlice slice,
  KeyLength remainder) const {
  ASSERT_ND(index < kBorderPageMaxSlots);
  ASSERT_ND(remainder > 0);
  return slice == get_slice(index) && does_point_to_layer(index) && remainder > kSliceLen;
}

// We must place static asserts at the end, otherwise doxygen gets confused (most likely its bug)
STATIC_SIZE_CHECK(sizeof(MasstreeBorderPage), 1 << 12)
STATIC_SIZE_CHECK(sizeof(MasstreeIntermediatePage), 1 << 12)
STATIC_SIZE_CHECK(sizeof(MasstreeIntermediatePage::MiniPage), 128 + 256)

}  // namespace masstree
}  // namespace storage
}  // namespace foedus
#endif  // FOEDUS_STORAGE_MASSTREE_MASSTREE_PAGE_IMPL_HPP_
void reserve_record_space(SlotIndex index, xct::XctId initial_owner_id, KeySlice slice, const void *suffix, KeyLength remainder_length, PayloadLength payload_count)
Installs a new physical record that doesn't exist logically (delete bit on).
KeyLength get_remainder_length(SlotIndex index) const __attribute__((always_inline))
void set_slice(SlotIndex index, KeySlice slice) __attribute__((always_inline))
DataOffset physical_record_length_
Byte count this record occupies.
const KeySlice kInfimumSlice
void prefetch_additional_if_needed(SlotIndex key_count) const __attribute__((always_inline))
void write_lengthes_oneshot(SlotLengthPart new_value) __attribute__((always_inline))
Writes lengthes_ in one-shot.
KeySlice high_fence_
Inclusive high fence of this page.
SlotIndex get_key_count() const __attribute__((always_inline))
physical key count (those keys might be deleted) in this page.
PayloadLength get_max_payload_peek() const __attribute__((always_inline))
This might be affected by concurrent threads.
DataOffset get_offset_in_bytes(SlotIndex index) const __attribute__((always_inline))
KeySlice get_slice(SlotIndex index) const __attribute__((always_inline))
T align8(T value)
8-alignment.
void release_pages_recursive_common(const memory::GlobalVolatilePageResolver &page_resolver, memory::PageReleaseBatch *batch)
const uint32_t kCommonPageHeaderSize
Size of the base page class (MasstreePage), which is the common header for intermediate and border pa...
void release_pages_recursive(const memory::GlobalVolatilePageResolver &page_resolver, memory::PageReleaseBatch *batch)
const uint64_t kSliceLen
Shorthand for sizeof(KeySlice).
KeyLength calculate_suffix_length_aligned(KeyLength remainder_length) __attribute__((always_inline))
SlotLengthUnion lengthes_
Stores mutable length information of the record.
Represents a pointer to another page (usually a child page).
Definition: storage_id.hpp:271
void prefetch() const __attribute__((always_inline))
prefetch upto 256th bytes.
bool is_empty_range() const __attribute__((always_inline))
An empty-range page, either intermediate or border, never has any entries.
uint16_t PayloadLength
Represents a byte-length of a payload in this package.
Definition: masstree_id.hpp:92
friend std::ostream & operator<<(std::ostream &o, const MasstreeBorderPage &v)
defined in masstree_page_debug.cpp.
Definitions of IDs in this package and a few related constant values.
PageVersion & get_version() __attribute__((always_inline))
uint16_t SlotIndex
Index of a record in a (border) page.
const char * get_record_payload_from_offsets(DataOffset record_offset, KeyLength remainder_length) const __attribute__((always_inline))
void initialize_snapshot_page(StorageId storage_id, SnapshotPagePointer page_id, uint8_t layer, KeySlice low_fence, KeySlice high_fence)
void initialize_as_layer_root_physical(VolatilePagePointer page_id, MasstreeBorderPage *parent, SlotIndex parent_index)
A physical-only method to initialize this page as a volatile page of a layer-root pointed from the gi...
PayloadLength get_max_payload_length(SlotIndex index) const __attribute__((always_inline))
const KeyLength kMaxKeyLength
Max length of a key.
Definition: masstree_id.hpp:75
const MiniPage & get_minipage(uint8_t index) const __attribute__((always_inline))
const xct::RwLockableXctId * get_owner_id(SlotIndex index) const __attribute__((always_inline))
MasstreePage & operator=(const MasstreePage &other)=delete
bool is_locked() const __attribute__((always_inline))
bool is_retired() const __attribute__((always_inline))
Definition: page.hpp:140
uint32_t StorageId
Unique ID for storage.
Definition: storage_id.hpp:55
Root package of FOEDUS (Fast Optimistic Engine for Data Unification Services).
Definition: assert_nd.hpp:44
Represents a record of write-access during a transaction.
Definition: xct_access.hpp:168
bool is_moved() const __attribute__((always_inline))
bool is_border() const __attribute__((always_inline))
void increment_key_count() __attribute__((always_inline))
Definition: page.hpp:318
SnapshotPagePointer get_snapshot_page_id() const
uint32_t PagePoolOffset
Offset in PagePool that compactly represents the page address (unlike 8 bytes pointer).
Definition: memory_id.hpp:44
KeySlice low_fence_
Inclusive low fence of this page.
void prefetch_general() const __attribute__((always_inline))
prefetch upto keys/separators, whether this page is border or interior.
uint8_t masstree_layer_
used only in masstree.
Definition: page.hpp:226
bool compare_key(SlotIndex index, const void *be_key, KeyLength key_length) const __attribute__((always_inline))
actually this method should be renamed to equal_key...
Slot * get_new_slot(SlotIndex index) __attribute__((always_inline))
uint16_t DataOffset
Byte-offset in a page.
bool is_foster_minor_null() const __attribute__((always_inline))
PageType
The following 1-byte value is stored in the common page header.
Definition: page.hpp:46
Forward declarations of classes in transaction package.
bool is_foster_major_null() const __attribute__((always_inline))
Result of track_moved_record().
Definition: xct_id.hpp:1180
KeySlice foster_fence_
Inclusive low_fence of foster child.
Represents a pointer to a volatile page with modification count for preventing ABA.
Definition: storage_id.hpp:194
MiniPage & get_minipage(uint8_t index) __attribute__((always_inline))
Just a synonym of XctId to be used as a page lock mechanism.
Definition: page.hpp:129
Forward declarations of classes in root package.
Persistent status part of Transaction ID.
Definition: xct_id.hpp:955
SlotLengthPart read_lengthes_oneshot() const __attribute__((always_inline))
Reads lengthes_ of this slot in one-shot.
void prefetch() const
prefetch upto separators.
Represents one border page in Masstree Storage.
bool is_consecutive_inserts() const
Whether this page is receiving only sequential inserts.
DualPagePointer * get_next_layer(SlotIndex index) __attribute__((always_inline))
void set_foster_major_offset_unsafe(memory::PagePoolOffset offset) __attribute__((always_inline))
As the name suggests, this should be used only by composer.
const DataOffset kBorderPageDataPartOffset
Offset of data_ member in MasstreeBorderPage.
XctId xct_id_
the second 64bit: Persistent status part of TID.
Definition: xct_id.hpp:1137
void set_moved() __attribute__((always_inline))
HighFence(KeySlice slice, bool supremum)
void prefetch_cachelines(const void *address, int cacheline_count)
Prefetch multiple contiguous cachelines to L1 cache.
Definition: cacheline.hpp:66
uint64_t KeySlice
Each key slice is an 8-byte integer.
bool is_layer_root() const __attribute__((always_inline))
friend std::ostream & operator<<(std::ostream &o, const MasstreeIntermediatePage &v)
defined in masstree_page_debug.cpp.
Definitions of IDs in this package and a few related constant values.
const uint16_t kMaxIntermediateMiniSeparators
Max number of separators stored in the second level of intermediate pages.
VolatilePagePointer foster_twin_[2]
Points to foster children, or tentative child pages.
const char * get_record(SlotIndex index) const __attribute__((always_inline))
uint16_t KeyLength
Represents a byte-length of a key in this package.
Definition: masstree_id.hpp:69
Common base of MasstreeIntermediatePage and MasstreeBorderPage.
#define LIKELY(x)
Hints that x is highly likely true.
Definition: compiler.hpp:103
char * get_record(SlotIndex index) __attribute__((always_inline))
PayloadLength get_max_payload_stable(SlotLengthPart stable_length) const __attribute__((always_inline))
This is not affected by concurrent threads.
bool can_accomodate(SlotIndex new_index, KeyLength remainder_length, PayloadLength payload_count) const __attribute__((always_inline))
VolatilePagePointer get_foster_minor() const __attribute__((always_inline))
PayloadLength get_payload_length(SlotIndex index) const __attribute__((always_inline))
const DualPagePointer * get_next_layer_from_offsets(DataOffset record_offset, KeyLength remainder_length) const __attribute__((always_inline))
bool is_retired() const __attribute__((always_inline))
VolatilePagePointer get_foster_major() const __attribute__((always_inline))
MasstreeIntermediatePointerIterator(const MasstreeIntermediatePage *page)
McsRwLock lock_
the first 64bit: Locking part of TID
Definition: xct_id.hpp:1134
void set_offset_unsafe(memory::PagePoolOffset offset)
This is used only in special places (snapshot composer).
Definition: storage_id.hpp:207
KeySlice get_separator(uint8_t index) const __attribute__((always_inline))
bool does_point_to_layer() const __attribute__((always_inline))
void initialize_volatile_common(StorageId storage_id, VolatilePagePointer page_id, PageType page_type, uint8_t layer, uint8_t level, KeySlice low_fence, KeySlice high_fence)
KeyLength get_suffix_length() const __attribute__((always_inline))
const uint16_t kCachelineSize
Byte count of one cache line.
Definition: cacheline.hpp:42
The MCS reader-writer lock variant of LockableXctId.
Definition: xct_id.hpp:1132
void append_next_layer_snapshot(xct::XctId initial_owner_id, KeySlice slice, SnapshotPagePointer pointer)
Installs a next layer pointer.
KeyLength get_suffix_length(SlotIndex index) const __attribute__((always_inline))
DualPagePointer pointers_[kMaxIntermediateMiniSeparators+1]
FindKeyForReserveResult find_key_for_reserve(SlotIndex from_index, SlotIndex to_index, KeySlice slice, const void *suffix, KeyLength remainder) const __attribute__((always_inline))
This is for the case we are looking for either the matching slot or the slot we will modify...
void append_pointer_snapshot(KeySlice low_fence, SnapshotPagePointer pointer)
Appends a new poiner and separator in an existing mini page, used only by snapshot composer...
DataOffset available_space() const
Returns usable data space in bytes.
VolatilePagePointer volatile_pointer_
Definition: storage_id.hpp:308
bool is_moved() const __attribute__((always_inline))
Definition: page.hpp:139
MasstreeBorderPage & operator=(const MasstreeBorderPage &other)=delete
void set_high_fence_unsafe(KeySlice high_fence) __attribute__((always_inline))
As the name suggests, this should be used only by composer.
PageVersion * get_version_address() __attribute__((always_inline))
void release_pages_recursive_parallel(Engine *engine)
This method is used when we release a large number of volatile pages, most likely when we drop a stor...
uint8_t masstree_in_layer_level_
used only in masstree.
Definition: page.hpp:243
void assert_entries_impl() const
defined in masstree_page_debug.cpp.
void set_retired() __attribute__((always_inline))
Definition: page.hpp:150
uint16_t key_count_
physical key count (those keys might be deleted) in this page.
Definition: page.hpp:219
SlotIndex find_key_normalized(SlotIndex from_index, SlotIndex to_index, KeySlice slice) const __attribute__((always_inline))
Specialized version for 8 byte native integer search.
DataOffset original_offset_
The value of offset_ as of the creation of this record.
const DualPagePointer * get_next_layer(SlotIndex index) const __attribute__((always_inline))
Definitions of IDs in this package and a few related constant values.
const uint16_t kMaxIntermediateSeparators
Max number of separators stored in the first level of intermediate pages.
An exclusive-only (WW) MCS lock data structure.
Definition: xct_id.hpp:324
xct::McsWwLock lock_
Definition: page.hpp:171
A helper class to return a bunch of pages to individual nodes.
Definition: page_pool.hpp:276
static DataOffset required_data_space(KeyLength remainder_length, PayloadLength payload_length)
returns the byte size of required contiguous space in data_ to insert a new record of the given remai...
void install_foster_twin(VolatilePagePointer minor, VolatilePagePointer major, KeySlice foster_fence)
uint64_t SnapshotPagePointer
Page ID of a snapshot page.
Definition: storage_id.hpp:79
Constants and methods related to CPU cacheline and its prefetching.
DataOffset original_physical_record_length_
The value of physical_record_length_ as of the creation of this record.
const KeyLength kInitiallyNextLayer
A special key length value that denotes the record in a border page was initially a next-layer pointe...
Definition: masstree_id.hpp:86
PageType get_page_type() const
Definition: page.hpp:280
Database engine object that holds all resources and provides APIs.
Definition: engine.hpp:109
Slot * get_slot(SlotIndex index) __attribute__((always_inline))
A system transaction to split a border page in Master-Tree.
uint8_t get_btree_level() const __attribute__((always_inline))
Used only in masstree.
Fix-sized slot for each record, which is placed at the end of data region.
void initialize_snapshot_common(StorageId storage_id, SnapshotPagePointer page_id, PageType page_type, uint8_t layer, uint8_t level, KeySlice low_fence, KeySlice high_fence)
const Slot * get_slot(SlotIndex index) const __attribute__((always_inline))
SnapshotPagePointer snapshot_pointer_
Definition: storage_id.hpp:307
void increment_key_count() __attribute__((always_inline))
const SlotIndex kBorderPageMaxSlots
Maximum number of slots in one MasstreeBorderPage.
Just a marker to denote that a memory region represents a data page.
Definition: page.hpp:184
A system transaction to split an intermediate page in Master-Tree.
bool is_next_layer() const __attribute__((always_inline))
Definition: xct_id.hpp:1042
const uint32_t kBorderPageDataPartSize
Byte size of the record data part (data_) in MasstreeBorderPage.
uint16_t extract_snapshot_id_from_snapshot_pointer(SnapshotPagePointer pointer)
Definition: storage_id.hpp:98
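As a rough illustration of what this extraction looks like, the sketch below assumes a SnapshotPagePointer layout with the snapshot ID in the most significant 16 bits; treat the shift amount as an assumption rather than the library's confirmed layout (storage_id.hpp is the authoritative definition).
#include <stdint.h>
// Hypothetical sketch: snapshot ID assumed to occupy the top 16 bits.
inline uint16_t sketch_extract_snapshot_id(uint64_t snapshot_page_pointer) {
  return static_cast<uint16_t>(snapshot_page_pointer >> 48);
}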
const uint32_t kBorderPageAdditionalHeaderSize
Misc header attributes specific to MasstreeBorderPage placed after the common header.
A piece of Slot object that must be read/written in one-shot, meaning no one reads half-written value...
PageVersion page_version_
Used in several storage types as a concurrency control mechanism for the page.
Definition: page.hpp:272
uint8_t get_layer() const __attribute__((always_inline))
Layer-0 stores the first 8-byte slice, Layer-1 the next 8 byte...
xct::McsWwLock * get_lock_address() __attribute__((always_inline))
bool is_deleted() const __attribute__((always_inline))
Definition: xct_id.hpp:1040
void set_retired() __attribute__((always_inline))
KeySlice get_high_fence() const __attribute__((always_inline))
int ltgt_key(SlotIndex index, const char *be_key, KeyLength key_length) const __attribute__((always_inline))
Compares the key.
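The name (less-than/greater-than) suggests a memcmp-style three-way result. Here is a hedged sketch of that comparison for big-endian keys with the usual prefix tiebreak (shorter key sorts first); sketch_ltgt is hypothetical and ignores the slice/suffix split the real method handles.
#include <algorithm>
#include <cstdint>
#include <cstring>
// Hypothetical three-way comparison: negative if a < b, 0 if equal, positive if a > b.
int sketch_ltgt(const char* a, uint16_t a_len, const char* b, uint16_t b_len) {
  const int cmp = std::memcmp(a, b, std::min(a_len, b_len));
  if (cmp != 0) {
    return cmp;  // big-endian bytes decide the order directly
  }
  return static_cast<int>(a_len) - static_cast<int>(b_len);  // prefix rule
}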
void set_foster_twin(VolatilePagePointer minor, VolatilePagePointer major)
void set_separator(uint8_t minipage_index, KeySlice new_separator)
Place a new separator for a new minipage.
void assert_entries() __attribute__((always_inline))
bool within_foster_minor(KeySlice slice) const __attribute__((always_inline))
DualPagePointer * get_next_layer_from_offsets(DataOffset record_offset, KeyLength remainder_length) __attribute__((always_inline))
KeySlice separators_[kMaxIntermediateMiniSeparators]
Same semantics as separators_ in the enclosing class.
FindKeyForReserveResult find_key_for_snapshot(KeySlice slice, const void *suffix, KeyLength remainder) const __attribute__((always_inline))
This one is used for snapshot pages.
Used only for debugging as this is not space efficient.
void initialize_layer_root(const MasstreeBorderPage *copy_from, SlotIndex copy_index)
Copy the initial record that will be the only record for a new root page.
SlotIndex to_slot_index(const Slot *slot) const __attribute__((always_inline))
const PayloadLength kMaxPayloadLength
Max length of a payload.
Definition: masstree_id.hpp:98
Forward declarations of classes in memory package.
bool has_foster_child() const __attribute__((always_inline))
KeySlice get_low_fence() const __attribute__((always_inline))
void initialize_volatile_page(StorageId storage_id, VolatilePagePointer page_id, uint8_t layer, KeySlice low_fence, KeySlice high_fence)
void set_moved() __attribute__((always_inline))
Definition: page.hpp:146
MasstreeIntermediatePage & operator=(const MasstreeIntermediatePage &other)=delete
SlotIndex find_key(KeySlice slice, const void *suffix, KeyLength remainder) const __attribute__((always_inline))
Navigates a searching key-slice to one of the records in this page.
bool can_accomodate_snapshot(KeyLength remainder_length, PayloadLength payload_count) const __attribute__((always_inline))
Slightly different from can_accomodate() as follows:
bool try_expand_record_in_page_physical(PayloadLength payload_count, SlotIndex record_index)
A physical-only method to expand a record within this page without any logical change.
KeySlice get_foster_fence() const __attribute__((always_inline))
void extract_separators_volatile(uint8_t index, uint8_t index_mini, KeySlice *separator_low, KeySlice *separator_high) const
Retrieves separators defining the index, used for volatile pages, which requires appropriate locks or ...
bool is_high_fence_supremum() const __attribute__((always_inline))
xct::TrackMovedRecordResult track_moved_record(Engine *engine, xct::RwLockableXctId *old_address, xct::WriteXctAccess *write_set)
void append_minipage_snapshot(KeySlice low_fence, SnapshotPagePointer pointer)
Appends a new separator and the initial pointer in a new mini page, used only by snapshot composer...
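A hypothetical usage sketch contrasting this with append_pointer_snapshot above: the composer extends the current mini page until it is full, then opens a new one. The function name and the minipage_full flag are stand-ins, and no locking appears because the snapshot composer is assumed to own the page exclusively.
// Sketch only: 'minipage_full' is a hypothetical predicate the caller computes
// (e.g. the mini page already holds kMaxIntermediateMiniSeparators separators).
void sketch_compose_append(MasstreeIntermediatePage* page, bool minipage_full,
                           KeySlice fence, SnapshotPagePointer ptr) {
  if (minipage_full) {
    page->append_minipage_snapshot(fence, ptr);  // open a new mini page with this pointer
  } else {
    page->append_pointer_snapshot(fence, ptr);   // extend the current mini page
  }
}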
xct::TrackMovedRecordResult track_moved_record_next_layer(Engine *engine, xct::RwLockableXctId *old_address)
This one further tracks it to next layer.
bool will_conflict(SlotIndex index, const char *be_key, KeyLength key_length) const __attribute__((always_inline))
Returns whether inserting the key will cause creation of a new next layer.
const PageVersion * get_version_address() const __attribute__((always_inline))
KeyLength remainder_length_
The following fields are immutable.
void initialize_volatile_page(StorageId storage_id, VolatilePagePointer page_id, uint8_t layer, uint8_t level, KeySlice low_fence, KeySlice high_fence)
VolatilePagePointer get_volatile_page_id() const
char * get_record_payload_from_offsets(DataOffset record_offset, KeyLength remainder_length) __attribute__((always_inline))
friend std::ostream & operator<<(std::ostream &o, const MasstreePage &v)
defined in masstree_page_debug.cpp.
char * get_record_payload(SlotIndex index) __attribute__((always_inline))
bool is_low_fence_infimum() const __attribute__((always_inline))
void release_pages_recursive(const memory::GlobalVolatilePageResolver &page_resolver, memory::PageReleaseBatch *batch)
const uint32_t kMaxIntermediatePointers
Max number of pointers (if completely filled) stored in an intermediate page.
static DataOffset to_record_length(KeyLength remainder_length, PayloadLength payload_length)
Returns the minimal physical_record_length_ for the given remainder/payload length.
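To make the space accounting of to_record_length and required_data_space (above) concrete, here is a hedged sketch. Its assumptions are not verified constants: suffix and payload are taken as 8-byte aligned, the suffix is the remainder beyond the 8-byte slice, and each record also consumes one fixed-size Slot; kSketchSlotSize is a hypothetical placeholder for sizeof(Slot).
#include <stdint.h>
const uint16_t kSketchSlotSize = 32;  // hypothetical; the real value is sizeof(Slot)
inline uint16_t sketch_align8(uint16_t x) { return static_cast<uint16_t>((x + 7U) & ~7U); }
// Minimal physical record body: aligned suffix plus aligned payload.
inline uint16_t sketch_to_record_length(uint16_t remainder_length, uint16_t payload_length) {
  const uint16_t suffix = remainder_length <= 8U ? 0U : remainder_length - 8U;
  return static_cast<uint16_t>(sketch_align8(suffix) + sketch_align8(payload_length));
}
// Contiguous space needed in data_: the record body plus its slot.
inline uint16_t sketch_required_data_space(uint16_t remainder_length, uint16_t payload_length) {
  return static_cast<uint16_t>(
      sketch_to_record_length(remainder_length, payload_length) + kSketchSlotSize);
}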
void extract_separators_common(uint8_t index, uint8_t index_mini, KeySlice *separator_low, KeySlice *separator_high) const
Retrieves separators defining the index, used only by snapshot composer (or when there is no race)...
#define STATIC_SIZE_CHECK(desired, actual)
const char * get_record_from_offset(DataOffset record_offset) const __attribute__((always_inline))
KeyLength get_suffix_length_aligned(SlotIndex index) const __attribute__((always_inline))
Resolves an offset in a volatile page pool to an actual pointer and vice versa.
void set_key_count(uint8_t key_count) __attribute__((always_inline))
Definition: page.hpp:319
const KeySlice kSupremumSlice
const char * get_record_payload(SlotIndex index) const __attribute__((always_inline))
xct::RwLockableXctId * get_owner_id(SlotIndex index) __attribute__((always_inline))
Represents one intermediate page in Masstree Storage.
KeyLength calculate_suffix_length(KeyLength remainder_length) __attribute__((always_inline))
Forward declarations of classes in masstree storage package.
#define UNLIKELY(x)
Hints that x is highly likely false.
Definition: compiler.hpp:104
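For reference, the conventional GCC/Clang idiom behind such a hint looks like the following; this is an assumption about the style of definition, not a quote of compiler.hpp.
#if defined(__GNUC__) || defined(__clang__)
#define SKETCH_UNLIKELY(x) __builtin_expect(!!(x), 0)  // branch predicted not-taken
#else
#define SKETCH_UNLIKELY(x) (x)  // no-op fallback on other compilers
#endif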
bool snapshot_
Whether this page image is of a snapshot page.
Definition: page.hpp:211
bool is_full_snapshot() const
Whether this page is full of pointers, used only by snapshot composer (or when there is no race)...
#define ASSERT_ND(x)
A warning-free wrapper macro of assert() that has no performance effect in release mode even when 'x'...
Definition: assert_nd.hpp:72
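The truncated brief hints at the standard trick such macros use; here is a sketch of the idea (an assumption, not a quote of assert_nd.hpp): in release builds the expression is swallowed by sizeof, so it is still parsed and its variables stay "used", yet it is never evaluated.
#include <cassert>
#ifdef NDEBUG
#define SKETCH_ASSERT_ND(x) do { (void) sizeof(x); } while (0)  // parsed, never evaluated
#else
#define SKETCH_ASSERT_ND(x) assert(x)  // normal assertion in debug builds
#endif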
bool within_foster_major(KeySlice slice) const __attribute__((always_inline))
bool is_locked() const __attribute__((always_inline))
Definition: page.hpp:138
KeySlice slice_layer(const void *be_bytes, KeyLength key_length, Layer current_layer)
Extract a part of a big-endian byte array of given length as KeySlice.
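Since this function defines how keys map to layers, a short sketch may help. The following is a minimal illustration assuming the usual Masstree convention that layer L covers bytes [8L, 8L+8) of the big-endian key, zero-padded on the right when the key ends early; sketch_slice_layer and its plain integer types are hypothetical stand-ins for slice_layer, KeySlice, KeyLength, and Layer.
#include <stdint.h>
// Hypothetical sketch; assumes key_length > layer * 8.
uint64_t sketch_slice_layer(const void* be_bytes, uint16_t key_length, uint8_t layer) {
  const uint16_t skipped = layer * 8U;
  const uint16_t remaining = key_length - skipped;
  const uint8_t* cursor = static_cast<const uint8_t*>(be_bytes) + skipped;
  const uint16_t take = remaining < 8U ? remaining : 8U;
  uint64_t slice = 0;
  for (uint16_t i = 0; i < take; ++i) {
    slice = (slice << 8) | cursor[i];  // big-endian: earlier bytes are more significant
  }
  slice <<= 8U * (8U - take);  // zero-pad short keys on the right
  return slice;
}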
bool equal_key(SlotIndex index, const void *be_key, KeyLength key_length) const __attribute__((always_inline))
Let's gradually migrate from compare_key() to this.
void reserve_initially_next_layer(SlotIndex index, xct::XctId initial_owner_id, KeySlice slice, const DualPagePointer &pointer)
For creating a record that is initially a next-layer.
Forward declarations of classes in thread package.
MiniPage & operator=(const MiniPage &other)=delete
xct::RwLockableXctId tid_
TID of the record.
bool will_contain_next_layer(SlotIndex index, const char *be_key, KeyLength key_length) const __attribute__((always_inline))
Returns whether the record is a next-layer pointer that would contain the key.
KeyLength get_aligned_suffix_length() const __attribute__((always_inline))
#define ALWAYS_INLINE
A function suffix to hint that the function should always be inlined.
Definition: compiler.hpp:106
bool is_next_layer() const __attribute__((always_inline))
Definition: xct_id.hpp:1143
bool within_fences(KeySlice slice) const __attribute__((always_inline))
void replace_next_layer_snapshot(SnapshotPagePointer pointer)
Same as above, except this is used to transform an existing record at the end into a next-layer pointer...
bool does_point_to_layer(SlotIndex index) const __attribute__((always_inline))
const uint16_t kPageSize
A constant defining the page size (in bytes) of both snapshot pages and volatile pages.
Definition: storage_id.hpp:45
void set_key_count(SlotIndex count) __attribute__((always_inline))
uint8_t find_minipage(KeySlice slice) const __attribute__((always_inline))
Navigates a searching key-slice to one of the mini pages in this page.
void extract_separators_snapshot(uint8_t index, uint8_t index_mini, KeySlice *separator_low, KeySlice *separator_high) const
Retrieves separators defining the index, used only by the snapshot composer, thus there is no race.
DataOffset offset_
Byte offset in data_ where this record starts.
uint8_t find_pointer(KeySlice slice) const __attribute__((always_inline))
Navigates a searching key-slice to one of the pointers in this mini-page.
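Both find_minipage and find_pointer follow the same separator convention, so a single sketch covers the idea. It assumes the standard Masstree rule that separators_[i] is the lowest slice belonging to entry i+1; the linear scan is only illustrative (the real code may well search differently).
#include <stdint.h>
// Hypothetical sketch: returns the index of the entry that owns 'slice'.
uint8_t sketch_find_entry(const uint64_t* separators, uint8_t separator_count, uint64_t slice) {
  uint8_t i = 0;
  while (i < separator_count && slice >= separators[i]) {
    ++i;  // slice is at or beyond this separator, so it belongs further right
  }
  return i;  // a value in [0, separator_count], indexing the pointer array
}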
void initialize_snapshot_page(StorageId storage_id, SnapshotPagePointer page_id, uint8_t layer, uint8_t level, KeySlice low_fence, KeySlice high_fence)
char * get_record_from_offset(DataOffset record_offset) __attribute__((always_inline))
Offset versions.
uint64_t page_id_
Page ID of this page.
Definition: page.hpp:191
void set_next_layer() __attribute__((always_inline))
Definition: xct_id.hpp:1030
const PageVersion & get_version() const __attribute__((always_inline))