libfoedus-core (FOEDUS Core Library)
masstree_composer_impl.hpp
/*
 * Copyright (c) 2014-2015, Hewlett-Packard Development Company, LP.
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details. You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * HP designates this particular file as subject to the "Classpath" exception
 * as provided by HP in the LICENSE.txt file that accompanied this code.
 */
#ifndef FOEDUS_STORAGE_MASSTREE_MASSTREE_COMPOSER_IMPL_HPP_
#define FOEDUS_STORAGE_MASSTREE_MASSTREE_COMPOSER_IMPL_HPP_

#include <stdint.h>

#include <iosfwd>
#include <string>

#include "foedus/fwd.hpp"
#include "foedus/memory/fwd.hpp"
#include "foedus/snapshot/fwd.hpp"
#include "foedus/storage/page.hpp"
// ... (additional includes elided in this listing)

namespace foedus {
namespace storage {
namespace masstree {
class MasstreeComposer final {
 public:
  explicit MasstreeComposer(Composer *parent);
  std::string to_string() const;

  ErrorStack compose(const Composer::ComposeArguments& args);
  ErrorStack construct_root(const Composer::ConstructRootArguments& args);
  Composer::DropResult drop_volatiles(const Composer::DropVolatilesArguments& args);
  void drop_root_volatile(const Composer::DropVolatilesArguments& args);

 private:
  Engine* const engine_;
  const StorageId storage_id_;
  const MasstreeStorage storage_;

  ErrorStack check_buddies(const Composer::ConstructRootArguments& args) const;

  MasstreePage* resolve_volatile(VolatilePagePointer pointer);

  Composer::DropResult drop_volatiles_recurse(
    const Composer::DropVolatilesArguments& args,
    DualPagePointer* pointer);
  Composer::DropResult drop_volatiles_intermediate(
    const Composer::DropVolatilesArguments& args,
    MasstreeIntermediatePage* page);
  Composer::DropResult drop_volatiles_border(
    const Composer::DropVolatilesArguments& args,
    MasstreeBorderPage* page);
  bool is_updated_pointer(
    const Composer::DropVolatilesArguments& args,
    SnapshotPagePointer pointer) const;
  bool is_to_keep_volatile(uint8_t layer, uint16_t btree_level) const;

  void drop_foster_twins(const Composer::DropVolatilesArguments& args, MasstreePage* page);
  void drop_all_recurse(
    const Composer::DropVolatilesArguments& args,
    DualPagePointer* pointer);
  void drop_all_recurse_page_only(
    const Composer::DropVolatilesArguments& args,
    MasstreePage* page);
};

/**
 * MasstreeComposer's compose() implementation separated from the class itself.
 */
class MasstreeComposeContext {
 public:
  enum Constants {
    // ... (other constants elided in this listing)
    /** Maximum number of logs execute_xxx_group methods process in one shot. */
    kMaxLogGroupSize = 1 << 14,
  };

  struct PathLevel {
    // ... (a few members elided in this listing)
    /** B-trie layer of this level. */
    uint16_t layer_;
    /** Inclusive low fence and exclusive high fence of this level. */
    KeySlice low_fence_;
    KeySlice high_fence_;
    /**
     * If this level is based on an existing snapshot page, the next entry (pointer or
     * record) in the original page to be consumed. 0xFFFF means no more entries.
     */
    SlotIndex next_original_;
    SlotIndex next_original_mini_;
    KeySlice next_original_slice_;
    KeyLength next_original_remainder_;
    /** number of pages in this level including head/tail, without original page (so, initial=1). */
    uint32_t page_count_;
    uint32_t filler_;

    bool has_next_original() const { return next_original_ != 0xFFFFU; }
    void set_no_more_next_original() {
      next_original_ = 0xFFFFU;
      next_original_mini_ = 0xFFFFU;
      next_original_remainder_ = kInitiallyNextLayer;
      next_original_slice_ = kSupremumSlice;
    }
    bool contains_slice(KeySlice slice) const {
      ASSERT_ND(low_fence_ <= slice);  // as logs are sorted, this must not happen
      return high_fence_ == kSupremumSlice || slice < high_fence_;  // careful on the supremum case
    }
    bool contains_key(const char* key, KeyLength key_length) const {
      ASSERT_ND(is_key_aligned_and_zero_padded(key, key_length));
      KeySlice slice = normalize_be_bytes_full_aligned(key + layer_ * kSliceLen);
      return contains_slice(slice);
    }
    bool needs_to_consume_original(KeySlice slice, KeyLength key_length) const {
      return has_next_original()
        && (
          next_original_slice_ < slice
          || (next_original_slice_ == slice
            && next_original_remainder_ > kSliceLen
            && key_length > (layer_ + 1U) * kSliceLen));
    }

    friend std::ostream& operator<<(std::ostream& o, const PathLevel& v);
  };
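
  // A small worked example (a sketch, not part of the original file) of how PathLevel's
  // range checks behave. Assume a level at layer 0 whose fences are [10, 20) and whose
  // original-page cursor sits at slice 12 with a 10-byte remainder:
  //
  //   PathLevel lv;
  //   lv.layer_ = 0;
  //   lv.low_fence_ = 10;   lv.high_fence_ = 20;
  //   lv.next_original_ = 3;
  //   lv.next_original_slice_ = 12;  lv.next_original_remainder_ = 10;
  //
  //   lv.contains_slice(15);                // true:  10 <= 15 < 20
  //   lv.contains_slice(25);                // false: 25 >= high fence
  //   lv.needs_to_consume_original(13, 8);  // true:  the original's next slice (12) comes before 13
  //   lv.needs_to_consume_original(11, 8);  // false: 11 is still before the original's next slice
  //   lv.set_no_more_next_original();       // afterwards, has_next_original() == false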

  // ... (a small helper declaration elided in this listing)

  /**
   * Represents the minimal information to install a new snapshot page pointer.
   */
  struct PageBoundaryInfo {
    uint8_t btree_level_;
    /** set to true when this page is closed/reopened so that only one entry matches exactly */
    uint8_t removed_;
    uint8_t layer_;
    /** high 8-bits of SnapshotLocalPageId of the new page. */
    uint8_t local_snapshot_pointer_high_;
    /** low 32-bits of SnapshotLocalPageId of the new page. */
    uint32_t local_snapshot_pointer_low_;
    /** layer_ prefix slices, then the low and high fences: layer_ + 2 slices in total. */
    KeySlice slices_[2];

    /** This object must be reinterpreted. */
    PageBoundaryInfo() = delete;
    ~PageBoundaryInfo() = delete;

    uint32_t dynamic_sizeof() const ALWAYS_INLINE { return (layer_ + 2U) * sizeof(KeySlice) + 8U; }
    static uint32_t calculate_hash(
      uint8_t btree_level,
      uint8_t layer,
      const KeySlice* prefixes,
      KeySlice low_fence,
      KeySlice high_fence) ALWAYS_INLINE {
      // good old multiply-hash
      uint64_t hash = btree_level + (layer * 256U);
      const uint64_t kMult = 0x7ada20dc734afb6fULL;
      for (uint8_t i = 0; i < layer; ++i) {
        hash = hash * kMult + prefixes[i];
      }
      hash = hash * kMult + low_fence;
      hash = hash * kMult + high_fence;
      uint32_t compressed = static_cast<uint32_t>(hash >> 32) ^ static_cast<uint32_t>(hash);
      return compressed;
    }
    SnapshotLocalPageId get_local_page_id() const ALWAYS_INLINE {
      return static_cast<SnapshotLocalPageId>(local_snapshot_pointer_high_) << 32
        | static_cast<SnapshotLocalPageId>(local_snapshot_pointer_low_);
    }

    /**
     * Returns whether the entry exactly matches with the page boundaries we look for.
     */
    bool exact_match(
      uint8_t btree_level,
      uint8_t layer,
      const KeySlice* prefixes,
      KeySlice low,
      KeySlice high) const ALWAYS_INLINE {
      if (layer != layer_ || btree_level != btree_level_ || removed_) {
        return false;
      }
      for (uint8_t i = 0; i < layer_; ++i) {
        if (prefixes[i] != slices_[i]) {
          return false;
        }
      }
      return low == slices_[layer_] && high == slices_[layer_ + 1];
    }
  };
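
  // Worked size arithmetic (a sketch, not part of the original file): a PageBoundaryInfo
  // for a layer-1 boundary carries one prefix slice plus the low and high fences, so
  // dynamic_sizeof() = (1 + 2) * sizeof(KeySlice) + 8 = 32 bytes; a layer-0 boundary is
  // the minimum at 2 * 8 + 8 = 24 bytes. The fixed 8 bytes are the header fields that
  // precede slices_.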

  /** Points to a PageBoundaryInfo, together with its sorting information. */
  struct PageBoundarySort {
    /** hash of the page boundary, the sort key. */
    uint32_t hash_;
    /** Points to PageBoundaryInfo in page_boundary_info_. */
    snapshot::BufferPosition info_pos_;
    /** used by std::sort */
    bool operator<(const PageBoundarySort& rhs) const ALWAYS_INLINE {
      return hash_ < rhs.hash_;
    }
    /** used by std::lower_bound */
    static bool static_less_than(
      const PageBoundarySort& lhs,
      const PageBoundarySort& rhs) ALWAYS_INLINE {
      return lhs.hash_ < rhs.hash_;
    }
  };
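
  // A sketch (an assumption based on the comments above, not code from the original file)
  // of how sort_page_boundary_info() / lookup_page_boundary_info() presumably use this
  // table: sort the entries by hash_, binary-search the hash of the boundary we look for,
  // then walk the equal-hash run and reject collisions with exact_match().
  //
  //   std::sort(page_boundary_sort_, page_boundary_sort_ + page_boundary_elements_);
  //   PageBoundarySort key;
  //   key.hash_ = PageBoundaryInfo::calculate_hash(btree_level, layer, prefixes, low, high);
  //   const PageBoundarySort* it = std::lower_bound(
  //     page_boundary_sort_,
  //     page_boundary_sort_ + page_boundary_elements_,
  //     key,
  //     PageBoundarySort::static_less_than);
  //   while (it != page_boundary_sort_ + page_boundary_elements_ && it->hash_ == key.hash_) {
  //     const PageBoundaryInfo* info = get_page_boundary_info(it->info_pos_);
  //     if (info->exact_match(btree_level, layer, prefixes, low, high)) { /* found it */ }
  //     ++it;
  //   }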

  MasstreeComposeContext(
    Engine* engine,
    snapshot::MergeSort* merge_sort,
    const Composer::ComposeArguments& args);
  ErrorStack execute();

 private:
  snapshot::SnapshotWriter* get_writer() const { return args_.snapshot_writer_; }
  cache::SnapshotFileSet* get_files() const { return args_.previous_snapshot_files_; }
  memory::PagePoolOffset allocate_page() {
    ASSERT_ND(allocated_pages_ < max_pages_);
    memory::PagePoolOffset new_offset = allocated_pages_;
    ++allocated_pages_;
    return new_offset;
  }

  MasstreePage* get_page(memory::PagePoolOffset offset) const ALWAYS_INLINE;
  MasstreePage* get_original(memory::PagePoolOffset offset) const ALWAYS_INLINE;
  MasstreeIntermediatePage* as_intermdiate(MasstreePage* page) const ALWAYS_INLINE;
  MasstreeBorderPage* as_border(MasstreePage* page) const ALWAYS_INLINE;
  uint16_t get_cur_prefix_length() const { return get_last_layer() * sizeof(KeySlice); }
  uint8_t get_last_level_index() const {
    ASSERT_ND(cur_path_levels_ > 0);
    return cur_path_levels_ - 1U;
  }
  PathLevel* get_last_level() { return cur_path_ + get_last_level_index(); }
  const PathLevel* get_last_level() const { return cur_path_ + get_last_level_index(); }
  uint8_t get_last_layer() const {
    if (cur_path_levels_ == 0) {
      return 0;
    } else {
      return get_last_level()->layer_;
    }
  }
  void store_cur_prefix(uint8_t layer, KeySlice prefix_slice);

  PathLevel* get_second_last_level() {
    ASSERT_ND(cur_path_levels_ > 1U);
    return cur_path_ + (cur_path_levels_ - 2U);
  }

  ErrorStack execute_a_log(uint32_t cur);
  ErrorStack execute_same_key_group(uint32_t from, uint32_t to);
  ErrorStack execute_insert_group(uint32_t from, uint32_t to);
  uint32_t execute_insert_group_get_cur_run(
    uint32_t cur,
    uint32_t to,
    KeySlice* min_slice,
    KeySlice* max_slice);
  ErrorCode execute_insert_group_append_loop(uint32_t from, uint32_t to, uint32_t hint_count);
  ErrorStack execute_delete_group(uint32_t from, uint32_t to);
  ErrorStack execute_update_group(uint32_t from, uint32_t to);
  ErrorStack execute_overwrite_group(uint32_t from, uint32_t to);

  void write_dummy_page_zero();

  // init/uninit called only once or at most a few times
  ErrorStack init_root();
  ErrorStack open_first_level(const char* key, KeyLength key_length);
  ErrorStack open_next_level(
    const char* key,
    KeyLength key_length,
    MasstreePage* parent,
    KeySlice prefix_slice,
    SnapshotPagePointer* next_page_id);
  void open_next_level_create_layer(
    const char* key,
    KeyLength key_length,
    MasstreeBorderPage* parent,
    KeySlice prefix_slice,
    SlotIndex parent_index);

  ErrorStack flush_buffer();
  inline ErrorStack flush_if_nearly_full() {
    uint64_t threshold = max_pages_ * 8ULL / 10ULL;
    if (UNLIKELY(allocated_pages_ >= threshold || allocated_pages_ + 256U >= max_pages_)) {
      CHECK_ERROR(flush_buffer());
    }
    return kRetOk;
  }
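
  // Worked example (a sketch, not part of the original file): with max_pages_ = 10000,
  // threshold = 10000 * 8 / 10 = 8000, so the buffer is flushed once 8000 pages have been
  // allocated, or once fewer than 256 free pages remain (allocated_pages_ >= 9744); for a
  // buffer of this size the 80% rule fires first.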

  ErrorStack adjust_path(const char* key, KeyLength key_length);

  ErrorStack consume_original_upto_border(KeySlice slice, KeyLength key_length, PathLevel* level);
  ErrorStack consume_original_upto_intermediate(KeySlice slice, PathLevel* level);
  ErrorStack consume_original_all();
  ErrorStack close_level_grow_subtree(
    SnapshotPagePointer* root_pointer,
    KeySlice subtree_low,
    KeySlice subtree_high);
  ErrorStack pushup_non_root();

  // next methods called for each log entry. must be efficient! So these methods return ErrorCode.

  bool does_need_to_adjust_path(const char* key, KeyLength key_length) const ALWAYS_INLINE;

  ErrorStack close_path_layer(uint16_t max_layer);
  ErrorStack close_last_level();
  ErrorStack close_first_level();
  ErrorStack close_all_levels();

  void append_border(
    KeySlice slice,
    xct::XctId xct_id,
    KeyLength remainder_length,
    const char* suffix,
    PayloadLength payload_count,
    const char* payload,
    PathLevel* level);
  void append_border_next_layer(
    KeySlice slice,
    xct::XctId xct_id,
    SnapshotPagePointer pointer,
    PathLevel* level);
  void append_border_newpage(KeySlice slice, PathLevel* level);
  void append_intermediate(
    KeySlice low_fence,
    SnapshotPagePointer pointer,
    PathLevel* level);
  void append_intermediate_newpage_and_pointer(
    KeySlice low_fence,
    SnapshotPagePointer pointer,
    PathLevel* level);


  void refresh_page_boundary_info_variables();
  ErrorCode close_level_register_page_boundaries();
  void remove_old_page_boundary_info(SnapshotPagePointer page_id, MasstreePage* page);
  PageBoundaryInfo* get_page_boundary_info(snapshot::BufferPosition pos) ALWAYS_INLINE {
    ASSERT_ND(pos <= page_boundary_info_cur_pos_);
    return reinterpret_cast<PageBoundaryInfo*>(page_boundary_info_ + pos * 8ULL);
  }
  const PageBoundaryInfo* get_page_boundary_info(snapshot::BufferPosition pos) const ALWAYS_INLINE {
    ASSERT_ND(pos <= page_boundary_info_cur_pos_);
    return reinterpret_cast<PageBoundaryInfo*>(page_boundary_info_ + pos * 8ULL);
  }
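
  // Note (an observation, not part of the original file): BufferPosition pos addresses
  // page_boundary_info_ in 8-byte units (pos * 8ULL). PageBoundaryInfo::dynamic_sizeof()
  // is always a multiple of 8, so every entry starts at an 8-byte-aligned offset.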
  void assert_page_boundary_not_exists(
    uint8_t btree_level,
    uint8_t layer,
    const KeySlice* prefixes,
    KeySlice low,
    KeySlice high) const ALWAYS_INLINE;
  void sort_page_boundary_info();
  SnapshotPagePointer lookup_page_boundary_info(
    uint8_t btree_level,
    uint8_t layer,
    const KeySlice* prefixes,
    KeySlice low,
    KeySlice high) const ALWAYS_INLINE;
  ErrorStack install_snapshot_pointers(uint64_t* installed_count) const;
  ErrorCode install_snapshot_pointers_recurse(
    const memory::GlobalVolatilePageResolver& resolver,
    uint8_t layer,
    KeySlice* prefixes,
    MasstreePage* volatile_page,
    uint64_t* installed_count) const ALWAYS_INLINE;
  ErrorCode install_snapshot_pointers_recurse_intermediate(
    const memory::GlobalVolatilePageResolver& resolver,
    uint8_t layer,
    KeySlice* prefixes,
    MasstreeIntermediatePage* volatile_page,
    uint64_t* installed_count) const;
  ErrorCode install_snapshot_pointers_recurse_border(
    const memory::GlobalVolatilePageResolver& resolver,
    uint8_t layer,
    KeySlice* prefixes,
    MasstreeBorderPage* volatile_page,
    uint64_t* installed_count) const;

  Engine* const engine_;
  snapshot::MergeSort* const merge_sort_;
  const StorageId id_;
  MasstreeStorage storage_;
  const Composer::ComposeArguments& args_;

  const snapshot::SnapshotId snapshot_id_;
  const uint16_t numa_node_;
  const uint32_t max_pages_;

  MasstreeIntermediatePage* const root_;

  Page* const page_base_;
  Page* const original_base_;

  // const members up to here.

  SnapshotPagePointer page_id_base_;

  char* page_boundary_info_;
  PageBoundarySort* page_boundary_sort_;

  snapshot::BufferPosition page_boundary_info_cur_pos_;
  uint32_t page_boundary_elements_;

  snapshot::BufferPosition page_boundary_info_capacity_;
  uint32_t max_page_boundary_elements_;

  uint32_t allocated_pages_;

  uint8_t root_index_;
  uint8_t root_index_mini_;

  uint8_t cur_path_levels_;
  PathLevel cur_path_[kMaxLevels];
  char cur_prefix_be_[kMaxLayers * kSliceLen];
  KeySlice cur_prefix_slices_[kMaxLayers];
  KeySlice tmp_boundary_array_[kTmpBoundaryArraySize];
};

}  // namespace masstree
}  // namespace storage
}  // namespace foedus
#endif  // FOEDUS_STORAGE_MASSTREE_MASSTREE_COMPOSER_IMPL_HPP_
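
// A sketch of the overall flow, assumed from the declarations and briefs above rather
// than taken from the original file: MasstreeComposer::compose() presumably builds a
// MasstreeComposeContext over the merge-sorted log stream and runs it.
//
//   ErrorStack MasstreeComposer::compose(const Composer::ComposeArguments& args) {
//     // merge_sort is prepared from args by the snapshot framework; shown here only
//     // as an assumption for illustration.
//     MasstreeComposeContext context(engine_, merge_sort, args);
//     CHECK_ERROR(context.execute());
//     return kRetOk;
//   }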