#include <glog/logging.h>
  : engine_(parent->get_engine()),
    storage_id_(parent->get_storage_id()),
    storage_(engine_, storage_id_) {

  VLOG(0) << to_string() << " only 1 root info page, so we just use it as the new root.";

  LOG(INFO) << "MasstreeStorage-" << storage_id_ << " has an unbalanced depth in sub-tree";

  uint16_t updated_count = 0;

  VLOG(0) << to_string() << " has " << updated_count << " new pointers from root.";

  uint16_t dummy_count = 0;
  uint32_t offset = 1U + dummy_count;

  VLOG(0) << to_string() << " has added " << dummy_count << " dummy pointers in root.";

  storage_.get_control_block()->root_page_pointer_.snapshot_pointer_ = new_root_id;
for (uint16_t buddy = 1; buddy < buddy_count; ++buddy) {

for (SlotIndex i = 0; i <= key_count; ++i) {
  const MasstreeIntermediatePage::MiniPage& base_mini = base->get_minipage(i);
  SlotIndex mini_count = base_mini.key_count_;

  for (uint16_t buddy = 1; buddy < buddy_count; ++buddy) {
    const MasstreeIntermediatePage* page
      = reinterpret_cast<const MasstreeIntermediatePage*>(args.root_info_pages_[buddy]);

  for (SlotIndex j = 0; j <= mini_count; ++j) {
    ASSERT_ND(base_mini.pointers_[j].volatile_pointer_.is_null());
    uint16_t updated_by = buddy_count;

    for (uint16_t buddy = 1; buddy < buddy_count; ++buddy) {
      const MasstreeIntermediatePage* page
        = reinterpret_cast<const MasstreeIntermediatePage*>(args.root_info_pages_[buddy]);
      const MasstreeIntermediatePage::MiniPage& mini = page->get_minipage(i);
      ASSERT_ND(mini.pointers_[j].volatile_pointer_.is_null());
      if (j < mini_count) {
        if (mini.separators_[j] != base_mini.separators_[j]) {

      if (buddy == 1U && updated_by == 0) {
        old_pointer = pointer;

      if (updated_by != buddy_count) {
      } else if (old_pointer != pointer) {
  return std::string("MasstreeComposer-") + std::to_string(storage_id_);
  merge_sort_(merge_sort),
  id_(merge_sort->get_storage_id()),
  storage_(engine, id_),
  snapshot_id_(args.snapshot_writer_->get_snapshot_id()),
  numa_node_(get_writer()->get_numa_node()),
  max_pages_(get_writer()->get_page_size()),
  page_base_(reinterpret_cast<Page*>(get_writer()->get_page_base())),
  original_base_(merge_sort->get_original_pages()) {
  refresh_page_boundary_info_variables();
  root_index_mini_ = 0;
  allocated_pages_ = 0;
  cur_path_levels_ = 0;
  page_boundary_info_cur_pos_ = 0;
  page_boundary_elements_ = 0;
  std::memset(cur_path_, 0, sizeof(cur_path_));
  std::memset(cur_prefix_be_, 0, sizeof(cur_prefix_be_));
  std::memset(cur_prefix_slices_, 0, sizeof(cur_prefix_slices_));
  std::memset(tmp_boundary_array_, 0, sizeof(tmp_boundary_array_));
void MasstreeComposeContext::refresh_page_boundary_info_variables() {
  uint64_t size_in_bytes = size_in_pages * sizeof(Page);
  max_page_boundary_elements_ = size_in_bytes / (16ULL * 8ULL);
  page_boundary_info_capacity_ = max_page_boundary_elements_ * 15ULL;
  page_boundary_sort_ = reinterpret_cast<PageBoundarySort*>(
    page_boundary_info_ + page_boundary_info_capacity_ * 8ULL);
void MasstreeComposeContext::write_dummy_page_zero() {
  Page* dummy_page = page_base_;
  std::memset(dummy_page, 0xDA, sizeof(Page));
  allocated_pages_ = 1;

  return reinterpret_cast<MasstreePage*>(page_base_ + offset);
inline MasstreeIntermediatePage* MasstreeComposeContext::as_intermdiate(MasstreePage* page) const {
  return reinterpret_cast<MasstreeIntermediatePage*>(page);
}

inline MasstreeBorderPage* MasstreeComposeContext::as_border(MasstreePage* page) const {
  return reinterpret_cast<MasstreeBorderPage*>(page);
}

  return reinterpret_cast<MasstreePage*>(original_base_ + offset);
write_dummy_page_zero();

char debug_prev[8192];
uint16_t debug_prev_length = 0;

uint64_t processed = 0;

for (uint64_t i = 0; i < count; ++i) {
  const char* key = entry->get_key();
  if (key_length < debug_prev_length) {
    ASSERT_ND(std::memcmp(debug_prev, key, key_length) <= 0);
  } else {
    ASSERT_ND(std::memcmp(debug_prev, key, debug_prev_length) <= 0);
  }
  std::memcpy(debug_prev, key, key_length);
  debug_prev_length = key_length;

uint32_t group_count = 0;
while (cur < count) {

processed += group.count_;

VLOG(1) << storage_ << " processed a batch of " << cur << " logs that has " << group_count
VLOG(0) << storage_ << " processed " << processed << " logs. now pages=" << allocated_pages_;

uint64_t installed_count = 0;
CHECK_ERROR(install_snapshot_pointers(&installed_count));
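// Annotation (added, not in the original): execute_same_key_group() compacts a run
// of logs that all touch the same key before applying anything. Within such a
// group an insert followed by a delete cancels out, a delete followed by an insert
// collapses into an update, and an update or insert absorbs every overwrite and
// update that precedes it. The last_active_* / is_*_merged variables below track
// which single log (possibly with a disguised type) survives the compaction.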
ErrorStack MasstreeComposeContext::execute_same_key_group(uint32_t from, uint32_t to) {
  const bool starts_with_insert
  uint32_t last_active_delete = to;
  for (uint32_t i = static_cast<uint32_t>(to - 1);
    i >= from && i < static_cast<uint32_t>(to);
      last_active_delete = i;

  uint32_t insert_count = 0;
  uint32_t delete_count = 0;
  for (uint32_t j = from; j < i; ++j) {
    switch (log_type_j) {
      ASSERT_ND((starts_with_insert && insert_count == delete_count)
        || (!starts_with_insert && insert_count + 1U == delete_count));
      ASSERT_ND((!starts_with_insert && insert_count == delete_count)
        || (starts_with_insert && insert_count == delete_count + 1U));
      ASSERT_ND((!starts_with_insert && insert_count == delete_count)
        || (starts_with_insert && insert_count == delete_count + 1U));
      ASSERT_ND((!starts_with_insert && insert_count == delete_count)
        || (starts_with_insert && insert_count == delete_count + 1U));

  uint32_t last_active_insert = to;
  bool is_last_active_insert_merged = false;
  uint32_t last_active_update = to;
  bool is_last_active_update_merged = false;
  uint32_t next_to_check = from;
  if (last_active_delete != to) {
    ASSERT_ND(last_active_delete >= from && last_active_delete < to);
    if (last_active_delete + 1U == to) {
      if (starts_with_insert) {
        DVLOG(1) << "yay, same-key-group completely eliminated a group of "
          << (to - from) << " logs";
        DVLOG(1) << "yay, same-key-group compressed " << (to - from) << " logs into a delete";

    uint32_t next = last_active_delete + 1U;
    if (starts_with_insert) {
      DVLOG(2) << "I and D cancelled each other. from=" << from
        << " delete at=" << last_active_delete;
      last_active_insert = next;
      DVLOG(2) << "D+I merged into U. from=" << from << " delete at=" << last_active_delete;
      last_active_update = next;
      is_last_active_update_merged = true;
    next_to_check = next + 1U;
    last_active_delete = to;

  for (uint32_t i = next_to_check; i < to; ++i) {
    if (last_active_insert != to) {
      DVLOG(2) << "U replaced all preceding I and O, turning into a new I. i=" << i;
      ASSERT_ND(!is_last_active_update_merged);
      last_active_insert = i;
      is_last_active_insert_merged = true;
      DVLOG(2) << "U nullified all preceding O and U. i=" << i;
      last_active_update = i;
      is_last_active_update_merged = false;
  ASSERT_ND(starts_with_insert || last_active_insert != to);
  if (last_active_insert != to || last_active_update != to) {
    if (last_active_insert != to) {
      ASSERT_ND(!is_last_active_update_merged);
      cur = last_active_insert;
      if (is_last_active_insert_merged) {
      ASSERT_ND(!is_last_active_insert_merged);
      cur = last_active_update;
      if (is_last_active_update_merged) {
    if (type_after_merge != type_before_merge) {
      DVLOG(2) << "Disguising log type for merge. cur=" << cur
        << ", type_before_merge=" << type_before_merge
        << ", type_after_merge=" << type_after_merge;

  PathLevel* last = get_last_level();
  MasstreeBorderPage* page = as_border(get_page(last->tail_));
  SlotIndex key_count = page->get_key_count();
  ASSERT_ND(!page->does_point_to_layer(index));
  char* record = page->get_record(index);
  for (uint32_t i = cur; i < to; ++i) {
    const MasstreeOverwriteLogType* casted =
    ASSERT_ND(page->equal_key(index, casted->get_key(), casted->key_length_));
    const MasstreeOverwriteLogType* next =
      reinterpret_cast<const MasstreeOverwriteLogType*>(
    if ((next->payload_offset_ <= casted->payload_offset_)
      && (next->payload_offset_ + next->payload_count_
        >= casted->payload_offset_ + casted->payload_count_)) {
      DVLOG(3) << "Skipped redundant overwrites";
    casted->apply_record(nullptr, id_, page->get_owner_id(index), record);
ErrorStack MasstreeComposeContext::execute_insert_group(uint32_t from, uint32_t to) {
  uint32_t run_count = execute_insert_group_get_cur_run(cur, to, &min_slice, &max_slice);
  if (run_count <= 1U) {

  DVLOG(2) << "Great, " << run_count << " insert-logs to process in the current context.";
  PathLevel* last = get_last_level();
  MasstreeBorderPage* page = as_border(get_page(last->tail_));
  ASSERT_ND(min_slice >= page->get_low_fence());
  ASSERT_ND(max_slice <= page->get_high_fence());
  uint32_t found_boundaries_count = 0;
  MasstreeStorage::PeekBoundariesArguments args = {
    &found_boundaries_count };
  DVLOG(2) << found_boundaries_count << " boundary hints for " << run_count << " insert-logs.";
    << " as hints for " << run_count << " insert-logs!!";
  WRAP_ERROR_CODE(execute_insert_group_append_loop(cur, cur + run_count, found_boundaries_count));
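// Annotation (added, not in the original): the append loop below is the fast path.
// Because the logs are sorted and strictly ascending within the run, each record
// can be appended at the tail border page without any search. Boundary hints are
// consulted to decide when to close the current page; a hint that would leave the
// page too empty is counted as "dubious" and ignored once the threshold is hit.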
ErrorCode MasstreeComposeContext::execute_insert_group_append_loop(
  uint32_t hint_count) {
  const uint16_t kFetch = 8;
  const KeySlice* hints = tmp_boundary_array_;
  const log::RecordLogType* logs[kFetch];
  uint32_t next_hint = 0;
  uint32_t dubious_hints = 0;
  PathLevel* last = get_last_level();
  MasstreeBorderPage* page = as_border(get_page(last->tail_));
  uint8_t cur_layer = page->get_layer();
  uint16_t fetch_count = std::min<uint32_t>(kFetch, to - cur);
  uint16_t actual_count = merge_sort_->fetch_logs(cur, fetch_count, logs);
  for (uint32_t i = 0; i < actual_count; ++i) {
    ASSERT_ND(next_hint >= hint_count || page->get_low_fence() < hints[next_hint]);
      || page->get_key_count() == 0
      || page->get_slice(page->get_key_count() - 1) < hints[next_hint]);
    const MasstreeInsertLogType* entry
      = reinterpret_cast<const MasstreeInsertLogType*>(ASSUME_ALIGNED(logs[i], 8));
    const char* key = entry->get_key();
    KeyLength key_length = entry->key_length_;
    for (uint8_t layer = 0; layer < cur_layer; ++layer) {
      ASSERT_ND(prefix_slice == cur_prefix_slices_[layer]);
    const char* suffix = key + (cur_layer + 1) * sizeof(KeySlice);
    SlotIndex key_count = page->get_key_count();
    ASSERT_ND(key_count == 0 || slice > page->get_slice(key_count - 1));
    const char* payload = entry->get_payload();
    xct::XctId xct_id = entry->header_.xct_id_;
    const uint32_t kDubiousHintsThreshold = 2;
    bool page_switch_hinted = false;
    if (UNLIKELY(next_hint < hint_count && slice >= hints[next_hint])) {
      DVLOG(3) << "the hint tells that we should close the page now.";
      bool too_empty = key_count == 0 || page->available_space() <= kTooEmptySize;
      DVLOG(1) << "however, the page is still quite empty!";
      if (dubious_hints >= kDubiousHintsThreshold) {
        DVLOG(1) << "um, let's ignore the hints.";
        DVLOG(1) << "nevertheless, let's trust the hints for now.";
        page_switch_hinted = true;
        hint_suggested_slice = hints[next_hint];

    while (next_hint < hint_count && slice >= hints[next_hint]) {

      || UNLIKELY(!page->can_accomodate_snapshot(remainder_length, payload_count))) {
      MasstreeBorderPage* new_page = reinterpret_cast<MasstreeBorderPage*>(get_page(new_offset));
      if (page_switch_hinted) {
        ASSERT_ND(hint_suggested_slice > page->get_low_fence());
        ASSERT_ND(hint_suggested_slice <= slice);
        ASSERT_ND(hint_suggested_slice < page->get_high_fence());
        middle = hint_suggested_slice;

      KeySlice high_fence = page->get_high_fence();
      new_page->initialize_snapshot_page(id_, new_page_id, cur_layer, middle, high_fence);
      page->set_foster_major_offset_unsafe(new_offset);
      page->set_high_fence_unsafe(middle);
      last->tail_ = new_offset;

    ASSERT_ND(page->can_accomodate_snapshot(remainder_length, payload_count));
    page->reserve_record_space(key_count, xct_id, slice, suffix, remainder_length, payload_count);
    page->increment_key_count();
uint32_t MasstreeComposeContext::execute_insert_group_get_cur_run(
  if (cur_path_levels_ == 0) {

  PathLevel* last = get_last_level();
  MasstreePage* tail_page = get_page(last->tail_);
  if (!tail_page->is_border()) {
    LOG(WARNING) << "um, why can this happen? anyway, let's process it per log";

  const MasstreeBorderPage* page = as_border(tail_page);
  const KeySlice low_fence = page->get_low_fence();
  const KeySlice high_fence = page->get_high_fence();
  const SlotIndex existing_keys = page->get_key_count();
    = existing_keys == 0 ? kInfimumSlice : page->get_slice(existing_keys - 1);
  ASSERT_ND(last->layer_ == page->get_layer());
  const uint8_t prefix_count = last->layer_;
  const snapshot::MergeSort::SortEntry* sort_entries = merge_sort_->get_sort_entries();
  KeySlice prev_slice = existing_slice;
  if (prefix_count == 0) {
    for (run_count = 0; cur + run_count < to; ++run_count) {
      KeySlice slice = sort_entries[cur + run_count].get_key();
      if (slice >= high_fence) {
      if (slice == prev_slice) {
      if (last->has_next_original() && slice >= last->next_original_slice_) {
    if (run_count == 0) {

    for (run_count = 0; cur + run_count < to; ++run_count) {
      KeySlice first_slice = sort_entries[cur + run_count].get_key();
      ASSERT_ND(first_slice >= cur_prefix_slices_[0]);
      if (first_slice != cur_prefix_slices_[0]) {
      const MasstreeCommonLogType* entry =
        reinterpret_cast<const MasstreeCommonLogType*>(
      const char* key = entry->get_key();
      KeyLength key_length = entry->key_length_;
      if (key_length <= prefix_count * sizeof(KeySlice)) {
      for (uint8_t layer = 1; layer < prefix_count; ++layer) {
        ASSERT_ND(prefix_slice >= cur_prefix_slices_[layer]);
        if (prefix_slice != cur_prefix_slices_[layer]) {
      if (slice >= high_fence
        || slice == prev_slice
        || (last->has_next_original() && slice >= last->next_original_slice_)) {
      if (run_count == 0) {
ErrorStack MasstreeComposeContext::execute_delete_group(uint32_t from, uint32_t to) {
  for (uint32_t i = from; i < to; ++i) {

ErrorStack MasstreeComposeContext::execute_update_group(uint32_t from, uint32_t to) {
  for (uint32_t i = from; i < to; ++i) {

ErrorStack MasstreeComposeContext::execute_overwrite_group(uint32_t from, uint32_t to) {
  for (uint32_t i = from; i < to; ++i) {
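// Annotation (added, not in the original): execute_a_log() is the slow,
// one-log-at-a-time path used when a group cannot be batched. It adjusts the
// current path to the key's layer and page, digs into next-layer pointers while
// the key's remainder still spans further layers, then applies the log: an
// overwrite patches the existing record in place, a delete truncates the tail
// slot, and an insert appends a fresh record at the tail.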
inline ErrorStack MasstreeComposeContext::execute_a_log(uint32_t cur) {
  ASSERT_ND(cur < merge_sort_->get_current_count());
  const MasstreeCommonLogType* entry =
  const char* key = entry->get_key();
  KeyLength key_length = entry->key_length_;
  ASSERT_ND(cur_path_levels_ == 0 || get_page(get_last_level()->tail_)->is_border());
  if (UNLIKELY(does_need_to_adjust_path(key, key_length))) {

  PathLevel* last = get_last_level();
  if (last->needs_to_consume_original(slice, key_length)) {
    CHECK_ERROR(consume_original_upto_border(slice, key_length, last));
    ASSERT_ND(!last->needs_to_consume_original(slice, key_length));

  MasstreeBorderPage* page = as_border(get_page(last->tail_));
  SlotIndex key_count = page->get_key_count();
  while (key_count > 0
    && key_length > (last->layer_ + 1U) * kSliceLen
    && page->get_slice(key_count - 1) == slice
    && page->get_remainder_length(key_count - 1) > kSliceLen) {
    if (page->does_point_to_layer(key_count - 1)) {
      ASSERT_ND(page->get_next_layer(key_count - 1)->volatile_pointer_.is_null());
      SnapshotPagePointer* next_pointer = &page->get_next_layer(key_count - 1)->snapshot_pointer_;
      CHECK_ERROR(open_next_level(key, key_length, page, slice, next_pointer));
      ASSERT_ND(page->will_conflict(key_count - 1, slice, key_length - last->layer_ * kSliceLen));
      open_next_level_create_layer(key, key_length, page, slice, key_count - 1);
      ASSERT_ND(page->does_point_to_layer(key_count - 1));

    last = get_last_level();
    page = as_border(get_page(last->tail_));
    key_count = page->get_key_count();

  ASSERT_ND(!page->does_point_to_layer(index));
  ASSERT_ND(page->equal_key(index, key, key_length));
  char* record = page->get_record(index);
  const MasstreeOverwriteLogType* casted
    = reinterpret_cast<const MasstreeOverwriteLogType*>(entry);
  casted->apply_record(nullptr, id_, page->get_owner_id(index), record);

  ASSERT_ND(!page->does_point_to_layer(index));
  ASSERT_ND(page->equal_key(index, key, key_length));
  page->set_key_count(index);

  ASSERT_ND(key_count == 0 || !page->equal_key(key_count - 1, key, key_length));
    entry->header_.xct_id_,
    entry->payload_count_,
    entry->get_payload(),
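// Annotation (added, not in the original): init_root() seeds the in-memory root_
// intermediate page. If a previous snapshot exists, its root page is read back
// and reused; otherwise an empty root is built from the partitioner's metadata
// (MasstreePartitionerData), one pointer slot per partition boundary.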
ErrorStack MasstreeComposeContext::init_root() {
  if (snapshot_page_id != 0) {
    ASSERT_ND(page_boundary_info_cur_pos_ == 0 && page_boundary_elements_ == 0);

  MasstreePartitionerData* data
    = reinterpret_cast<MasstreePartitionerData*>(metadata->locate_data(engine_));
  for (uint16_t i = 1; i < data->partition_count_; ++i) {
ErrorStack MasstreeComposeContext::open_first_level(const char* key, KeyLength key_length) {
  PathLevel* first = cur_path_;
  MasstreeIntermediatePage::MiniPage& minipage = root_->get_minipage(index);
  root_index_ = index;
  root_index_mini_ = index_mini;
  if (old_pointer != 0) {

  first->head_ = allocate_page();
  first->page_count_ = 1;
  first->tail_ = first->head_;
  first->set_no_more_next_original();
  first->low_fence_ = low_fence;
  first->high_fence_ = high_fence;
  MasstreePage* page = get_page(first->head_);
  *pointer_address = page_id;
  MasstreeBorderPage* casted = reinterpret_cast<MasstreeBorderPage*>(page);
  casted->initialize_snapshot_page(id_, page_id, 0, low_fence, high_fence);
  page->header().page_id_ = page_id;

  ASSERT_ND(first->low_fence_ == low_fence);
  ASSERT_ND(first->high_fence_ == high_fence);
  ASSERT_ND(minipage.pointers_[index_mini].snapshot_pointer_ != old_pointer);
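// Annotation (added, not in the original): open_next_level() descends one level in
// the composer's path. If the pointed-to snapshot page already exists, it is
// copied in as the "original" page and partially replayed: records up to the
// search position are kept, and the next_original_* cursors remember where to
// resume consuming the remainder. A page written by this very snapshot run
// triggers remove_old_page_boundary_info() so its boundary entry is not duplicated.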
ErrorStack MasstreeComposeContext::open_next_level(
  MasstreePage* parent,
  uint8_t this_level = cur_path_levels_;
  PathLevel* level = cur_path_ + this_level;
  if (parent->is_border()) {
    store_cur_prefix(parent->get_layer(), prefix_slice);
    level->layer_ = parent->get_layer() + 1;
    level->layer_ = parent->get_layer();
  level->head_ = allocate_page();
  level->page_count_ = 1;
  level->tail_ = level->head_;
  level->set_no_more_next_original();
  *next_page_id = page_id;

  MasstreePage* target = get_page(level->head_);
  MasstreePage* original = get_original(this_level);
  level->low_fence_ = original->get_low_fence();
  level->high_fence_ = original->get_high_fence();
  if (UNLIKELY(old_snapshot_id == snapshot_id_)) {
    remove_old_page_boundary_info(old_page_id, original);

  std::memcpy(target, original, kPageSize);
  target->header().page_id_ = page_id;
  if (original->is_border()) {
    const MasstreeBorderPage* casted = as_border(original);
    SlotIndex key_count = original->get_key_count();
    auto result = casted->find_key_for_snapshot(slice, key + skip, key_length - skip);
    bool go_down = false;
    bool create_layer = false;
    switch (result.match_type_) {
      copy_count = result.index_;
      copy_count = result.index_ + 1U;
      create_layer = true;
      copy_count = result.index_ + 1U;
      copy_count = result.index_ + 1U;

    MasstreeBorderPage* target_casted = as_border(target);
    target_casted->set_key_count(copy_count);
    level->next_original_ = copy_count + 1U;
    if (level->next_original_ >= key_count) {
      level->set_no_more_next_original();
      level->next_original_slice_ = casted->get_slice(level->next_original_);
      level->next_original_remainder_ = casted->get_remainder_length(level->next_original_);

    ASSERT_ND(!level->has_next_original() || level->next_original_slice_ > slice);
    SlotIndex next_layer_index = target_casted->get_key_count() - 1;
    ASSERT_ND(target_casted->get_slice(next_layer_index) == slice);
    open_next_level_create_layer(key, key_length, target_casted, slice, next_layer_index);
    ASSERT_ND(target_casted->does_point_to_layer(next_layer_index));

    ASSERT_ND(target_casted->does_point_to_layer(next_layer_index));
    ASSERT_ND(target_casted->get_next_layer(next_layer_index)->volatile_pointer_.is_null());
      = &target_casted->get_next_layer(next_layer_index)->snapshot_pointer_;
    CHECK_ERROR(open_next_level(key, key_length, target_casted, slice, next_layer));
    ASSERT_ND(cur_path_levels_ > this_level + 1U);

    const MasstreeIntermediatePage* casted = as_intermdiate(original);
    uint8_t index = casted->find_minipage(slice);
    const MasstreeIntermediatePage::MiniPage& minipage = casted->get_minipage(index);
    uint8_t index_mini = minipage.find_pointer(slice);
    MasstreeIntermediatePage* target_casted = as_intermdiate(target);
    target_casted->set_key_count(index);
    target_casted->get_minipage(index).key_count_ = index_mini;
    target_casted->extract_separators_snapshot(index, index_mini, &this_fence, &next_fence);
    level->next_original_ = index;
    level->next_original_mini_ = index_mini + 1U;
    level->next_original_slice_ = next_fence;
    if (level->next_original_mini_ > minipage.key_count_) {
      ++level->next_original_;
      level->next_original_mini_ = 0;
      if (level->next_original_ > casted->get_key_count()) {
        level->set_no_more_next_original();

      = &target_casted->get_minipage(index).pointers_[index_mini].snapshot_pointer_;
    CHECK_ERROR(open_next_level(key, key_length, target_casted, slice, next_address));
    ASSERT_ND(cur_path_levels_ > this_level + 1U);
void MasstreeComposeContext::open_next_level_create_layer(
  MasstreeBorderPage* parent,
  ASSERT_ND(!parent->does_point_to_layer(parent_index));
  ASSERT_ND(parent->get_slice(parent_index) == prefix_slice);
  uint8_t this_level = cur_path_levels_;
  PathLevel* level = cur_path_ + this_level;
  store_cur_prefix(parent->get_layer(), prefix_slice);
  level->layer_ = parent->get_layer() + 1;
  level->head_ = allocate_page();
  level->page_count_ = 1;
  level->tail_ = level->head_;
  level->set_no_more_next_original();

  MasstreeBorderPage* target = reinterpret_cast<MasstreeBorderPage*>(get_page(level->head_));
  const char* parent_suffix = parent->get_record(parent_index);
  const char* suffix = parent_suffix + kSliceLen;
  const char* payload = parent->get_record_payload(parent_index);
  PayloadLength payload_count = parent->get_payload_length(parent_index);
  xct::XctId xct_id = parent->get_owner_id(parent_index)->xct_id_;

  MasstreeBorderPage* original = reinterpret_cast<MasstreeBorderPage*>(get_original(this_level));
  ASSERT_ND(original->can_accomodate(0, remainder_length, payload_count));
  original->reserve_record_space(0, xct_id, slice, suffix, remainder_length, payload_count);
  original->increment_key_count();

  ASSERT_ND(original->ltgt_key(0, key, key_length) != 0);
  if (original->ltgt_key(0, key, key_length) < 0) {
    VLOG(1) << "Interesting, moving an original record to a dummy page in next layer";
    level->next_original_ = 0;
    level->next_original_slice_ = slice;
    level->next_original_remainder_ = remainder_length;
    target->reserve_record_space(0, xct_id, slice, suffix, remainder_length, payload_count);
    target->increment_key_count();
    ASSERT_ND(target->ltgt_key(0, key, key_length) > 0);

  parent->replace_next_layer_snapshot(page_id);
inline bool MasstreeComposeContext::does_need_to_adjust_path(
  if (cur_path_levels_ == 0) {
  } else if (!cur_path_[0].contains_key(key, key_length)) {
  } else if (get_last_level()->layer_ == 0) {

  uint8_t cur_path_layer = get_last_layer();
  if (cur_path_layer > 0) {
    uint16_t max_key_layer = (key_length - 1U) / kSliceLen;
    if (max_key_layer < cur_path_layer) {
  if (common_layers < cur_path_layer) {
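// Annotation (added, not in the original): adjust_path() closes path levels until
// the deepest open level contains the incoming key: it first retreats to the
// common layer, then closes levels whose fence range no longer covers the key's
// slice, and finally re-opens levels downward until the tail page is a border
// page that can accept the key.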
ErrorStack MasstreeComposeContext::adjust_path(const char* key, KeyLength key_length) {
  bool closed_something = false;
  if (cur_path_levels_ > 0) {
    PathLevel* last = get_last_level();
    if (last->layer_ > 0) {
      uint16_t max_key_layer = (key_length - 1U) / kSliceLen;
      uint16_t cmp_layer = std::min<uint16_t>(last->layer_, max_key_layer);
      if (common_layers < last->layer_) {
        ASSERT_ND(common_layers == get_last_layer());
        closed_something = true;

    last = get_last_level();
    while (cur_path_levels_ > 0 && !last->contains_slice(next_slice)) {
      if (cur_path_levels_ == 1U) {
      closed_something = true;
      last = get_last_level();
      ASSERT_ND(get_last_level()->layer_ * kSliceLen == skip);

  if (cur_path_levels_ == 0) {

  PathLevel* last = get_last_level();
  ASSERT_ND(last->contains_slice(next_slice));
  MasstreePage* page = get_page(last->tail_);
  if (!page->is_border()) {
    MasstreeIntermediatePage* casted = as_intermdiate(page);
    ASSERT_ND(casted->find_minipage(next_slice) == casted->get_key_count());
    ASSERT_ND(casted->get_minipage(casted->get_key_count()).find_pointer(next_slice)
      == casted->get_minipage(casted->get_key_count()).key_count_);
    if (last->has_next_original() && last->next_original_slice_ <= next_slice) {
      ASSERT_ND(last->next_original_slice_ != next_slice);
      CHECK_ERROR(consume_original_upto_intermediate(next_slice, last));
    ASSERT_ND(casted->find_minipage(next_slice) == casted->get_key_count());
    ASSERT_ND(casted->get_minipage(casted->get_key_count()).find_pointer(next_slice)
      == casted->get_minipage(casted->get_key_count()).key_count_);
    SlotIndex index = casted->get_key_count();
    MasstreeIntermediatePage::MiniPage& minipage = casted->get_minipage(index);
    SlotIndex index_mini = minipage.key_count_;
    casted->extract_separators_snapshot(index, index_mini, &existing_low, &existing_high);
    ASSERT_ND(existing_high == casted->get_high_fence());
    CHECK_ERROR(open_next_level(key, key_length, page, 0, next_pointer));
  ASSERT_ND(get_last_level()->contains_slice(next_slice));
  ASSERT_ND(get_page(get_last_level()->tail_)->is_border());
inline void MasstreeComposeContext::append_border(
  const char* payload,
  MasstreeBorderPage* target = as_border(get_page(level->tail_));
  SlotIndex key_count = target->get_key_count();
  ASSERT_ND(key_count == 0 || !target->will_conflict(key_count - 1, slice, remainder_length));
    || !target->will_contain_next_layer(key_count - 1, slice, remainder_length));
  ASSERT_ND(key_count == 0 || target->ltgt_key(key_count - 1, slice, suffix, remainder_length) > 0);
  const bool spacious = target->can_accomodate_snapshot(remainder_length, payload_count);
    append_border_newpage(slice, level);
    MasstreeBorderPage* new_target = as_border(get_page(level->tail_));
    target = new_target;

  target->reserve_record_space(new_index, xct_id, slice, suffix, remainder_length, payload_count);
  target->increment_key_count();

inline void MasstreeComposeContext::append_border_next_layer(
  MasstreeBorderPage* target = as_border(get_page(level->tail_));
  SlotIndex key_count = target->get_key_count();
  ASSERT_ND(key_count == 0 || !target->will_conflict(key_count - 1, slice, 0xFF));
  ASSERT_ND(key_count == 0 || target->ltgt_key(key_count - 1, slice, nullptr, kSliceLen) > 0);
    append_border_newpage(slice, level);
    MasstreeBorderPage* new_target = as_border(get_page(level->tail_));
    target = new_target;

  target->append_next_layer_snapshot(xct_id, slice, pointer);
void MasstreeComposeContext::append_border_newpage(KeySlice slice, PathLevel* level) {
  MasstreeBorderPage* target = as_border(get_page(level->tail_));
  SlotIndex key_count = target->get_key_count();

  MasstreeBorderPage* new_target = reinterpret_cast<MasstreeBorderPage*>(get_page(new_offset));
  KeySlice high_fence = target->get_high_fence();
  new_target->initialize_snapshot_page(id_, new_page_id, level->layer_, middle, high_fence);
  target->set_foster_major_offset_unsafe(new_offset);
  target->set_high_fence_unsafe(middle);
  level->tail_ = new_offset;
  ++level->page_count_;
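  // Annotation (added, not in the original): a key slice must not span two border
  // pages. The loop below finds the first record of the fence slice and, if the
  // old page would otherwise end mid-slice, migrates those trailing records into
  // the freshly opened page before sealing the old one.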
  for (migrate_from = key_count; target->get_slice(migrate_from - 1U) == slice; --migrate_from) {
  if (migrate_from != key_count) {
    LOG(INFO) << "Interesting, migrate records to avoid key slice spanning two pages";
    for (SlotIndex old_index = migrate_from; old_index < key_count; ++old_index) {
      ASSERT_ND(target->get_slice(old_index) == slice);
      xct::XctId xct_id = target->get_owner_id(old_index)->xct_id_;
      SlotIndex new_index = new_target->get_key_count();
      if (target->does_point_to_layer(old_index)) {
        const DualPagePointer* pointer = target->get_next_layer(old_index);
        ASSERT_ND(pointer->volatile_pointer_.is_null());
        new_target->append_next_layer_snapshot(xct_id, slice, pointer->snapshot_pointer_);
        PayloadLength payload_count = target->get_payload_length(old_index);
        KeyLength remainder = target->get_remainder_length(old_index);
        new_target->reserve_record_space(
          target->get_record(old_index),
        new_target->increment_key_count();
          new_target->get_record_payload(new_index),
          target->get_record_payload(old_index),

    target->header().set_key_count(migrate_from);
    ASSERT_ND(new_target->get_key_count() == key_count - migrate_from);
inline void MasstreeComposeContext::append_intermediate(
  MasstreeIntermediatePage* target = as_intermdiate(get_page(level->tail_));
  if (UNLIKELY(target->is_full_snapshot())) {
    append_intermediate_newpage_and_pointer(low_fence, pointer, level);
    target->append_pointer_snapshot(low_fence, pointer);

void MasstreeComposeContext::append_intermediate_newpage_and_pointer(
  MasstreeIntermediatePage* target = as_intermdiate(get_page(level->tail_));
  MasstreeIntermediatePage* new_target
    = reinterpret_cast<MasstreeIntermediatePage*>(get_page(new_offset));
  KeySlice high_fence = target->get_high_fence();
  uint8_t btree_level = target->get_btree_level();
  uint8_t layer = level->layer_;
  new_target->initialize_snapshot_page(id_, new_page_id, layer, btree_level, middle, high_fence);
  target->set_foster_major_offset_unsafe(new_offset);
  target->set_high_fence_unsafe(middle);
  level->tail_ = new_offset;
  ++level->page_count_;

  new_target->get_minipage(0).pointers_[0].volatile_pointer_.clear();
  new_target->get_minipage(0).pointers_[0].snapshot_pointer_ = pointer;
ErrorStack MasstreeComposeContext::consume_original_upto_border(
  ASSERT_ND(key_length > level->layer_ * kSliceLen);
  ASSERT_ND(level->needs_to_consume_original(slice, key_length));
  uint16_t level_index = level - cur_path_;
  MasstreeBorderPage* original = as_border(get_original(level_index));
  while (level->needs_to_consume_original(slice, key_length)) {
    SlotIndex index = level->next_original_;
    ASSERT_ND(level->next_original_slice_ == original->get_slice(index));
    ASSERT_ND(level->next_original_remainder_ == original->get_remainder_length(index));
    ASSERT_ND(level->next_original_slice_ <= slice);
    KeySlice original_slice = original->get_slice(index);
    KeyLength original_remainder = original->get_remainder_length(index);
    xct::XctId xct_id = original->get_owner_id(index)->xct_id_;
    if (original->does_point_to_layer(index)) {
      const DualPagePointer* pointer = original->get_next_layer(index);
      ASSERT_ND(pointer->volatile_pointer_.is_null());
      append_border_next_layer(original_slice, xct_id, pointer->snapshot_pointer_, level);
        original->get_record(index),
        original->get_payload_length(index),
        original->get_record_payload(index),
    ++level->next_original_;
    if (level->next_original_ >= original->get_key_count()) {
      level->set_no_more_next_original();
      level->next_original_slice_ = original->get_slice(level->next_original_);
      level->next_original_remainder_ = original->get_remainder_length(level->next_original_);

ErrorStack MasstreeComposeContext::consume_original_upto_intermediate(
  ASSERT_ND(level->next_original_slice_ < slice);
  uint16_t level_index = level - cur_path_;
  MasstreeIntermediatePage* original = as_intermdiate(get_original(level_index));
  while (level->has_next_original() && level->next_original_slice_ < slice) {
    MasstreeIntermediatePointerIterator it(original);
    it.index_ = level->next_original_;
    it.index_mini_ = level->next_original_mini_;
    append_intermediate(it.get_low_key(), it.get_pointer().snapshot_pointer_, level);
    if (it.is_valid()) {
      level->next_original_ = it.index_;
      level->next_original_mini_ = it.index_mini_;
      level->next_original_slice_ = it.get_low_key();
      level->set_no_more_next_original();
ErrorStack MasstreeComposeContext::consume_original_all() {
  PathLevel* last = get_last_level();
  if (!last->has_next_original()) {

  MasstreePage* page = get_original(get_last_level_index());
  if (page->is_border()) {
    MasstreeBorderPage* original = as_border(page);
    while (last->has_next_original()) {
      KeySlice slice = original->get_slice(index);
      xct::XctId xct_id = original->get_owner_id(index)->xct_id_;
      if (original->does_point_to_layer(index)) {
        const DualPagePointer* pointer = original->get_next_layer(index);
        ASSERT_ND(pointer->volatile_pointer_.is_null());
        append_border_next_layer(slice, xct_id, pointer->snapshot_pointer_, last);
          original->get_remainder_length(index),
          original->get_record(index),
          original->get_payload_length(index),
          original->get_record_payload(index),
      ++last->next_original_;
      if (last->next_original_ >= original->get_key_count()) {

    MasstreeIntermediatePage* original = as_intermdiate(page);
    MasstreeIntermediatePointerIterator it(original);
    it.index_ = last->next_original_;
    it.index_mini_ = last->next_original_mini_;
    for (; it.is_valid(); it.next()) {
      append_intermediate(it.get_low_key(), it.get_pointer().snapshot_pointer_, last);
    last->set_no_more_next_original();
ErrorStack MasstreeComposeContext::close_level_grow_subtree(
  PathLevel* last = get_last_level();
  ASSERT_ND(last->low_fence_ == subtree_low);
  ASSERT_ND(last->high_fence_ == subtree_high);

  std::vector<FenceAndPointer> children;
  children.reserve(last->page_count_);
  uint8_t cur_btree_level = 0;
    MasstreePage* page = get_page(cur);
    ASSERT_ND(page->is_foster_minor_null());
    FenceAndPointer child;
    child.pointer_ = page_id_base_ + cur;
    child.low_fence_ = page->get_low_fence();
    children.emplace_back(child);
    cur = page->get_foster_major().get_offset();
    page->set_foster_major_offset_unsafe(0);
    cur_btree_level = page->get_btree_level();
  ASSERT_ND(children.size() == last->page_count_);

  MasstreeIntermediatePage* new_page
    = reinterpret_cast<MasstreeIntermediatePage*>(get_page(new_offset));
  new_page->initialize_snapshot_page(
    cur_btree_level + 1U,
  last->head_ = new_offset;
  last->tail_ = new_offset;
  last->page_count_ = 1;
  ASSERT_ND(children[0].low_fence_ == subtree_low);
  new_page->get_minipage(0).pointers_[0].volatile_pointer_.clear();
  new_page->get_minipage(0).pointers_[0].snapshot_pointer_ = children[0].pointer_;
  for (uint32_t i = 1; i < children.size(); ++i) {
    const FenceAndPointer& child = children[i];
    append_intermediate(child.low_fence_, child.pointer_, last);

  if (last->page_count_ > 1U) {
    CHECK_ERROR(close_level_grow_subtree(root_pointer, subtree_low, subtree_high));
    *root_pointer = new_page_id;
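// Annotation (added, not in the original): pushup_non_root() closes the last path
// level by linking each page in its foster-major chain into the parent
// intermediate level. The already_appended check below guards against adding the
// head page's pointer twice when an earlier close already registered it.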
ErrorStack MasstreeComposeContext::pushup_non_root() {
  PathLevel* last = get_last_level();
  PathLevel* parent = get_second_last_level();
  bool is_head = true;
    MasstreePage* page = get_page(cur);
    ASSERT_ND(page->is_foster_minor_null());
    KeySlice low_fence = page->get_low_fence();
    ASSERT_ND(cur != last->head_ || low_fence == last->low_fence_);
    ASSERT_ND(cur != last->tail_ || page->get_high_fence() == last->high_fence_);
    ASSERT_ND(cur != last->tail_ || page->is_foster_major_null());
    ASSERT_ND(cur == last->tail_ || !page->is_foster_major_null());
    cur = page->get_foster_major().get_offset();
    page->set_foster_major_offset_unsafe(0);
    if (parent->has_next_original() && parent->next_original_slice_ <= low_fence) {
      ASSERT_ND(parent->next_original_slice_ != low_fence);
      CHECK_ERROR(consume_original_upto_intermediate(low_fence, parent));

    bool already_appended = false;
      const MasstreeIntermediatePage* parent_page = as_intermdiate(get_page(parent->tail_));
      SlotIndex index = parent_page->get_key_count();
      const MasstreeIntermediatePage::MiniPage& mini = parent_page->get_minipage(index);
      if (mini.pointers_[index_mini].snapshot_pointer_ == pointer) {
        already_appended = true;
        parent_page->extract_separators_snapshot(index, index_mini, &check_low, &check_high);
        MasstreePage* tail = get_page(last->tail_);
        ASSERT_ND(check_high == tail->get_high_fence());
    if (!already_appended) {
      append_intermediate(low_fence, pointer, parent);
ErrorStack MasstreeComposeContext::close_path_layer(uint16_t max_layer) {
  DVLOG(1) << "Closing path up to layer-" << max_layer;
  ASSERT_ND(get_last_layer() > max_layer);
    PathLevel* last = get_last_level();
    if (last->layer_ <= max_layer) {
  ASSERT_ND(get_last_layer() == max_layer);

ErrorStack MasstreeComposeContext::close_last_level() {
  DVLOG(1) << "Closing the last level " << *get_last_level();
  PathLevel* last = get_last_level();
  PathLevel* parent = get_second_last_level();

  uint32_t counted = 0;
    const MasstreePage* page = get_page(cur);
    ASSERT_ND(page->get_low_fence() == prev);
    ASSERT_ND(page->get_high_fence() > prev);
    ASSERT_ND(page->get_layer() == last->layer_);
    prev = page->get_high_fence();
    cur = page->get_foster_major().get_offset();
    if (page->is_border()) {
  ASSERT_ND(counted == last->page_count_);

  if (last->page_count_ > 1U) {
    ASSERT_ND(parent->layer_ <= last->layer_);
    bool last_is_root = (parent->layer_ != last->layer_);
    if (!last_is_root) {
      MasstreeBorderPage* parent_tail = as_border(get_page(parent->tail_));
      ASSERT_ND(parent_tail->get_key_count() > 0);
      ASSERT_ND(parent_tail->does_point_to_layer(parent_tail->get_key_count() - 1));
        = &parent_tail->get_next_layer(parent_tail->get_key_count() - 1)->snapshot_pointer_;
ErrorStack MasstreeComposeContext::close_first_level() {
  PathLevel* first = cur_path_;
  ASSERT_ND(root_pointer.volatile_pointer_.is_null());
  ASSERT_ND(root_pointer.snapshot_pointer_ == current_pointer);
  if (first->page_count_ > 1U) {
    CHECK_ERROR(close_level_grow_subtree(&root_pointer.snapshot_pointer_, root_low, root_high));

  MasstreePage* child = get_page(first->head_);
  VLOG(0) << "Masstree-" << id_ << " subtree of first layer root has grown.";

  cur_path_levels_ = 0;
  root_index_mini_ = 0xDA;

ErrorStack MasstreeComposeContext::close_all_levels() {
  LOG(INFO) << "Closing all levels except root_";
  while (cur_path_levels_ > 1U) {
  if (cur_path_levels_ > 0) {
  LOG(INFO) << "Closed all levels. now buffered pages=" << allocated_pages_;

ErrorStack MasstreeComposeContext::flush_buffer() {
  LOG(INFO) << "Flushing buffer. buffered pages=" << allocated_pages_;
  ASSERT_ND(allocated_pages_ <= max_pages_);
  allocated_pages_ = 0;
  write_dummy_page_zero();

void MasstreeComposeContext::store_cur_prefix(uint8_t layer, KeySlice prefix_slice) {
  assorted::write_bigendian<KeySlice>(prefix_slice, cur_prefix_be_ + layer * kSliceLen);
  cur_prefix_slices_[layer] = prefix_slice;
void MasstreeComposeContext::remove_old_page_boundary_info(
  MasstreePage* page) {
  ASSERT_ND(page->header().page_id_ == page_id);
  ASSERT_ND(page_boundary_info_cur_pos_ > 0);

  LOG(INFO) << "Removing a page_boundary_info entry for a re-opened page. This should happen"
    << " very infrequently, or the buffer size is too small.";
  const uint8_t btree_level = page->get_btree_level();
  const uint8_t layer = page->get_layer();
  const KeySlice* prefixes = cur_prefix_slices_;
  const KeySlice low = page->get_low_fence();
  const KeySlice high = page->get_high_fence();

  for (uint32_t i = 0; LIKELY(i < page_boundary_elements_); ++i) {
    if (LIKELY(page_boundary_sort_[i].hash_ != hash)) {
    ASSERT_ND(page_boundary_sort_[i].info_pos_ < page_boundary_info_cur_pos_);
    PageBoundaryInfo* info = get_page_boundary_info(page_boundary_sort_[i].info_pos_);
    if (info->exact_match(btree_level, layer, prefixes, low, high)) {
      info->removed_ = true;

  LOG(ERROR) << "WTF. The old page_boundary_info entry was not found. page_id="
    << assorted::Hex(page_id);
inline void MasstreeComposeContext::assert_page_boundary_not_exists(
  uint8_t btree_level,
  for (uint32_t i = 0; LIKELY(i < page_boundary_elements_); ++i) {
    if (LIKELY(page_boundary_sort_[i].hash_ != hash)) {
    ASSERT_ND(page_boundary_sort_[i].info_pos_ < page_boundary_info_cur_pos_);
    const PageBoundaryInfo* info = get_page_boundary_info(page_boundary_sort_[i].info_pos_);
    bool exists = info->exact_match(btree_level, layer, prefixes, low, high);
    info->exact_match(btree_level, layer, prefixes, low, high);
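// Annotation (added, not in the original): when a level is closed, every page in
// its foster chain registers a PageBoundaryInfo entry keyed by
// (btree_level, layer, prefix slices, low fence, high fence). These entries are
// later sorted by hash and consulted by install_snapshot_pointers() to match a
// volatile page's boundaries to the snapshot page covering the same key range.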
ErrorCode MasstreeComposeContext::close_level_register_page_boundaries() {
  PathLevel* last = get_last_level();

  uint32_t counted = 0;
    const MasstreePage* page = get_page(cur);
    ASSERT_ND(page->get_low_fence() == prev);
    ASSERT_ND(page->get_high_fence() > prev);
    ASSERT_ND(page->get_layer() == last->layer_);
    prev = page->get_high_fence();
    cur = page->get_foster_major().get_offset();
  ASSERT_ND(counted == last->page_count_);

    const MasstreePage* page = get_page(cur);
    const uint8_t btree_level = page->get_btree_level();
    const uint8_t layer = last->layer_;
    const KeySlice low = page->get_low_fence();
    const KeySlice high = page->get_high_fence();
    ASSERT_ND(cur == last->head_ || low == prev_high);

    uint16_t info_size = (layer + 3U) * sizeof(KeySlice);
    if (UNLIKELY(page_boundary_elements_ == max_page_boundary_elements_
      || page_boundary_info_cur_pos_ + (info_size / 8U) >= page_boundary_info_capacity_)) {
      LOG(INFO) << "Automatically expanding memory for page_boundary_info. This is super-rare";
        get_writer()->get_intermediate_size() * 2U,
      refresh_page_boundary_info_variables();
    ASSERT_ND(page_boundary_elements_ < max_page_boundary_elements_);
    ASSERT_ND(page_boundary_info_cur_pos_ + (info_size / 8U) < page_boundary_info_capacity_);
    assert_page_boundary_not_exists(btree_level, layer, cur_prefix_slices_, low, high);

    PageBoundaryInfo* info = get_page_boundary_info(page_boundary_info_cur_pos_);
    info->btree_level_ = btree_level;
    info->removed_ = false;
    info->layer_ = layer;
    ASSERT_ND(info->dynamic_sizeof() == info_size);
    info->local_snapshot_pointer_high_ = local_id >> 32;
    info->local_snapshot_pointer_low_ = static_cast<uint32_t>(local_id);
    for (uint8_t i = 0; i < layer; ++i) {
      info->slices_[i] = cur_prefix_slices_[i];
    info->slices_[layer] = low;
    info->slices_[layer + 1] = high;

    page_boundary_sort_[page_boundary_elements_].hash_
    page_boundary_sort_[page_boundary_elements_].info_pos_ = page_boundary_info_cur_pos_;
    ++page_boundary_elements_;
    page_boundary_info_cur_pos_ += info->dynamic_sizeof() / 8U;
    ASSERT_ND(!page->get_foster_major().is_null() || cur == last->tail_);
    cur = page->get_foster_major().get_offset();
void MasstreeComposeContext::sort_page_boundary_info() {
  ASSERT_ND(page_boundary_info_cur_pos_ <= page_boundary_info_capacity_);
  ASSERT_ND(page_boundary_elements_ <= max_page_boundary_elements_);
  debugging::StopWatch watch;
  std::sort(page_boundary_sort_, page_boundary_sort_ + page_boundary_elements_);
  VLOG(0) << "MasstreeStorage-" << id_ << " sorted " << page_boundary_elements_ << " entries"
    << " to track page_boundary_info in " << watch.elapsed_ms() << "ms";

  uint8_t btree_level,
  PageBoundarySort dummy;
  const PageBoundarySort* begin_entry = std::lower_bound(
    page_boundary_sort_,
    page_boundary_sort_ + page_boundary_elements_,
  uint32_t begin = begin_entry - page_boundary_sort_;
  for (uint32_t i = begin; i < page_boundary_elements_; ++i) {
    if (page_boundary_sort_[i].hash_ != dummy.hash_) {
    const PageBoundaryInfo* info = get_page_boundary_info(page_boundary_sort_[i].info_pos_);
    if (!info->exact_match(btree_level, layer, prefixes, low, high)) {
    ASSERT_ND(page_id < get_writer()->get_next_page_id());

  DVLOG(1) << "exactly corresponding page boundaries not found. cannot install a snapshot"
ErrorStack MasstreeComposeContext::install_snapshot_pointers(uint64_t* installed_count) const {
  *installed_count = 0;
  VolatilePagePointer pointer = storage_.get_control_block()->root_page_pointer_.volatile_pointer_;
  if (pointer.is_null()) {
    VLOG(0) << "No volatile pages.. maybe while restart?";

  const memory::GlobalVolatilePageResolver& resolver
  MasstreeIntermediatePage* volatile_root
    = reinterpret_cast<MasstreeIntermediatePage*>(resolver.resolve_offset(pointer));
  std::memset(prefixes, 0, sizeof(prefixes));

  debugging::StopWatch watch;

  struct InstallationInfo {
  std::vector<InstallationInfo> infos;
  for (MasstreeIntermediatePointerIterator it(root_); it.is_valid(); it.next()) {
    if (snapshot_id == snapshot_id_) {
    InstallationInfo info = {it.get_low_key(), it.get_high_key(), pointer};
    infos.emplace_back(info);

  std::vector<VolatilePagePointer> recursion_targets;
    xct::McsOwnerlessLockScope lock_scope(volatile_root->get_lock_address());
    for (MasstreeIntermediatePointerIterator it(volatile_root); it.is_valid(); it.next()) {
      while (cur < infos.size() && (infos[cur].low_ > low || infos[cur].high_ < high)) {
      if (cur == infos.size()) {
      ASSERT_ND(infos[cur].low_ <= low && infos[cur].high_ >= high);
      DualPagePointer& pointer = volatile_root->get_minipage(it.index_).pointers_[it.index_mini_];
      if (infos[cur].low_ == low && infos[cur].high_ == high) {
        pointer.snapshot_pointer_ = infos[cur].pointer_;
      if (!pointer.volatile_pointer_.is_null()) {
        recursion_targets.emplace_back(pointer.volatile_pointer_);

  for (VolatilePagePointer pointer : recursion_targets) {
    MasstreePage* child = reinterpret_cast<MasstreePage*>(resolver.resolve_offset(pointer));
    if (no_next_layer && child->is_border()) {

  LOG(INFO) << "MasstreeStorage-" << id_ << " installed " << *installed_count << " pointers"
    << " in " << watch.elapsed_ms() << "ms";
inline ErrorCode MasstreeComposeContext::install_snapshot_pointers_recurse(
  const memory::GlobalVolatilePageResolver& resolver,
  MasstreePage* volatile_page,
  uint64_t* installed_count) const {
  if (volatile_page->is_border()) {
    return install_snapshot_pointers_recurse_border(
      reinterpret_cast<MasstreeBorderPage*>(volatile_page),
    return install_snapshot_pointers_recurse_intermediate(
      reinterpret_cast<MasstreeIntermediatePage*>(volatile_page),

ErrorCode MasstreeComposeContext::install_snapshot_pointers_recurse_intermediate(
  const memory::GlobalVolatilePageResolver& resolver,
  MasstreeIntermediatePage* volatile_page,
  uint64_t* installed_count) const {
  ASSERT_ND(volatile_page->get_layer() == layer);
  const bool is_child_intermediate = volatile_page->get_btree_level() >= 2U;
  const bool follow_children = !no_next_layer || is_child_intermediate;
  const uint8_t btree_level = volatile_page->get_btree_level() - 1U;

  uint16_t recursion_target_count = 0;
    xct::McsOwnerlessLockScope lock_scope(volatile_page->get_lock_address());
    for (MasstreeIntermediatePointerIterator it(volatile_page); it.is_valid(); it.next()) {
      DualPagePointer& pointer = volatile_page->get_minipage(it.index_).pointers_[it.index_mini_];
        = lookup_page_boundary_info(btree_level, layer, prefixes, low, high);
      if (snapshot_pointer != 0) {
        pointer.snapshot_pointer_ = snapshot_pointer;
        ++(*installed_count);
        DVLOG(3) << "Oops, no matching boundary. low=" << low << ", high=" << high;
      if (follow_children && !pointer.volatile_pointer_.is_null()) {
        recursion_targets[recursion_target_count] = pointer.volatile_pointer_;
        ++recursion_target_count;

  for (uint16_t i = 0; i < recursion_target_count; ++i) {
      = reinterpret_cast<MasstreePage*>(resolver.resolve_offset(recursion_targets[i]));
    if (no_next_layer && child->is_border()) {

ErrorCode MasstreeComposeContext::install_snapshot_pointers_recurse_border(
  const memory::GlobalVolatilePageResolver& resolver,
  MasstreeBorderPage* volatile_page,
  uint64_t* installed_count) const {
  ASSERT_ND(volatile_page->get_layer() == layer);
  SlotIndex key_count = volatile_page->get_key_count();
  for (SlotIndex i = 0; i < key_count; ++i) {
    if (!volatile_page->does_point_to_layer(i)) {
    DualPagePointer* next_pointer = volatile_page->get_next_layer(i);
    if (next_pointer->volatile_pointer_.is_null()) {
      = reinterpret_cast<MasstreePage*>(resolver.resolve_offset(next_pointer->volatile_pointer_));
    ASSERT_ND(child->is_high_fence_supremum());
    uint8_t btree_level = child->get_btree_level();
    KeySlice slice = volatile_page->get_slice(i);
    prefixes[layer] = slice;
    if (snapshot_pointer != 0) {
      next_pointer->snapshot_pointer_ = snapshot_pointer;
      ++(*installed_count);
      DVLOG(2) << "Oops, no matching boundary. border next layer";
    if (child->is_border() && no_next_next_layer) {
LOG(INFO) << "Keep-all-volatile: Storage-" << storage_.get_name()
  << " is configured to keep all volatile pages.";

if (volatile_page == nullptr) {
  LOG(INFO) << "No volatile root page. Probably while restart";

result.combine(drop_volatiles_recurse(args, pointer));

partition_high = data->low_keys_[cur + 1U];
if (high <= partition_high) {
ASSERT_ND(cur < data->partition_count_);
VLOG(0) << "Not exactly matching page boundary.";
result.combine(drop_volatiles_recurse(args, pointer));

LOG(INFO) << "Oh, but keep-all-volatile is on. Storage-" << storage_.get_name()
  << " is configured to keep all volatile pages.";

if (volatile_page == nullptr) {
  LOG(INFO) << "Oh, but root volatile page already null";

LOG(INFO) << "Oh, but " << storage_ << " is configured to keep the root page.";

LOG(INFO) << "Okay, drop em all!!";
drop_all_recurse(args, root_pointer);
void MasstreeComposer::drop_all_recurse(
  if (page == nullptr) {
  drop_all_recurse_page_only(args, page);

void MasstreeComposer::drop_all_recurse_page_only(
  const Composer::DropVolatilesArguments& args,
  MasstreePage* page) {
  if (page->has_foster_child()) {
    MasstreePage* minor = resolve_volatile(page->get_foster_minor());
    MasstreePage* major = resolve_volatile(page->get_foster_major());
    drop_all_recurse_page_only(args, minor);
    drop_all_recurse_page_only(args, major);

  if (page->is_border()) {
    MasstreeBorderPage* border = as_border(page);
    const SlotIndex key_count = page->get_key_count();
    for (SlotIndex i = 0; i < key_count; ++i) {
      if (border->does_point_to_layer(i)) {
        DualPagePointer* pointer = border->get_next_layer(i);
        drop_all_recurse(args, pointer);

    for (MasstreeIntermediatePointerIterator it(casted); it.is_valid(); it.next()) {
      DualPagePointer* pointer = const_cast<DualPagePointer*>(&it.get_pointer());
      drop_all_recurse(args, pointer);

inline MasstreePage* MasstreeComposer::resolve_volatile(VolatilePagePointer pointer) {
  if (pointer.is_null()) {
  const memory::GlobalVolatilePageResolver& page_resolver
  return reinterpret_cast<MasstreePage*>(page_resolver.resolve_offset(pointer));
inline Composer::DropResult MasstreeComposer::drop_volatiles_recurse(
  const Composer::DropVolatilesArguments& args,
  DualPagePointer* pointer) {
  Composer::DropResult result(args);
  if (pointer->volatile_pointer_.is_null()) {

  MasstreePage* page = resolve_volatile(pointer->volatile_pointer_);
  if (page->has_foster_child()) {
    MasstreePage* minor = resolve_volatile(page->get_foster_minor());
    MasstreePage* major = resolve_volatile(page->get_foster_major());
    if (page->is_border()) {
      result.combine(drop_volatiles_border(args, as_border(minor)));
      result.combine(drop_volatiles_border(args, as_border(major)));
      result.combine(drop_volatiles_intermediate(args, as_intermediate(minor)));
      result.combine(drop_volatiles_intermediate(args, as_intermediate(major)));
    ASSERT_ND(result.max_observed_ >= args.snapshot_.valid_until_epoch_);
    if (page->is_border()) {
      result.combine(drop_volatiles_border(args, as_border(page)));
      result.combine(drop_volatiles_intermediate(args, as_intermediate(page)));

  if (result.max_observed_ > args.snapshot_.valid_until_epoch_ || !result.dropped_all_) {
    DVLOG(1) << "Couldn't drop a volatile page that has a recent modification";
    result.dropped_all_ = false;

  ASSERT_ND(result.max_observed_ == args.snapshot_.valid_until_epoch_);
  bool updated_pointer = is_updated_pointer(args, pointer->snapshot_pointer_);
  if (updated_pointer) {
    if (is_to_keep_volatile(page->get_layer(), page->get_btree_level())) {
      DVLOG(2) << "Exempted";
      result.dropped_all_ = false;
      if (page->has_foster_child()) {
        drop_foster_twins(args, page);
      args.drop(engine_, pointer->volatile_pointer_);
      pointer->volatile_pointer_.clear();
    DVLOG(1) << "Couldn't drop a volatile page that has a non-matching boundary";
    result.dropped_all_ = false;
2757 void MasstreeComposer::drop_foster_twins(
2758 const Composer::DropVolatilesArguments& args,
2759 MasstreePage* page) {
2761 MasstreePage* minor = resolve_volatile(page->get_foster_minor());
2762 MasstreePage* major = resolve_volatile(page->get_foster_major());
2763 if (minor->has_foster_child()) {
2764 drop_foster_twins(args, minor);
2766 if (major->has_foster_child()) {
2767 drop_foster_twins(args, major);
2769 args.drop(engine_, page->get_foster_minor());
2770 args.drop(engine_, page->get_foster_major());
2773 Composer::DropResult MasstreeComposer::drop_volatiles_intermediate(
2774 const Composer::DropVolatilesArguments& args,
2775 MasstreeIntermediatePage* page) {
2779 Composer::DropResult result(args);
2780 if (page->has_foster_child()) {
2784 MasstreeIntermediatePage* minor =
as_intermediate(resolve_volatile(page->get_foster_minor()));
2785 MasstreeIntermediatePage* major =
as_intermediate(resolve_volatile(page->get_foster_major()));
2786 result.combine(drop_volatiles_intermediate(args, minor));
2787 result.combine(drop_volatiles_intermediate(args, major));
2794 for (MasstreeIntermediatePointerIterator it(page); it.is_valid(); it.next()) {
2795 DualPagePointer* pointer = &page->get_minipage(it.index_).pointers_[it.index_mini_];
2796 result.combine(drop_volatiles_recurse(args, pointer));
inline Composer::DropResult MasstreeComposer::drop_volatiles_border(
  const Composer::DropVolatilesArguments& args,
  MasstreeBorderPage* page) {
  Composer::DropResult result(args);
  if (page->has_foster_child()) {
    MasstreeBorderPage* minor = as_border(resolve_volatile(page->get_foster_minor()));
    MasstreeBorderPage* major = as_border(resolve_volatile(page->get_foster_major()));
    result.combine(drop_volatiles_border(args, minor));
    result.combine(drop_volatiles_border(args, major));
  } else {
    const SlotIndex key_count = page->get_key_count();
    for (SlotIndex i = 0; i < key_count; ++i) {
      if (page->does_point_to_layer(i)) {
        // This record is a next-layer pointer; recurse into the next layer's root.
        DualPagePointer* pointer = page->get_next_layer(i);
        result.combine(drop_volatiles_recurse(args, pointer));
      } else {
        // A regular record; remember the epoch of its last modification.
        Epoch epoch = page->get_owner_id(i)->xct_id_.get_epoch();
        result.on_rec_observed(epoch);
      }
    }
  }
  return result;
}
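// For border pages, every record that is not a next-layer pointer contributes its
// owner XctId's epoch via on_rec_observed(); DropResult keeps the maximum in
// max_observed_, which is exactly what drop_volatiles_recurse() compares against
// the snapshot's valid_until_epoch_ to decide whether the subtree is droppable.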
inline bool MasstreeComposer::is_updated_pointer(
  const Composer::DropVolatilesArguments& args,
  SnapshotPagePointer pointer) const {
  SnapshotId snapshot_id = extract_snapshot_id_from_snapshot_pointer(pointer);
  return (pointer != 0 && args.snapshot_.id_ == snapshot_id);
}
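// Illustration (hypothetical node/local_page_id values): a pointer built as
// to_snapshot_page_pointer(args.snapshot_.id_, node, local_page_id) carries the
// current snapshot's ID, so is_updated_pointer() returns true. A null pointer, or
// one stamped with an older SnapshotId, returns false: this subtree received no
// new snapshot page, hence its volatile page must be kept.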
inline bool MasstreeComposer::is_to_keep_volatile(uint8_t layer, uint16_t btree_level) const {
  const MasstreeMetadata* meta = storage_.get_masstree_metadata();
  // Volatile pages in layers shallower than the threshold are always kept.
  if (layer < meta->snapshot_drop_volatile_pages_layer_threshold_) {
    return true;
  }
  // In deeper layers, keep only pages high enough in that layer's B-tree.
  return (btree_level >= meta->snapshot_drop_volatile_pages_btree_levels_);
}
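// Example with assumed metadata values: if
// snapshot_drop_volatile_pages_layer_threshold_ = 1 and
// snapshot_drop_volatile_pages_btree_levels_ = 2, every volatile page in layer 0
// is kept regardless of its level, while in layer 1 and deeper only pages at
// B-tree level 2 or higher survive the drop.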
std::ostream& operator<<(std::ostream& o, const MasstreeComposeContext::PathLevel& v) {
  o << "<PathLevel>"
    << "<layer_>" << static_cast<int>(v.layer_) << "</layer_>"
    << "<next_original_>" << static_cast<int>(v.next_original_) << "</next_original_>"
    << "<next_original_mini>" << static_cast<int>(v.next_original_mini_) << "</next_original_mini>"
    << "<head>" << v.head_ << "</head>"
    << "<tail_>" << v.tail_ << "</tail_>"
    << "<page_count_>" << v.page_count_ << "</page_count_>"
    << "<low_fence_>" << assorted::Hex(v.low_fence_) << "</low_fence_>"
    << "<high_fence_>" << assorted::Hex(v.high_fence_) << "</high_fence_>"
    << "<next_original_slice_>" << assorted::Hex(v.next_original_slice_) << "</next_original_slice_>"
    << "</PathLevel>";
  return o;
}