20 #include <glog/logging.h>
70 reinterpret_cast<Page**>(&page),
86 DVLOG(0) <<
"Other thread seems growing the first-layer. let him do that";
106 LOG(INFO) <<
"Uninitializing a masstree-storage " <<
get_name();
108 if (!
control_block_->root_page_pointer_.volatile_pointer_.is_null()) {
127 const uint16_t kDummyNode = 0;
141 control_block_->root_page_pointer_.volatile_pointer_.set(kDummyNode, root_offset);
157 child_pointer.
set(kDummyNode, child_offset);
166 LOG(ERROR) <<
"This masstree-storage already exists: " <<
get_name();
173 LOG(INFO) <<
"Newly created an masstree-storage " <<
get_name();
195 reinterpret_cast<Page**>(&volatile_root)));
197 control_block_->root_page_pointer_.volatile_pointer_ = volatile_pointer;
199 LOG(INFO) <<
"This is an empty masstree: " <<
get_meta();
204 LOG(INFO) <<
"Loaded a masstree-storage " <<
get_meta();
216 uint8_t current_layer,
227 ASSERT_ND(cur->get_layer() == current_layer);
229 if (cur->is_border()) {
234 if (
UNLIKELY(cur->has_foster_child())) {
236 if (cur->within_foster_minor(slice)) {
264 if (!next->
is_locked() && !cur->is_locked()) {
266 Adopt functor(context, page, next);
270 DVLOG(1) <<
"Someone else seems doing something there.. already adopting? skip it";
276 DVLOG(0) <<
"Interesting. concurrent thread affected the search. local retry";
297 reinterpret_cast<MasstreeIntermediatePage**>(&layer_root)));
298 for (uint16_t current_layer = 0;; ++current_layer) {
299 KeyLength remainder_length = key_length - current_layer * 8;
301 const void* suffix =
reinterpret_cast<const char*
>(key) + (current_layer + 1) * 8;
379 reinterpret_cast<Page**>(page),
400 if (
UNLIKELY(next_root->has_foster_child())) {
420 ASSERT_ND(physical_payload_hint >= payload_count);
429 reinterpret_cast<MasstreeIntermediatePage**>(&layer_root)));
430 for (uint16_t layer = 0;; ++layer) {
433 const void*
const suffix =
reinterpret_cast<const char*
>(key) + (layer + 1) *
sizeof(
KeySlice);
474 VLOG(0) <<
"Interesting. Next-layer-retry due to concurrent transaction";
490 physical_payload_hint,
491 get_meta().should_aggresively_create_next_layer(layer, remainder),
504 physical_payload_hint,
553 VLOG(0) <<
"Interesting. Moved by concurrent transaction";
568 physical_payload_hint,
581 physical_payload_hint,
593 DLOG(INFO) <<
"Probably this should be caught beforehand. next_layer bit is on";
614 if (payload_length > *payload_capacity) {
616 DVLOG(0) <<
"buffer too small??" << payload_length <<
":" << *payload_capacity;
617 *payload_capacity = payload_length;
620 *payload_capacity = payload_length;
638 LOG(WARNING) <<
"short record";
741 common_log = log_entry;
755 common_log = log_entry;
767 common_log = log_entry;
787 LOG(WARNING) <<
"short record ";
805 template <
typename PAYLOAD>
820 LOG(WARNING) <<
"short record ";
825 *value += *
reinterpret_cast<const PAYLOAD*
>(ptr);
861 #define EXPIN_5(x) template ErrorCode MasstreeStoragePimpl::increment_general< x > \
862 (thread::Thread* context, const RecordLocation& location, \
863 const void* be_key, KeyLength key_length, x* value, PayloadLength payload_offset)
void assert_aligned_page(const void *page)
0x080A : "STORAGE: The record's payload is smaller than requested" .
const KeySlice kInfimumSlice
const StorageName & get_name() const
Metadata meta_
common part of the metadata.
xct::Xct & get_current_xct()
Returns the transaction that is currently running on this thread.
SlotIndex get_key_count() const __attribute__((always_inline))
physical key count (those keys might be deleted) in this page.
ErrorCode find_border_physical(thread::Thread *context, MasstreePage *layer_root, uint8_t current_layer, bool for_writes, KeySlice slice, MasstreeBorderPage **border) __attribute__((always_inline))
Find a border node in the layer that corresponds to the given key slice.
Represents a pointer to another page (usually a child page).
uint16_t PayloadLength
Represents a byte-length of a payload in this package.
0x080C : "STORAGE: This key is not found in this storage" .
uint16_t SlotIndex
Index of a record in a (border) page.
bool is_both_null() const
return value for find_key_for_reserve().
ErrorCode reserve_record_normalized(thread::Thread *context, KeySlice key, PayloadLength payload_count, PayloadLength physical_payload_hint, RecordLocation *result)
Automatically calls if uninitialize() wasn't called when it gets out of scope, and just complains whe...
Page * to_page(const void *address)
super-dirty way to obtain Page the address belongs to.
ErrorCode locate_record_normalized(thread::Thread *context, KeySlice key, bool for_writes, RecordLocation *result)
Identifies page and record for the normalized key.
#define ERROR_STACK(e)
Instantiates ErrorStack with the given foedus::error_code, creating an error stack with the current f...
void populate(StorageId storage_id, const void *key, KeyLength key_length, const void *payload, PayloadLength payload_offset, PayloadLength payload_count) __attribute__((always_inline))
PayloadLength get_max_payload_length(SlotIndex index) const __attribute__((always_inline))
Page pool for volatile read/write store (VolatilePage) and the read-only bufferpool (SnapshotPage)...
const KeyLength kMaxKeyLength
Max length of a key.
bool is_locked() const __attribute__((always_inline))
Root package of FOEDUS (Fast Optimistic Engine for Data Unification Services).
Represents a record of write-access during a transaction.
bool is_moved() const __attribute__((always_inline))
ErrorCode grab_one(PagePoolOffset *offset)
Grab only one page.
bool is_border() const __attribute__((always_inline))
Represents one thread running on one NUMA core.
uint32_t PagePoolOffset
Offset in PagePool that compactly represents the page address (unlike 8 bytes pointer).
ErrorCode retrieve_part_general(thread::Thread *context, const RecordLocation &location, void *payload, PayloadLength payload_offset, PayloadLength payload_count)
void prefetch_general() const __attribute__((always_inline))
prefetch upto keys/separators, whether this page is border or interior.
ErrorCode follow_page(thread::Thread *context, bool for_writes, storage::DualPagePointer *pointer, MasstreePage **page)
Thread::follow_page_pointer() for masstree.
Declares all log types used in this storage type.
const GlobalVolatilePageResolver & get_global_volatile_page_resolver() const
Returns the page resolver to convert volatile page ID to page pointer.
Result of track_moved_record().
Represents a pointer to a volatile page with modification count for preventing ABA.
Represents a user transaction.
MiniPage & get_minipage(uint8_t index) __attribute__((always_inline))
Persistent status part of Transaction ID.
A system transaction to reserve a physical record(s) in a border page.
ErrorStack load_one_volatile_page(cache::SnapshotFileSet *fileset, storage::SnapshotPagePointer snapshot_pointer, storage::VolatilePagePointer *pointer, storage::Page **page)
Another convenience method that also reads an existing snapshot page to the volatile page...
ErrorStack uninitialize() override final
Typical implementation of Initializable::uninitialize() that provides uninitialize-once semantics...
Represents one border page in Masstree Storage.
Brings error stacktrace information as return value of functions.
DualPagePointer * get_next_layer(SlotIndex index) __attribute__((always_inline))
void prefetch() const
prefetch upto separators.
XctId xct_id_
the second 64bit: Persistent status part of TID.
static uint16_t calculate_log_length(KeyLength key_length) __attribute__((always_inline))
Engine * engine_
Most attachable object stores an engine pointer (local engine), so we define it here.
ErrorCode overwrite_general(thread::Thread *context, const RecordLocation &location, const void *be_key, KeyLength key_length, const void *payload, PayloadLength payload_offset, PayloadLength payload_count)
implementation of overwrite_record family.
The storage has been created and ready for use.
ErrorCode add_to_write_set(storage::StorageId storage_id, RwLockableXctId *owner_id_address, char *payload_address, log::RecordLogType *log_entry)
Add the given record to the write set of this transaction.
uint64_t KeySlice
Each key slice is an 8-byte integer.
Holds a set of read-only file objects for snapshot files.
ErrorCode retrieve_general(thread::Thread *context, const RecordLocation &location, void *payload, PayloadLength *payload_capacity)
implementation of get_record family.
ErrorCode follow_page_pointer(storage::VolatilePageInit page_initializer, bool tolerate_null_pointer, bool will_modify, bool take_ptr_set_snapshot, storage::DualPagePointer *pointer, storage::Page **page, const storage::Page *parent, uint16_t index_in_parent)
A general method to follow (read) a page pointer.
A base class for MasstreeInsertLogType/MasstreeDeleteLogType/MasstreeOverwriteLogType.
Definitions of IDs in this package and a few related constant values.
uint16_t KeyLength
Represents a byte-length of a key in this package.
Common base of MasstreeIntermediatePage and MasstreeBorderPage.
Log type of masstree-storage's insert operation.
#define LIKELY(x)
Hints that x is highly likely true.
char * get_record(SlotIndex index) __attribute__((always_inline))
VolatilePagePointer get_foster_minor() const __attribute__((always_inline))
PayloadLength get_payload_length(SlotIndex index) const __attribute__((always_inline))
VolatilePagePointer get_foster_major() const __attribute__((always_inline))
ErrorStack load(const StorageControlBlock &snapshot_block)
DualPagePointer * get_first_root_pointer_address()
The MCS reader-writer lock variant of LockableXctId.
storage::Page * resolve(storage::VolatilePagePointer ptr) const
Shorthand for get_global_volatile_page_resolver.resolve_offset()
DualPagePointer pointers_[kMaxIntermediateMiniSeparators+1]
FindKeyForReserveResult find_key_for_reserve(SlotIndex from_index, SlotIndex to_index, KeySlice slice, const void *suffix, KeyLength remainder) const __attribute__((always_inline))
This is for the case we are looking for either the matching slot or the slot we will modify...
log::ThreadLogBuffer & get_thread_log_buffer()
Returns the private log buffer for this thread.
VolatilePagePointer volatile_pointer_
xct::TrackMovedRecordResult track_moved_record(xct::RwLockableXctId *old_address, xct::WriteXctAccess *write_set)
Resolves a "moved" record.
void release_pages_recursive_parallel(Engine *engine)
This method is used when we release a large number of volatile pages, most likely when we drop a stor...
Log type of masstree-storage's delete operation.
bool out_split_needed_
[Out]
xct::ReadXctAccess * readset_
If this method took a read-set on the returned record, points to the corresponding read-set...
ErrorCode insert_general(thread::Thread *context, const RecordLocation &location, const void *be_key, KeyLength key_length, const void *payload, PayloadLength payload_count)
implementation of insert_record family.
SlotIndex find_key_normalized(SlotIndex from_index, SlotIndex to_index, KeySlice slice) const __attribute__((always_inline))
Specialized version for 8 byte native integer search.
MasstreeStorageControlBlock * control_block_
The shared data on shared memory that has been initialized in some SOC or master engine.
ErrorCode reserve_record(thread::Thread *context, const void *key, KeyLength key_length, PayloadLength payload_count, PayloadLength physical_payload_hint, RecordLocation *result)
Like locate_record(), this is also a logical operation.
ErrorCode add_related_write_set(ReadXctAccess *related_read_set, RwLockableXctId *tid_address, char *payload_address, log::RecordLogType *log_entry)
Registers a write-set related to an existing read-set.
ErrorCode register_record_write_log(thread::Thread *context, const RecordLocation &location, log::RecordLogType *log_entry)
Used in the following methods.
Calls Initializable::uninitialize() automatically when it gets out of scope.
ErrorStack initialize() override final
Typical implementation of Initializable::initialize() that provides initialize-once semantics...
Log type of masstree-storage's overwrite operation.
0x0802 : "STORAGE: This storage already exists" .
ErrorCode get_first_root(thread::Thread *context, bool for_write, MasstreeIntermediatePage **root)
Root-node related, such as a method to retrieve 1st-root, to grow, etc.
A system transaction to split a border page in Master-Tree.
NumaNodeMemoryRef * get_node_memory(foedus::thread::ThreadGroupId group) const
SnapshotPagePointer snapshot_pointer_
const SlotIndex kBorderPageMaxSlots
Maximum number of slots in one MasstreeBorderPage.
bool is_next_layer() const __attribute__((always_inline))
void set(uint8_t numa_node, memory::PagePoolOffset offset)
uint8_t get_layer() const __attribute__((always_inline))
Layer-0 stores the first 8 byte slice, Layer-1 next 8 byte...
bool is_deleted() const __attribute__((always_inline))
KeySlice get_high_fence() const __attribute__((always_inline))
P * resolve_cast(storage::VolatilePagePointer ptr) const
resolve() plus reinterpret_cast
Pimpl object of MasstreeStorage.
bool within_foster_minor(KeySlice slice) const __attribute__((always_inline))
Log type of masstree-storage's update operation.
ErrorCode increment_general(thread::Thread *context, const RecordLocation &location, const void *be_key, KeyLength key_length, PAYLOAD *value, PayloadLength payload_offset)
implementation of increment_record family.
bool has_foster_child() const __attribute__((always_inline))
KeySlice get_low_fence() const __attribute__((always_inline))
void initialize_volatile_page(StorageId storage_id, VolatilePagePointer page_id, uint8_t layer, KeySlice low_fence, KeySlice high_fence)
SlotIndex find_key(KeySlice slice, const void *suffix, KeyLength remainder) const __attribute__((always_inline))
Navigates a searching key-slice to one of the record in this page.
static uint16_t calculate_log_length(KeyLength key_length, PayloadLength payload_count) __attribute__((always_inline))
ErrorCode delete_general(thread::Thread *context, const RecordLocation &location, const void *be_key, KeyLength key_length)
implementation of delete_record family.
bool is_high_fence_supremum() const __attribute__((always_inline))
const MasstreeMetadata & get_meta() const
xct::TrackMovedRecordResult track_moved_record(Engine *engine, xct::RwLockableXctId *old_address, xct::WriteXctAccess *write_set)
#define CHECK_ERROR_CODE(x)
This macro calls x and checks its returned error code.
ErrorCode follow_layer(thread::Thread *context, bool for_writes, MasstreeBorderPage *parent, SlotIndex record_index, MasstreePage **page) __attribute__((always_inline))
Follows to next layer's root page.
const PageVersion * get_version_address() const __attribute__((always_inline))
void initialize_volatile_page(StorageId storage_id, VolatilePagePointer page_id, uint8_t layer, uint8_t level, KeySlice low_fence, KeySlice high_fence)
#define CHECK_ERROR(x)
This macro calls x and checks its returned value.
PageVersionStatus status_
0x0809 : "STORAGE: The record's payload is larger than the buffer" .
const ErrorStack kRetOk
Normal return value for no-error case.
SlotIndex index_
Index of the record in the page.
char * get_record_payload(SlotIndex index) __attribute__((always_inline))
ThreadGroupId get_numa_node() const
PagePool * get_volatile_pool()
ErrorCode add_to_page_version_set(const storage::PageVersion *version_address, storage::PageVersionStatus observed)
Add the given page version to the page version set of this transaction.
ErrorStack drop()
Storage-wide operations, such as drop, create, etc.
A system transaction to grow a first-layer root.
A system transaction to grow a second- or deeper-layer root.
void memory_fence_consume()
Equivalent to std::atomic_thread_fence(std::memory_order_consume).
void populate(StorageId storage_id, const void *key, KeyLength key_length) __attribute__((always_inline))
void memory_fence_acquire()
Equivalent to std::atomic_thread_fence(std::memory_order_acquire).
ErrorStack create(const MasstreeMetadata &metadata)
const KeySlice kSupremumSlice
#define INSTANTIATE_ALL_NUMERIC_TYPES(M)
INSTANTIATE_ALL_TYPES minus std::string.
xct::RwLockableXctId * get_owner_id(SlotIndex index) __attribute__((always_inline))
Represents one intermediate page in Masstree Storage.
const LocalPageResolver & get_resolver() const
Gives an object to resolve an offset in this page pool (thus local) to an actual pointer and vice ver...
Base class for log type of record-wise operation.
void populate(StorageId storage_id, const void *key, KeyLength key_length, const void *payload, PayloadLength payload_count) __attribute__((always_inline))
#define UNLIKELY(x)
Hints that x is highly likely false.
MasstreeBorderPage * page_
The border page containing the record.
0x0A05 : "XCTION : Aborted a transaction because of a race condition. This is an expected error in hi...
bool is_moved() const __attribute__((always_inline))
#define ASSERT_ND(x)
A warning-free wrapper macro of assert() that has no performance effect in release mode even when 'x'...
void populate(StorageId storage_id, const void *key, KeyLength key_length, const void *payload, PayloadLength payload_count) __attribute__((always_inline))
KeySlice slice_layer(const void *be_bytes, KeyLength key_length, Layer current_layer)
Extract a part of a big-endian byte array of given length as KeySlice.
ErrorCode upsert_general(thread::Thread *context, const RecordLocation &location, const void *be_key, KeyLength key_length, const void *payload, PayloadLength payload_count)
implementation of upsert_record family.
ErrorCode run_nested_sysxct(xct::SysxctFunctor *functor, uint32_t max_retries=0)
Methods related to System transactions (sysxct) nested under this thread.
#define WRAP_ERROR_CODE(x)
Same as CHECK_ERROR(x) except it receives only an error code, thus more efficient.
A base layout of shared data for all storage types.
xct::XctId observed_
TID as of locate_record() identifying the record.
A system transaction to adopt foster twins into an intermediate page.
bool is_next_layer() const __attribute__((always_inline))
char * reserve_new_log(uint16_t log_length) __attribute__((always_inline))
Reserves a space for a new (uncommitted) log entry at the tail.
return value of MasstreeStoragePimpl::locate_record()/reserve_record().
bool within_fences(KeySlice slice) const __attribute__((always_inline))
bool does_point_to_layer(SlotIndex index) const __attribute__((always_inline))
memory::EngineMemory * get_memory_manager() const
See Memory Manager.
ErrorCode
Enum of error codes defined in error_code.xmacro.
uint8_t find_minipage(KeySlice slice) const __attribute__((always_inline))
Navigates a searching key-slice to one of the mini pages in this page.
static ErrorCode check_next_layer_bit(xct::XctId observed) __attribute__((always_inline))
uint8_t find_pointer(KeySlice slice) const __attribute__((always_inline))
Navigates a searching key-slice to one of pointers in this mini-page.
0x080B : "STORAGE: This key already exists in this storage" .
xct::TrackMovedRecordResult track_moved_record(xct::RwLockableXctId *old_address, xct::WriteXctAccess *write_set) __attribute__((always_inline))
ErrorCode populate_logical(xct::Xct *cur_xct, MasstreeBorderPage *page, SlotIndex index, bool intended_for_write)
Populates the result with XID and possibly readset.
ErrorCode locate_record(thread::Thread *context, const void *key, KeyLength key_length, bool for_writes, RecordLocation *result)
Identifies page and record for the key.
const PageVersion & get_version() const __attribute__((always_inline))