libfoedus-core
FOEDUS Core Library
cache_hashtable.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015, Hewlett-Packard Development Company, LP.
3  * This program is free software; you can redistribute it and/or modify it
4  * under the terms of the GNU General Public License as published by the Free
5  * Software Foundation; either version 2 of the License, or (at your option)
6  * any later version.
7  *
8  * This program is distributed in the hope that it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11  * more details. You should have received a copy of the GNU General Public
12  * License along with this program; if not, write to the Free Software
13  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14  *
15  * HP designates this particular file as subject to the "Classpath" exception
16  * as provided by HP in the LICENSE.txt file that accompanied this code.
17  */
18 #ifndef FOEDUS_CACHE_CACHE_HASHTABLE_HPP_
19 #define FOEDUS_CACHE_CACHE_HASHTABLE_HPP_
20 
21 #include <stdint.h>
22 
23 #include <iosfwd>
24 
25 #include "foedus/assert_nd.hpp"
26 #include "foedus/compiler.hpp"
27 #include "foedus/cxx11.hpp"
28 #include "foedus/error_code.hpp"
29 #include "foedus/error_stack.hpp"
33 #include "foedus/cache/fwd.hpp"
35 #include "foedus/memory/fwd.hpp"
38 #include "foedus/storage/page.hpp"
40 #include "foedus/thread/fwd.hpp"
41 
42 namespace foedus {
43 namespace cache {
44 
 45 // these are actually of the same integer type, but have very different meanings.
// Index of a bucket (logical or physical) in the hashtable array.
 49 typedef uint32_t BucketId;
 59 
// Lossy 32-bit digest of a SnapshotPagePointer used to quickly test whether a
// bucket probably holds a given page. 0 is reserved to mean "empty slot"
// (see get_tag(), which maps a computed 0 to 1).
 67 typedef uint32_t PageIdTag;
 68 
// Index into the overflow-entry array forming a singly-linked list; 0 acts as null.
 72 typedef uint32_t OverflowPointer;
 73 
// Hop-scotch hashing neighborhood size: an entry whose home bucket is B may be
// stored in any of buckets [B, B + kHopNeighbors).
 80 const uint16_t kHopNeighbors = 16U;
81 
// NOTE(review): extraction dropped the struct header for HashFunc here and the
// declaration of its ConstDiv member; the member index lists
// "const assorted::ConstDiv bucket_div_" ("to efficiently divide a number by
// bucket_div_") — confirm against the original file.
// Number of logical (actually used) buckets in this table.
 92  const BucketId logical_buckets_;
// Number of physically allocated buckets; per the member index, at least
// kHopNeighbors more than logical_buckets_ so the hop-scotch scan never
// runs off the end of the array.
 94  const BucketId physical_buckets_;
 97 
 98  explicit HashFunc(BucketId physical_buckets);
 99 
// Primary hash of a snapshot page ID (multiplicative hashing; defined below).
 103  static uint32_t get_hash(storage::SnapshotPagePointer page_id) ALWAYS_INLINE;
// Secondary, lossy tag to disambiguate page IDs with colliding hashes; never 0.
 105  static PageIdTag get_tag(storage::SnapshotPagePointer page_id) ALWAYS_INLINE;
 106 
// Maps a page ID to its home bucket: get_hash(page_id) % logical_buckets_,
// computed via the precalculated constant-division pair (div32/rem32).
// The debug assert cross-checks the optimized remainder against plain %.
 110  BucketId get_bucket_number(storage::SnapshotPagePointer page_id) const ALWAYS_INLINE {
 111  uint32_t seed = get_hash(page_id);
 112  uint32_t quotient = bucket_div_.div32(seed);
 113  uint32_t bucket = bucket_div_.rem32(seed, logical_buckets_, quotient);
 114  ASSERT_ND(bucket == seed % logical_buckets_);
 115  return bucket;
 116  }
 117 
 118  friend std::ostream& operator<<(std::ostream& o, const HashFunc& v);
 119 };
120 
141  uint64_t data_;
142 
143  void reset(ContentId id = 0, PageIdTag tag = 0) {
144  data_ = (static_cast<uint64_t>(tag) << 32) | id;
145  }
146 
147  ContentId get_content_id() const { return data_; }
152  void set_content_id(ContentId id) { data_ = (data_ & 0xFFFFFFFF00000000ULL) | id; }
153  bool is_content_set() const { return get_content_id() != 0; }
154 
155  PageIdTag get_tag() const { return static_cast<PageIdTag>(data_ >> 32); }
157  void set_tag(PageIdTag tag) {
158  data_ = (data_ & 0x00000000FFFFFFFFULL) | (static_cast<uint64_t>(tag) << 32);
159  }
160 };
161 
164  uint16_t count_;
165 
167  if (count_ < 0xFFFFU) {
168  ++count_;
169  }
170  }
172  bool decrement(uint16_t subtract) ALWAYS_INLINE {
173  if (count_ >= subtract) {
174  count_ -= subtract;
175  } else {
176  count_ = 0;
177  }
178  return (count_ > 0);
179  }
180 };
181 
192  CacheBucket bucket_; // +8 -> 8
198  OverflowPointer next_; // +4 -> 12
200  uint16_t padding_; // +2 -> 16 (unused)
201 };
202 
 234  public:
// NOTE(review): the enum body (Doxygen lines 236-237) was dropped in extraction.
 235  enum Constants {
 238  };
 239  CacheHashtable(BucketId physical_buckets, uint16_t numa_node);
 240 
// Opportunistic lookup: returns the ContentId for the page, or 0 if not cached
// (may rarely give a false result; see the inline definition below).
 259  ContentId find(storage::SnapshotPagePointer page_id) const ALWAYS_INLINE;
 260 
// NOTE(review): the first line of this declaration was dropped in extraction;
// per the member index it is "ErrorCode find_batch(" — batched version of find().
 272  uint16_t batch_size,
 273  const storage::SnapshotPagePointer* page_ids,
 274  ContentId* out) const;
 275 
// Called when a cached page is not found, to register the newly read page.
 283  ErrorCode install(storage::SnapshotPagePointer page_id, ContentId content);
 284 
// In/out parameter block for evict().
 286  struct EvictArgs {
// [In] evicts entries up to about this count (maybe a bit more or less).
 288  uint64_t target_count_;
// [Out] number of entries that were actually evicted.
 290  uint64_t evicted_count_;
// [Out] array of ContentId evicted; caller must size it for target_count_
// plus slack, since eviction may slightly overshoot -- TODO confirm.
 292  ContentId* evicted_contents_;
 293  // probably will add more in/out parameters to fine-tune its behavior later.
 294 
// Appends one evicted content to the output array, skipping zeros.
 295  void add_evicted(ContentId content) {
 296  // because of the loose synchronization, we might get zero. skip it.
 297  if (content) {
 298  evicted_contents_[evicted_count_] = content;
 299  ++evicted_count_;
 300  }
 301  }
 302  };
// Evicts some entries from the hashtable (CLOCK-style; see evict_main_loop).
 311  void evict(EvictArgs* args);
 312 
 313  BucketId get_logical_buckets() const ALWAYS_INLINE { return hash_func_.logical_buckets_; }
 314  BucketId get_physical_buckets() const ALWAYS_INLINE { return hash_func_.physical_buckets_; }
 315 
// Delegates to HashFunc to compute the home bucket for a page ID.
 319  BucketId get_bucket_number(storage::SnapshotPagePointer page_id) const ALWAYS_INLINE {
 320  return hash_func_.get_bucket_number(page_id);
 321  }
 322 
// NOTE(review): a declaration was dropped here (Doxygen lines 323-324); the
// member index lists "ErrorStack verify_single_thread() const" (debugging only).
 325 
 326  const CacheBucket& get_bucket(BucketId bucket_id) const { return buckets_[bucket_id]; }
 327 
// Debug-only statistics (see get_stat_single_thread in the member index).
 328  struct Stat {
 329  uint32_t normal_entries_;
// NOTE(review): a second counter field (Doxygen line 330) was dropped in
// extraction -- presumably the overflow-entry count; confirm against the original.
 331  };
 334 
 335  friend std::ostream& operator<<(std::ostream& o, const CacheHashtable& v);
 336 
 337  protected:
 338  const uint16_t numa_node_;
 339  const uint32_t overflow_buckets_count_;
// NOTE(review): declarations dropped here (Doxygen lines 340-343); the member
// index lists hash_func_, buckets_memory_, refcounts_memory_ and
// overflow_buckets_memory_ (AlignedMemory blocks backing the arrays below).
 344 
 345  // these two have the same indexes.
// NOTE(review): the buckets_ and refcounts_ array declarations (Doxygen lines
// 346-347) were dropped in extraction; both are indexed by BucketId.
 348 
 349  // these are for overflow linked list
// Head of the active overflow singly-linked list (0 = empty).
 355  OverflowPointer overflow_buckets_head_;
 356 
// NOTE(review): declarations dropped here; per the member index:
// overflow_buckets_ (the entry array), overflow_free_buckets_head_ (free list),
// and overflow_free_buckets_mutex_ (protects the free list).
 365 
 371 
// CLOCK hand: where the previous eviction pass stopped for the main buckets.
 378  BucketId clockhand_;
 379 
 380  BucketId evict_main_loop(EvictArgs* args, BucketId cur, uint16_t loop);
 381  void evict_overflow_loop(EvictArgs* args, uint16_t loop);
 382 };
383 
385  uint16_t snapshot_id = storage::extract_snapshot_id_from_snapshot_pointer(page_id);
386  uint8_t numa_node = storage::extract_numa_node_from_snapshot_pointer(page_id);
387  uint64_t local_page_id = storage::extract_local_page_id_from_snapshot_pointer(page_id);
388  // snapshot_id is usually very sparse. and has no correlation with local_page_id.
389  // numa_node too. thus just use them as seeds for good old multiplicative hashing.
390  uint32_t seed = snapshot_id * 0x5a2948074497175aULL;
391  seed += numa_node * 0xc30a95f6e63dd908ULL;
392 
393  // local_page_id is 40 bits. 32-40 bits are very sparse, and it has no correlation with
394  // other bits (how come 4kb-th page and 16Tb+4kb-th page has anything in common..).
395  // so, let's treat it with another multiplicative.
396  uint8_t highest_bits = static_cast<uint8_t>(local_page_id >> 32);
397  uint32_t folded_local_page_id = local_page_id + highest_bits * 0xd3561f5bedac324cULL;
398 
399  // now, we don't want to place adjacent pages in adjacent hash buckets. so, let's swap bytes.
400  // this benefits hop-scotch hashing table.
401  uint32_t swaped_page_id = __builtin_bswap32(folded_local_page_id);
402  seed ^= swaped_page_id;
403  return seed;
404 }
405 
407  PageIdTag high_bits = static_cast<PageIdTag>(page_id >> 32);
408  PageIdTag tag = high_bits * 0x8e1d76486c3e638dULL + static_cast<PageIdTag>(page_id);
409  if (tag == 0) {
410  tag = 1; // we avoid 0 as a valid value. this enables sanity checks.
411  }
412  return tag;
413 }
414 
415 inline ContentId CacheHashtable::find(storage::SnapshotPagePointer page_id) const {
416  ASSERT_ND(page_id > 0);
417  BucketId bucket_number = get_bucket_number(page_id);
418  ASSERT_ND(bucket_number < get_logical_buckets());
419 
420  // we prefetch up to 128 bytes (16 entries).
421  assorted::prefetch_cachelines(buckets_ + bucket_number, 2);
422 
423  PageIdTag tag = HashFunc::get_tag(page_id);
424  ASSERT_ND(tag != 0);
425  for (uint16_t i = 0; i < kHopNeighbors; ++i) {
426  const CacheBucket& bucket = buckets_[bucket_number + i];
427  if (bucket.get_tag() == tag) {
428  // found (probably)!
429  refcounts_[bucket_number + i].increment();
430  return bucket.get_content_id();
431  }
432  }
433 
434  // Not found. let's check overflow list
436  for (OverflowPointer i = overflow_buckets_head_; i != 0;) {
437  if (overflow_buckets_[i].bucket_.get_tag() == tag) {
440  }
441  i = overflow_buckets_[i].next_;
442  }
443  }
444 
445  return 0;
446 }
447 
448 } // namespace cache
449 } // namespace foedus
450 #endif // FOEDUS_CACHE_CACHE_HASHTABLE_HPP_
void set_tag(PageIdTag tag)
same note as set_content_id()
void evict_overflow_loop(EvictArgs *args, uint16_t loop)
BucketId clockhand_
We previously stopped eviction here for usual buckets.
soc::SharedMutex overflow_free_buckets_mutex_
The mutex to protect free overflow entries.
Definitions of IDs in this package and a few related constant values.
SnapshotLocalPageId extract_local_page_id_from_snapshot_pointer(SnapshotPagePointer pointer)
Definition: storage_id.hpp:91
CacheOverflowEntry * overflow_buckets_
An entry in the overflow linked list for snapshot cache.
uint32_t BucketId
Offset in hashtable bucket.
memory::AlignedMemory buckets_memory_
ErrorCode install(storage::SnapshotPagePointer page_id, ContentId content)
Called when a cached page is not found.
memory::AlignedMemory refcounts_memory_
Root package of FOEDUS (Fast Optimistic Engine for Data Unification Services).
Definition: assert_nd.hpp:44
ContentId * evicted_contents_
[Out] Array of ContentId evicted.
static PageIdTag get_tag(storage::SnapshotPagePointer page_id) __attribute__((always_inline))
Returns another hash value used to differentiate IDs with similar hash values.
uint32_t PagePoolOffset
Offset in PagePool that compactly represents the page address (unlike 8 bytes pointer).
Definition: memory_id.hpp:44
const BucketId logical_buckets_
Number of logical (actually used) buckets in this table.
BucketId get_physical_buckets() const __attribute__((always_inline))
uint32_t OverflowPointer
Position in the overflow buckets.
Brings error stacktrace information as return value of functions.
Definition: error_stack.hpp:81
HashFunc(BucketId physical_buckets)
void prefetch_cachelines(const void *address, int cacheline_count)
Prefetch multiple contiguous cachelines to L1 cache.
Definition: cacheline.hpp:66
Definitions of IDs in this package and a few related constant values.
A simple hash function logic used in snapshot cache.
void set_content_id(ContentId id)
The pre-calculated p-m pair for optimized integer division by constant.
Definition: const_div.hpp:67
void increment() __attribute__((always_inline))
Forward declarations of classes in cache package.
OverflowPointer overflow_free_buckets_head_
This forms another singly-linked list of free overflow entries.
A mutex that can be placed in shared memory and used from multiple processes.
uint64_t evicted_count_
[Out] Number of entries that were actually evicted
friend std::ostream & operator<<(std::ostream &o, const CacheHashtable &v)
CacheHashtable(BucketId physical_buckets, uint16_t numa_node)
void evict(EvictArgs *args)
Evict some entries from the hashtable.
ErrorCode find_batch(uint16_t batch_size, const storage::SnapshotPagePointer *page_ids, ContentId *out) const
Batched version of find().
uint32_t rem32(uint32_t n, uint32_t d, uint32_t q) const
Calculate remainder.
Definition: const_div.hpp:205
bool decrement(uint16_t subtract) __attribute__((always_inline))
returns whether the counter is still non-zero
BucketId evict_main_loop(EvictArgs *args, BucketId cur, uint16_t loop)
uint32_t PageIdTag
This is a lossy-compressed representation of SnapshotPagePointer used to quickly identify whether the...
#define CXX11_FINAL
Used in public headers in place of "final" of C++11.
Definition: cxx11.hpp:131
ErrorStack verify_single_thread() const
only for debugging.
uint64_t SnapshotPagePointer
Page ID of a snapshot page.
Definition: storage_id.hpp:79
Constants and methods related to CPU cacheline and its prefetching.
memory::PagePoolOffset ContentId
Offset of the actual page image in PagePool for snapshot pages.
uint16_t extract_snapshot_id_from_snapshot_pointer(SnapshotPagePointer pointer)
Definition: storage_id.hpp:98
A NUMA-local hashtable of cached snapshot pages.
memory::AlignedMemory overflow_buckets_memory_
const CacheBucket & get_bucket(BucketId bucket_id) const
const uint16_t kHopNeighbors
Starting from the given position, we consider this many buckets for the entry.
uint8_t extract_numa_node_from_snapshot_pointer(SnapshotPagePointer pointer)
Definition: storage_id.hpp:95
BucketId get_bucket_number(storage::SnapshotPagePointer page_id) const __attribute__((always_inline))
Returns a bucket number the given snapshot page ID should belong to.
uint64_t target_count_
[In] Evicts entries up to about target_count (maybe a bit more or less)
ContentId get_content_id() const
Forward declarations of classes in memory package.
Hash bucket in cache table.
BucketId get_logical_buckets() const __attribute__((always_inline))
Represents one memory block aligned to actual OS/hardware pages.
Atomic fence methods and load/store with fences that work for both C++11/non-C++11 code...
Stat get_stat_single_thread() const
only for debugging.
friend std::ostream & operator<<(std::ostream &o, const HashFunc &v)
void reset(ContentId id=0, PageIdTag tag=0)
static uint32_t get_hash(storage::SnapshotPagePointer page_id) __attribute__((always_inline))
Returns a hash value for the given snapshot page ID.
const assorted::ConstDiv bucket_div_
to efficiently divide a number by bucket_div_.
#define ASSERT_ND(x)
A warning-free wrapper macro of assert() that has no performance effect in release mode even when 'x'...
Definition: assert_nd.hpp:72
Forward declarations of classes in thread package.
const BucketId physical_buckets_
Number of buckets that are physically allocated, at least kHopNeighbors more than logical...
uint32_t div32(uint32_t n) const
32-bit integer division that outputs both quotient and remainder.
Definition: const_div.hpp:228
BucketId get_bucket_number(storage::SnapshotPagePointer page_id) const __attribute__((always_inline))
Returns a bucket number the given page ID should belong to.
#define ALWAYS_INLINE
A function suffix to hint that the function should always be inlined.
Definition: compiler.hpp:106
OverflowPointer next_
Note that we don't have to atomically maintain/follow this pointer thanks to the loose requirements...
OverflowPointer overflow_buckets_head_
This forms a singly-linked list of active overflow entries.
ErrorCode
Enum of error codes defined in error_code.xmacro.
Definition: error_code.hpp:85
A loosely maintained reference count for CLOCK algorithm.
ContentId find(storage::SnapshotPagePointer page_id) const __attribute__((always_inline))
Returns an offset for the given page ID opportunistically.