libfoedus-core
FOEDUS Core Library
hash_hashinate.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015, Hewlett-Packard Development Company, LP.
3  * This program is free software; you can redistribute it and/or modify it
4  * under the terms of the GNU General Public License as published by the Free
5  * Software Foundation; either version 2 of the License, or (at your option)
6  * any later version.
7  *
8  * This program is distributed in the hope that it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11  * more details. You should have received a copy of the GNU General Public
12  * License along with this program; if not, write to the Free Software
13  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14  *
15  * HP designates this particular file as subject to the "Classpath" exception
16  * as provided by HP in the LICENSE.txt file that accompanied this code.
17  */
18 #ifndef FOEDUS_STORAGE_HASH_HASHINATE_HPP_
19 #define FOEDUS_STORAGE_HASH_HASHINATE_HPP_
20 
21 #include <stdint.h>
22 
23 #include <cstring>
24 #include <iosfwd>
25 
26 #include "foedus/compiler.hpp"
27 #include "foedus/cxx11.hpp"
30 
38 namespace foedus {
39 namespace storage {
40 namespace hash {
41 
46 const uint64_t kXxhashKeySeed = 0x3f119e0435262a17ULL;
47 
52 HashValue hashinate(const void *key, uint16_t key_length);
53 
60 template <typename T>
61 HashValue hashinate(T key);
62 
69 const uint16_t kHashDataPageBloomFilterBytes = 64;
70 
75 const uint16_t kHashDataPageBloomFilterBits = kHashDataPageBloomFilterBytes * 8;
76 
83 
85  kHashDataPageBloomFilterBits == (1U << kHashDataPageBloomFilterIndexSize),
86  "If you change the size of bloom filter, change kHashDataPageBloomFilterIndexSize accordingly.");
87 
89 
98 
106 
107  bool operator==(const BloomFilterFingerprint& other) const {
108  for (uint8_t k = 0; k < kHashDataPageBloomFilterHashes; ++k) {
109  if (indexes_[k] != other.indexes_[k]) {
110  return false;
111  }
112  }
113  return true;
114  }
115  bool operator!=(const BloomFilterFingerprint& other) const {
116  return !(operator==(other));
117  }
118  friend std::ostream& operator<<(std::ostream& o, const BloomFilterFingerprint& v);
119 };
120 
145 
147  void clear() {
148  std::memset(values_, 0, sizeof(values_));
149  }
150 
152  inline bool contains(const BloomFilterFingerprint& fingerprint) const ALWAYS_INLINE {
153  for (uint8_t k = 0; k < kHashDataPageBloomFilterHashes; ++k) {
154  ASSERT_ND(fingerprint.indexes_[k] < kHashDataPageBloomFilterBits);
155  uint8_t byte_index = fingerprint.indexes_[k] / 8U;
156  uint8_t bit_index = fingerprint.indexes_[k] % 8U;
157  if ((values_[byte_index] & (1U << bit_index)) == 0) {
158  return false;
159  }
160  }
161  return true;
162  }
163 
165  inline void add(const BloomFilterFingerprint& fingerprint) ALWAYS_INLINE {
166  for (uint8_t k = 0; k < kHashDataPageBloomFilterHashes; ++k) {
167  ASSERT_ND(fingerprint.indexes_[k] < kHashDataPageBloomFilterBits);
168  uint8_t byte_index = fingerprint.indexes_[k] / 8U;
169  uint8_t bit_index = fingerprint.indexes_[k] % 8U;
170  values_[byte_index] |= (1U << bit_index);
171  }
172  }
173 
177  for (uint8_t k = 0; k < kHashDataPageBloomFilterHashes; ++k) {
178  uint64_t index = fullhash >> (k * kHashDataPageBloomFilterIndexSize);
179  // as kHashDataPageBloomFilterBits is power of 2, this is a bit simpler, but not much.
181  }
182  return ret;
183  }
184 
185  friend std::ostream& operator<<(std::ostream& o, const DataPageBloomFilter& v);
186 };
187 
198  uint64_t word;
203  uint8_t route[8];
204 
205  bool operator==(const IntermediateRoute& rhs) const { return word == rhs.word; }
206  bool operator!=(const IntermediateRoute& rhs) const { return word != rhs.word; }
207  bool operator<(const IntermediateRoute& rhs) const {
208  for (uint16_t i = 1; i <= 8U; ++i) {
209  // compare the higher level first. if the machine is big-endian, we can just compare word.
210  // but, this method is not used in performance-sensitive place, so let's be explicit.
211  if (route[8U - i] != rhs.route[8U - i]) {
212  return route[8U - i] < rhs.route[8U - i];
213  }
214  }
215  return false;
216  }
217  bool operator<=(const IntermediateRoute& rhs) const { return *this == rhs || *this < rhs; }
218  friend std::ostream& operator<<(std::ostream& o, const IntermediateRoute& v);
219 
225  HashBin bin = 0;
226  for (uint8_t level = 0; level < kHashMaxLevels; ++level) {
227  bin += route[level] * kHashMaxBins[level];
228  }
229  return bin;
230  }
231 
234  IntermediateRoute ret;
235  ret.word = 0;
236  uint8_t level = 0;
237  HashBin remaining = bin;
238  while (remaining > 0) {
239  // in hash, fanout is always fixed, so no need for pre-calculated const_div. compiler does it.
240  ret.route[level] = remaining % kHashIntermediatePageFanout;
241  remaining /= kHashIntermediatePageFanout;
242  ++level;
243  ASSERT_ND(level < 8U);
244  }
245 
246  ASSERT_ND(ret.convert_back() == bin);
247  return ret;
248  }
249 };
250 
251 } // namespace hash
252 } // namespace storage
253 } // namespace foedus
254 #endif // FOEDUS_STORAGE_HASH_HASHINATE_HPP_
bool operator<=(const IntermediateRoute &rhs) const
const uint8_t kHashDataPageBloomFilterIndexSize
How many bits we need in order to represent an index in bloom filter in each HashDataPage.
uint16_t indexes_[kHashDataPageBloomFilterHashes]
friend std::ostream & operator<<(std::ostream &o, const IntermediateRoute &v)
Root package of FOEDUS (Fast Optimistic Engine for Data Unification Services).
Definition: assert_nd.hpp:44
const uint64_t kXxhashKeySeed
Default seed value used for xxhash's xxh32/64 to hashinate keys.
bool operator!=(const IntermediateRoute &rhs) const
bool contains(const BloomFilterFingerprint &fingerprint) const __attribute__((always_inline))
const uint8_t kHashDataPageBloomFilterHashes
Number of hash functions (k) of bloom filter in each HashDataPage.
bool operator==(const BloomFilterFingerprint &other) const
friend std::ostream & operator<<(std::ostream &o, const BloomFilterFingerprint &v)
Compactly represents the route of intermediate pages to reach the given hash bin. ...
const uint8_t kHashMaxLevels
Max level of intermediate pages.
Definition: hash_id.hpp:104
void clear()
usually zero-cleared as part of a data page, but in case of specifically clearing this ...
HashValue hashinate(const void *key, uint16_t key_length)
Calculates hash value for general input.
#define CXX11_FINAL
Used in public headers in place of "final" of C++11.
Definition: cxx11.hpp:131
const uint16_t kHashDataPageBloomFilterBytes
Byte size of bloom filter in each HashDataPage.
bool operator==(const IntermediateRoute &rhs) const
uint8_t route[8]
[0] means ordinal in level-0 intermediate page, [1] in its parent page, [2]...
#define CXX11_STATIC_ASSERT(expr, message)
Used in public headers in place of "static_assert" of C++11.
Definition: cxx11.hpp:135
const uint16_t kHashDataPageBloomFilterBits
Bit size of bloom filter in each HashDataPage.
friend std::ostream & operator<<(std::ostream &o, const DataPageBloomFilter &v)
uint8_t values_[kHashDataPageBloomFilterBytes]
bool operator<(const IntermediateRoute &rhs) const
static BloomFilterFingerprint extract_fingerprint(HashValue fullhash)
uint64_t HashBin
Represents a bin of a hash value.
Definition: hash_id.hpp:142
void add(const BloomFilterFingerprint &fingerprint) __attribute__((always_inline))
Adds the fingerprint to this bloom filter.
const HashValue kHashDataPageBloomFilterIndexMask
A fingerprint for bloom filter in each HashDataPage.
To quickly check whether a HashDataPage might contain a specific hash value, we maintain a non-counti...
const uint8_t kHashIntermediatePageFanout
Number of pointers in an intermediate page of hash storage.
Definition: hash_id.hpp:49
const uint64_t kHashMaxBins[]
kHashTotalBins[n] gives the maximum number of hash bins n-level hash can hold.
Definition: hash_id.hpp:74
static IntermediateRoute construct(HashBin bin)
Calculates the rout for the given hash bin.
#define ASSERT_ND(x)
A warning-free wrapper macro of assert() that has no performance effect in release mode even when 'x'...
Definition: assert_nd.hpp:72
Definitions of IDs in this package and a few related constant values.
#define ALWAYS_INLINE
A function suffix to hint that the function should always be inlined.
Definition: compiler.hpp:106
bool operator!=(const BloomFilterFingerprint &other) const
uint64_t word
This is a 64bit data.
Forward declarations of classes in hash storage package.
uint64_t HashValue
Represents a full 64-bit hash value calculated from a key.
Definition: hash_id.hpp:129