diff --git a/BUILD b/BUILD
index 6231a31..79d8071 100644
--- a/BUILD
+++ b/BUILD
@@ -35,6 +35,7 @@ filegroup(
     name = "private_headers",
     srcs = [
         "cwisstable/internal/absl_hash.h",
+        "cwisstable/internal/ahash.h",
         "cwisstable/internal/base.h",
         "cwisstable/internal/bits.h",
         "cwisstable/internal/capacity.h",
@@ -151,6 +152,29 @@ cc_binary(
     testonly = 1,
 )
 
+# Builds the benchmark with AES support forced off (`CWISS_HAVE_AES=0`), so the
+# `CWISS_Hash_*` aliases in hash.h resolve to AbslHash instead of AHash.
+cc_binary(
+    name = "cwisstable_benchmark_no_aes",
+    srcs = ["cwisstable/cwisstable_benchmark.cc"],
+    tags = ["benchmark"],
+    deps = [
+        ":cwisstable",
+        ":debug",
+        ":test_helpers",
+
+        "@com_google_absl//absl/cleanup",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_github_google_benchmark//:benchmark_main",
+    ],
+    defines = [
+        "CWISS_HAVE_AES=0",
+    ],
+    copts = CWISS_TEST_COPTS + CWISS_CXX_VERSION,
+    linkopts = CWISS_DEFAULT_LINKOPTS,
+    testonly = 1,
+)
+
 config_setting(
     name = "clang_compiler",
     flag_values = {"@bazel_tools//tools/cpp:compiler": "clang"},
diff --git a/cwisstable/cwisstable_benchmark.cc b/cwisstable/cwisstable_benchmark.cc
index 3afa12a..5656fef 100644
--- a/cwisstable/cwisstable_benchmark.cc
+++ b/cwisstable/cwisstable_benchmark.cc
@@ -41,7 +41,7 @@ struct StringGen {
   template
   std::string operator()(Gen& rng) const {
     std::string res;
-    res.resize(12);
+    res.resize(size);
     std::uniform_int_distribution printable_ascii(0x20, 0x7E);
     std::generate(res.begin(), res.end(), [&] { return printable_ascii(rng); });
     return res;
@@ -56,7 +56,7 @@ struct StringGen {
 void BM_CacheInSteadyState(benchmark::State& state) {
   std::random_device rd;
   std::mt19937 rng(rd());
-  StringGen gen{12};
+  StringGen gen{64};
   auto t = StringTable_new(0);
   absl::Cleanup c_ = [&] { StringTable_destroy(&t); };
 
diff --git a/cwisstable/hash.h b/cwisstable/hash.h
index b8c5427..9d87893 100644
--- a/cwisstable/hash.h
+++ b/cwisstable/hash.h
@@ -22,6 +22,7 @@
 #include
 
 #include "cwisstable/internal/absl_hash.h"
+#include "cwisstable/internal/ahash.h"
 #include "cwisstable/internal/base.h"
 #include "cwisstable/internal/bits.h"
 
@@
-37,10 +38,12 @@
 /// - `size_t CWISS__Finish(State)`, digest the state into a final hash
 ///   value.
 ///
-/// Currently available are two hashes: `FxHash`, which is small and fast, and
-/// `AbslHash`, the hash function used by Abseil.
+/// Currently available are three hashes: `FxHash`, which is small and fast,
+/// `AbslHash`, the hash function used by Abseil, and `AHash`, a hardware
+/// AES-based hash function.
 ///
-/// `AbslHash` is the default hash function.
+/// The default hash is named `Hash`; it is selected at compile time: `AHash`
+/// when `CWISS_HAVE_AES` is nonzero, and `AbslHash` otherwise.
 
 CWISS_BEGIN
 CWISS_BEGIN_EXTERN
@@ -111,6 +114,32 @@ static inline size_t CWISS_AbslHash_Finish(CWISS_AbslHash_State state) {
   return state;
 }
 
+#if CWISS_HAVE_AES
+typedef CWISS_AHash_State_ CWISS_AHash_State;
+  #define CWISS_AHash_kInit CWISS_AHash_kInit_
+
+CWISS_INLINE_ALWAYS
+static inline void CWISS_AHash_Write(CWISS_AHash_State* state, const void* val,
+                                     size_t len) {
+  CWISS_AHash_Write_(state, val, len);
+}
+
+CWISS_INLINE_ALWAYS
+static inline size_t CWISS_AHash_Finish(CWISS_AHash_State state) {
+  return CWISS_AHash_Finish_(state);
+}
+
+  #define CWISS_Hash_State CWISS_AHash_State
+  #define CWISS_Hash_kInit CWISS_AHash_kInit
+  #define CWISS_Hash_Write CWISS_AHash_Write
+  #define CWISS_Hash_Finish CWISS_AHash_Finish
+#else
+  #define CWISS_Hash_State CWISS_AbslHash_State
+  #define CWISS_Hash_kInit CWISS_AbslHash_kInit
+  #define CWISS_Hash_Write CWISS_AbslHash_Write
+  #define CWISS_Hash_Finish CWISS_AbslHash_Finish
+#endif
+
 CWISS_END_EXTERN
 CWISS_END
 
diff --git a/cwisstable/internal/ahash.h b/cwisstable/internal/ahash.h
new file mode 100644
index 0000000..87006b0
--- /dev/null
+++ b/cwisstable/internal/ahash.h
@@ -0,0 +1,216 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CWISSTABLE_INTERNAL_AES_HASH_H_
+#define CWISSTABLE_INTERNAL_AES_HASH_H_
+
+#include
+#include
+#include
+
+#include "cwisstable/internal/base.h"
+#include "cwisstable/internal/bits.h"
+
+/// Implementation details of aHash.
+///
+/// Based on the Apache-2.0-licensed code found at
+/// https://github.com/tkaitchuck/aHash/blob/master/src/aes_hash.rs
+
+CWISS_BEGIN
+CWISS_BEGIN_EXTERN
+
+#if CWISS_HAVE_AES
+
+/// Running state of an aHash computation.
+///
+/// `enc_` absorbs input through AES rounds and `sum_` through shuffle-and-add;
+/// `key_` is set by the initializer and only read by the code in this file.
+typedef struct {
+  CWISS_U128 enc_, sum_, key_;
+} CWISS_AHash_State_;
+
+  // This is a keyed hash, so it requires "random" inputs. However, because
+  // its cryptographic power is unproven, we use constants for the initial value
+  // to avoid the overhead of randomness. You are welcome to inject randomness
+  // into your build system by defining these constants via
+  // -DCWISS_AHash_kInitN_.
+  //
+  // These numbers are the first eight SHA-256 round constants.
+
+  #if !defined(CWISS_AHash_kInit0_) && !defined(CWISS_AHash_kInit1_) && \
+      !defined(CWISS_AHash_kInit2_) && !defined(CWISS_AHash_kInit3_)
+    #define CWISS_AHash_kInit0_ ((uint64_t)0x71374491428a2f98)
+    #define CWISS_AHash_kInit1_ ((uint64_t)0xe9b5dba5b5c0fbcf)
+    #define CWISS_AHash_kInit2_ ((uint64_t)0x59f111f13956c25b)
+    #define CWISS_AHash_kInit3_ ((uint64_t)0xab1c5ed5923f82a4)
+  #endif
+
+  #define CWISS_AHash_kInit_ \
+    ((CWISS_AHash_State_){ \
+        {CWISS_AHash_kInit0_, CWISS_AHash_kInit1_}, \
+        {CWISS_AHash_kInit2_, CWISS_AHash_kInit3_}, \
+        { \
+            CWISS_AHash_kInit0_ ^ CWISS_AHash_kInit2_, \
+            CWISS_AHash_kInit1_ ^ CWISS_AHash_kInit3_, \
+        }, \
+    })
+
+/// Adds `a` and `b` as two independent 64-bit lanes.
+CWISS_INLINE_ALWAYS
+static inline CWISS_U128 CWISS_AHash_AddLanes(CWISS_U128 a, CWISS_U128 b) {
+  __m128i a_, b_;
+  memcpy(&a_, &a, sizeof(a));
+  memcpy(&b_, &b, sizeof(b));
+
+  a_ = _mm_add_epi64(a_, b_);
+
+  memcpy(&a, &a_, sizeof(a));
+  return a;
+}
+
+/// Permutes the bytes of `a`, then adds `b` to the result lane-wise.
+///
+/// With SSSE3 the permutation is a fixed byte shuffle; without it, the 16 bytes
+/// of `a` are reversed instead.
+CWISS_INLINE_ALWAYS
+static inline CWISS_U128 CWISS_AHash_ShuffleAndAdd(CWISS_U128 a, CWISS_U128 b) {
+  #if CWISS_HAVE_SSSE3
+  const uint64_t mask[2] = {0x050f0d0806090b04, 0x020a07000c01030e};
+
+  __m128i a_, b_, mask_;
+  memcpy(&a_, &a, sizeof(a));
+  memcpy(&b_, &b, sizeof(b));
+  memcpy(&mask_, &mask, sizeof(mask));
+
+  a_ = _mm_shuffle_epi8(a_, mask_);
+  a_ = _mm_add_epi64(a_, b_);
+
+  memcpy(&a, &a_, sizeof(a));
+  return a;
+  #else
+  // bswap of u128: exchange each byte with its mirror image. Both bytes of a
+  // pair must be written, or the original low half would be lost.
+  char* a_bytes = (char*)&a;
+  for (size_t i = 0; i < sizeof(a) / 2; ++i) {
+    char tmp = a_bytes[i];
+    a_bytes[i] = a_bytes[sizeof(a) - i - 1];
+    a_bytes[sizeof(a) - i - 1] = tmp;
+  }
+
+  return CWISS_AHash_AddLanes(a, b);
+  #endif
+}
+
+/// Absorbs one block: an AES round into `enc_`, a shuffle-and-add into `sum_`.
+CWISS_INLINE_ALWAYS
+static inline void CWISS_AHash_Mix1(CWISS_AHash_State_* self, CWISS_U128 v1) {
+  self->enc_ = CWISS_AesEnc(self->enc_, v1);
+  self->sum_ = CWISS_AHash_ShuffleAndAdd(self->sum_, v1);
+}
+
+/// Absorbs two blocks, `v1` then `v2`, each the way `CWISS_AHash_Mix1` does.
+CWISS_INLINE_ALWAYS
+static inline void CWISS_AHash_Mix2(CWISS_AHash_State_* self, CWISS_U128 v1,
+                                    CWISS_U128 v2) {
+  self->enc_ = CWISS_AesEnc(self->enc_, v1);
+  self->sum_ = CWISS_AHash_ShuffleAndAdd(self->sum_, v1);
+  self->enc_ = CWISS_AesEnc(self->enc_, v2);
+  self->sum_ = CWISS_AHash_ShuffleAndAdd(self->sum_, v2);
+}
+
+/// Absorbs the `len` bytes at `val` into `self`.
+///
+/// The length is added to `enc_.lo` first. Inputs of at most 64 bytes are read
+/// with head and tail loads that may overlap; longer inputs are consumed front
+/// to back in 64-byte chunks, after seeding accumulators with the last 64 bytes.
+static inline void CWISS_AHash_Write_(CWISS_AHash_State_* self, const void* val,
+                                      size_t len) {
+  const char* val8 = (const char*)val;
+  self->enc_.lo += len;
+
+  if (len > 64) {
+    CWISS_U128 tail[4];
+    memcpy(tail, val8 + len - sizeof(tail), sizeof(tail));
+
+    CWISS_U128 current[4] = {
+        CWISS_AesEnc(self->key_, tail[0]),
+        CWISS_AesEnc(self->key_, tail[1]),
+        CWISS_AesEnc(self->key_, tail[2]),
+        CWISS_AesEnc(self->key_, tail[3]),
+    };
+
+    CWISS_U128 sum[2] = {
+        CWISS_AHash_AddLanes(self->key_, tail[0]),
+        CWISS_AHash_AddLanes(self->key_, tail[1]),
+    };
+    sum[0] = CWISS_AHash_ShuffleAndAdd(sum[0], tail[2]);
+    sum[1] = CWISS_AHash_ShuffleAndAdd(sum[1], tail[3]);
+
+    while (len > 64) {
+      CWISS_U128 blocks[4];
+      memcpy(blocks, val8, sizeof(blocks));
+      // Step past this chunk; `len` must shrink or the loop cannot terminate.
+      val8 += sizeof(blocks);
+      len -= sizeof(blocks);
+
+      current[0] = CWISS_AesEnc(current[0], blocks[0]);
+      current[1] = CWISS_AesEnc(current[1], blocks[1]);
+      current[2] = CWISS_AesEnc(current[2], blocks[2]);
+      current[3] = CWISS_AesEnc(current[3], blocks[3]);
+      sum[0] = CWISS_AHash_ShuffleAndAdd(sum[0], blocks[0]);
+      sum[1] = CWISS_AHash_ShuffleAndAdd(sum[1], blocks[1]);
+      sum[0] = CWISS_AHash_ShuffleAndAdd(sum[0], blocks[2]);
+      sum[1] = CWISS_AHash_ShuffleAndAdd(sum[1], blocks[3]);
+    }
+
+    CWISS_AHash_Mix2(self, CWISS_AesEnc(current[0], current[1]),
+                     CWISS_AesEnc(current[2], current[3]));
+    CWISS_AHash_Mix1(self, CWISS_AHash_AddLanes(sum[0], sum[1]));
+  } else if (len > 32) {
+    // Len 33..=64.
+    CWISS_U128 head[2];
+    CWISS_U128 tail[2];
+    memcpy(head, val8, sizeof(head));
+    memcpy(tail, val8 + len - sizeof(tail), sizeof(tail));
+    CWISS_AHash_Mix2(self, head[0], head[1]);
+    CWISS_AHash_Mix2(self, tail[0], tail[1]);
+  } else if (len > 16) {
+    // Len 17..=32.
+    CWISS_U128 head, tail;
+    memcpy(&head, val8, sizeof(head));
+    memcpy(&tail, val8 + len - sizeof(tail), sizeof(tail));
+    CWISS_AHash_Mix2(self, head, tail);
+  } else if (len > 8) {
+    // Len 9..=16.
+    uint64_t head, tail;
+    memcpy(&head, val8, sizeof(head));
+    memcpy(&tail, val8 + len - sizeof(tail), sizeof(tail));
+    CWISS_AHash_Mix1(self, (CWISS_U128){head, tail});
+  } else {
+    CWISS_AHash_Mix1(self, CWISS_Load0to8Twice(val, len));
+  }
+}
+
+/// Collapses the state into the final 64-bit hash value.
+static inline uint64_t CWISS_AHash_Finish_(CWISS_AHash_State_ self) {
+  CWISS_U128 combined = CWISS_AesDec(self.sum_, self.enc_);
+  return CWISS_AesEnc(CWISS_AesEnc(combined, self.key_), combined).lo;
+}
+
+#endif  // CWISS_HAVE_AES
+
+CWISS_END_EXTERN
+CWISS_END
+
+#endif  // CWISSTABLE_INTERNAL_AES_HASH_H_
\ No newline at end of file
diff --git a/cwisstable/internal/base.h b/cwisstable/internal/base.h
index 7134b0a..07a644f 100644
--- a/cwisstable/internal/base.h
+++ b/cwisstable/internal/base.h
@@ -124,6 +124,18 @@
   #endif
 #endif
 
+/// `CWISS_HAVE_AES` is nonzero if we have AESNI support.
+///
+/// `-DCWISS_HAVE_AES` can be used to override it; it is otherwise detected
+/// via the usual non-portable feature-detection macros.
+#ifndef CWISS_HAVE_AES
+  #ifdef __AES__
+    #define CWISS_HAVE_AES 1
+  #else
+    #define CWISS_HAVE_AES 0
+  #endif
+#endif
+
 #if CWISS_HAVE_SSE2
   #include
 #endif
@@ -135,6 +147,10 @@
   #include
 #endif
 
+#if CWISS_HAVE_AES
+  #include
+#endif
+
 /// `CWISS_HAVE_BUILTIN` will, in Clang, detect whether a Clang language
 /// extension is enabled.
///
diff --git a/cwisstable/internal/bits.h b/cwisstable/internal/bits.h
index 3fc25e1..ac8d7d9 100644
--- a/cwisstable/internal/bits.h
+++ b/cwisstable/internal/bits.h
@@ -137,6 +137,13 @@ static inline CWISS_U128 CWISS_Mul128(uint64_t a, uint64_t b) {
   return (CWISS_U128){(uint64_t)p, (uint64_t)(p >> 64)};
 }
 
+/// Loads an unaligned u16 (copied out with `memcpy`, so `p` may be misaligned).
+static inline uint16_t CWISS_Load16(const void* p) {
+  uint16_t v;
+  memcpy(&v, p, sizeof(v));
+  return v;
+}
+
 /// Loads an unaligned u32.
 static inline uint32_t CWISS_Load32(const void* p) {
   uint32_t v;
@@ -176,6 +183,28 @@ static inline uint32_t CWISS_Load1To3(const void* p, size_t len) {
   return (mem0 | (mem1 << (len / 2 * 8)) | (mem2 << ((len - 1) * 8)));
 }
 
+/// Loads the head and the tail of a 0..=8 byte buffer into a `CWISS_U128`.
+CWISS_INLINE_ALWAYS
+static inline CWISS_U128 CWISS_Load0to8Twice(const void* p, size_t len) {
+  const unsigned char* p8 = (const unsigned char*)p;
+  if (len >= 2) {
+    if (len >= 4) {
+      // Len 4..=8: first 4 and last 4 bytes (overlapping when len < 8).
+      return (CWISS_U128){CWISS_Load32(p), CWISS_Load32(p8 + len - 4)};
+    } else {
+      // Len 2..=3: first 2 bytes, then the final byte.
+      return (CWISS_U128){CWISS_Load16(p), p8[len - 1]};
+    }
+  } else {
+    // Len 0..=1: the single byte in both fields, or all zeros when empty.
+    if (len > 0) {
+      return (CWISS_U128){p8[0], p8[0]};
+    } else {
+      return (CWISS_U128){0};
+    }
+  }
+}
+
 /// A abstract bitmask, such as that emitted by a SIMD instruction.
 ///
 /// Specifically, this type implements a simple bitset whose representation is
@@ -233,6 +262,38 @@ static inline bool CWISS_BitMask_next(CWISS_BitMask* self, uint32_t* bit) {
   return true;
 }
 
+#if CWISS_HAVE_AES
+// TODO: ARM support.
+
+/// Performs one hardware AES encryption round on `state` with round key `key`.
+CWISS_INLINE_ALWAYS
+static inline CWISS_U128 CWISS_AesEnc(CWISS_U128 state, CWISS_U128 key) {
+  __m128i state_, key_;
+  memcpy(&state_, &state, sizeof(state));
+  memcpy(&key_, &key, sizeof(key));
+
+  __m128i out_ = _mm_aesenc_si128(state_, key_);
+
+  CWISS_U128 out;
+  memcpy(&out, &out_, sizeof(out));
+  return out;
+}
+
+/// Performs one hardware AES decryption round on `state` with round key `key`.
+CWISS_INLINE_ALWAYS
+static inline CWISS_U128 CWISS_AesDec(CWISS_U128 state, CWISS_U128 key) {
+  __m128i state_, key_;
+  memcpy(&state_, &state, sizeof(state));
+  memcpy(&key_, &key, sizeof(key));
+
+  __m128i out_ = _mm_aesdec_si128(state_, key_);
+
+  CWISS_U128 out;
+  memcpy(&out, &out_, sizeof(out));
+  return out;
+}
+#endif
+
 CWISS_END_EXTERN
 CWISS_END
 
diff --git a/cwisstable/internal/test_helpers.h b/cwisstable/internal/test_helpers.h
index 00f5609..2ac72d1 100644
--- a/cwisstable/internal/test_helpers.h
+++ b/cwisstable/internal/test_helpers.h
@@ -25,20 +25,22 @@ namespace cwisstable {
 template
 struct DefaultHash {
   size_t operator()(const T& val) {
+    // Hashes the raw `sizeof(T)` bytes of `val` with the `CWISS_Hash_*` family.
-    CWISS_AbslHash_State state = CWISS_AbslHash_kInit;
-    CWISS_AbslHash_Write(&state, &val, sizeof(T));
-    return CWISS_AbslHash_Finish(state);
+    CWISS_Hash_State state = CWISS_Hash_kInit;
+    CWISS_Hash_Write(&state, &val, sizeof(T));
+    return CWISS_Hash_Finish(state);
   }
 };
 
 struct HashStdString {
   template
   size_t operator()(const S& s) {
+    // The length is written ahead of the character data.
-    CWISS_AbslHash_State state = CWISS_AbslHash_kInit;
+    CWISS_Hash_State state = CWISS_Hash_kInit;
     size_t size = s.size();
-    CWISS_AbslHash_Write(&state, &size, sizeof(size_t));
-    CWISS_AbslHash_Write(&state, s.data(), s.size());
-    return CWISS_AbslHash_Finish(state);
+    CWISS_Hash_Write(&state, &size, sizeof(size_t));
+    CWISS_Hash_Write(&state, s.data(), s.size());
+    return CWISS_Hash_Finish(state);
   }
 };
 
diff --git a/cwisstable/policy.h b/cwisstable/policy.h
index b9b0397..344ebcb 100644
--- a/cwisstable/policy.h
+++ b/cwisstable/policy.h
@@ -251,9 +251,9 @@ typedef struct {
   } \
   CWISS_EXTRACT_RAW(modifiers, static, __VA_ARGS__) \
   inline size_t kPolicy_##_DefaultHash(const void* val) { \
-    CWISS_AbslHash_State state = CWISS_AbslHash_kInit; \
-    CWISS_AbslHash_Write(&state, val, sizeof(Key_)); \
-    return CWISS_AbslHash_Finish(state); \
+    CWISS_Hash_State state = CWISS_Hash_kInit; \
+    CWISS_Hash_Write(&state, val, sizeof(Key_)); \
+    return CWISS_Hash_Finish(state); \
   } \
CWISS_EXTRACT_RAW(modifiers, static, __VA_ARGS__) \ inline bool kPolicy_##_DefaultEq(const void* a, const void* b) { \