Skip to content

Commit ed92476

Browse files
NotsoanoNimuslerno
andauthored
Add wyhash2 and metro64/128 modern hashing (#2293)
* add wyhash2, metro64, and metro128 hashes; best performing non-crypto hash functions * add superfast 64-bit a5hash; not streamed, no 128-bit impl * add komihash and associated tests/benchmarks --------- Co-authored-by: Christoffer Lerno <[email protected]>
1 parent 1218afd commit ed92476

File tree

11 files changed

+1263
-0
lines changed

11 files changed

+1263
-0
lines changed
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// Copyright (c) 2025 Zack Puhl <[email protected]>. All rights reserved.
2+
// Use of this source code is governed by the MIT license
3+
// a copy of which can be found in the LICENSE_STDLIB file.
4+
module non_crypto_benchmarks;
5+
6+
7+
// Iteration budget shared by every benchmark in this file (1 << 18 = 262144); it is passed,
// plus 3, to set_benchmark_max_iterations() in initialize_bench().
const usz COMMON_ITERATIONS = 1 << 18;
8+
9+
// Fixed benchmark inputs. The numeric suffix is the input length in bytes and every byte is 0xA5.
const char[] COMMON_1 = { 0xA5 };
10+
const char[] COMMON_4 = { 0xA5, 0xA5, 0xA5, 0xA5, };
11+
const char[] COMMON_8 = { [0..7] = 0xA5 };
12+
const char[] COMMON_16 = { [0..15] = 0xA5 };
13+
const char[] COMMON_32 = { [0..31] = 0xA5 };
14+
const char[] COMMON_64 = { [0..63] = 0xA5 };
15+
const char[] COMMON_128 = { [0..127] = 0xA5 };
16+
const char[] COMMON_1024 = { [0..1023] = 0xA5 };
17+
18+
19+
// Configures the benchmark runner for this file: 3 warmup iterations and a maximum of
// COMMON_ITERATIONS + 3 iterations. Declared @init, i.e. registered as an initializer rather
// than called explicitly from this file.
// NOTE(review): the "+ 3" presumably offsets the 3 warmup iterations so that COMMON_ITERATIONS
// timed iterations remain - confirm against the benchmark runtime.
fn void initialize_bench() @init
20+
{
21+
set_benchmark_warmup_iterations(3);
22+
set_benchmark_max_iterations(COMMON_ITERATIONS + 3);
23+
}
24+
25+
26+
// =======================================================================================
27+
module non_crypto_benchmarks @benchmark;
28+
29+
import std::hash;
30+
31+
32+
// 1-byte input: one benchmark per hash function, each hashing COMMON_1. The functions return
// void, so the computed hash value is not used.
fn void fnv64a_1() => fnv64a::hash(COMMON_1);
33+
fn void fnv32a_1() => fnv32a::hash(COMMON_1);
34+
fn void wyhash2_1() => wyhash2::hash(COMMON_1);
35+
fn void metro64_1() => metro64::hash(COMMON_1);
36+
fn void metro128_1() => metro128::hash(COMMON_1);
37+
fn void a5hash_1() => a5hash::hash(COMMON_1);
38+
fn void komi_1() => komi::hash(COMMON_1);
39+
40+
// 4-byte input: one benchmark per hash function, each hashing COMMON_4. The functions return
// void, so the computed hash value is not used.
fn void fnv64a_4() => fnv64a::hash(COMMON_4);
41+
fn void fnv32a_4() => fnv32a::hash(COMMON_4);
42+
fn void wyhash2_4() => wyhash2::hash(COMMON_4);
43+
fn void metro64_4() => metro64::hash(COMMON_4);
44+
fn void metro128_4() => metro128::hash(COMMON_4);
45+
fn void a5hash_4() => a5hash::hash(COMMON_4);
46+
fn void komi_4() => komi::hash(COMMON_4);
47+
48+
// 8-byte input: one benchmark per hash function, each hashing COMMON_8. The functions return
// void, so the computed hash value is not used.
fn void fnv64a_8() => fnv64a::hash(COMMON_8);
49+
fn void fnv32a_8() => fnv32a::hash(COMMON_8);
50+
fn void wyhash2_8() => wyhash2::hash(COMMON_8);
51+
fn void metro64_8() => metro64::hash(COMMON_8);
52+
fn void metro128_8() => metro128::hash(COMMON_8);
53+
fn void a5hash_8() => a5hash::hash(COMMON_8);
54+
fn void komi_8() => komi::hash(COMMON_8);
55+
56+
// 16-byte input: one benchmark per hash function, each hashing COMMON_16. This is the largest
// input size in this file for which a wyhash2 benchmark is defined.
fn void fnv64a_16() => fnv64a::hash(COMMON_16);
57+
fn void fnv32a_16() => fnv32a::hash(COMMON_16);
58+
fn void wyhash2_16() => wyhash2::hash(COMMON_16);
59+
fn void metro64_16() => metro64::hash(COMMON_16);
60+
fn void metro128_16() => metro128::hash(COMMON_16);
61+
fn void a5hash_16() => a5hash::hash(COMMON_16);
62+
fn void komi_16() => komi::hash(COMMON_16);
63+
64+
// 32-byte input: one benchmark per hash function (except wyhash2), each hashing COMMON_32.
fn void fnv64a_32() => fnv64a::hash(COMMON_32);
65+
fn void fnv32a_32() => fnv32a::hash(COMMON_32);
66+
// NOTE: wyhash2 cannot be used on inputs > 16 bytes.
67+
fn void metro64_32() => metro64::hash(COMMON_32);
68+
fn void metro128_32() => metro128::hash(COMMON_32);
69+
fn void a5hash_32() => a5hash::hash(COMMON_32);
70+
fn void komi_32() => komi::hash(COMMON_32);
71+
72+
// 64-byte input: one benchmark per hash function (except wyhash2), each hashing COMMON_64.
fn void fnv64a_64() => fnv64a::hash(COMMON_64);
73+
fn void fnv32a_64() => fnv32a::hash(COMMON_64);
74+
// NOTE: wyhash2 cannot be used on inputs > 16 bytes.
75+
fn void metro64_64() => metro64::hash(COMMON_64);
76+
fn void metro128_64() => metro128::hash(COMMON_64);
77+
fn void a5hash_64() => a5hash::hash(COMMON_64);
78+
fn void komi_64() => komi::hash(COMMON_64);
79+
80+
// 128-byte input: one benchmark per hash function (except wyhash2), each hashing COMMON_128.
fn void fnv64a_128() => fnv64a::hash(COMMON_128);
81+
fn void fnv32a_128() => fnv32a::hash(COMMON_128);
82+
// NOTE: wyhash2 cannot be used on inputs > 16 bytes.
83+
fn void metro64_128() => metro64::hash(COMMON_128);
84+
fn void metro128_128() => metro128::hash(COMMON_128);
85+
fn void a5hash_128() => a5hash::hash(COMMON_128);
86+
fn void komi_128() => komi::hash(COMMON_128);
87+
88+
// 1024-byte input: one benchmark per hash function (except wyhash2), each hashing COMMON_1024.
fn void fnv64a_1024() => fnv64a::hash(COMMON_1024);
89+
fn void fnv32a_1024() => fnv32a::hash(COMMON_1024);
90+
// NOTE: wyhash2 cannot be used on inputs > 16 bytes.
91+
fn void metro64_1024() => metro64::hash(COMMON_1024);
92+
fn void metro128_1024() => metro128::hash(COMMON_1024);
93+
fn void a5hash_1024() => a5hash::hash(COMMON_1024);
94+
fn void komi_1024() => komi::hash(COMMON_1024);

lib/std/hash/a5hash.c3

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// Copyright (c) 2025 Zack Puhl <[email protected]>. All rights reserved.
2+
// Use of this source code is governed by the MIT license
3+
// a copy of which can be found in the LICENSE_STDLIB file.
4+
//
5+
// An implementation of Aleksey Vaneev's a5hash, version 5.16, in C3:
6+
// https://github.com/avaneev/a5hash
7+
//
8+
// The license for a5hash from the above repository at the time of writing is as follows:
9+
//
10+
// >> MIT License
11+
// >>
12+
// >> Copyright (c) 2025 Aleksey Vaneev
13+
// >>
14+
// >> Permission is hereby granted, free of charge, to any person obtaining a copy
15+
// >> of this software and associated documentation files (the "Software"), to deal
16+
// >> in the Software without restriction, including without limitation the rights
17+
// >> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18+
// >> copies of the Software, and to permit persons to whom the Software is
19+
// >> furnished to do so, subject to the following conditions:
20+
// >>
21+
// >> The above copyright notice and this permission notice shall be included in all
22+
// >> copies or substantial portions of the Software.
23+
// >>
24+
// >> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25+
// >> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26+
// >> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27+
// >> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28+
// >> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29+
// >> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30+
// >> SOFTWARE.
31+
//
32+
//
33+
module std::hash::a5hash;
34+
35+
36+
// Multiplies #u by #v as a full 128-bit product, then writes the low 64 bits to #lo and the
// high 64 bits to #hi (both are overwritten). The product is held in a local before either
// output is assigned, so an output may also appear inside an input expression, which is how
// hash() below calls it.
macro @a5mul(#u, #v, #lo, #hi) @local
37+
{
38+
uint128 imd = (uint128)#u * (uint128)#v;
39+
#lo = (ulong)imd;
40+
#hi = (ulong)(imd >> 64);
41+
}
42+
43+
44+
// Computes the 64-bit a5hash of `data`.
//
// data: the bytes to hash; may be empty.
// seed: optional seed value, 0 by default; its bits are mixed into the initial state.
// Returns the 64-bit hash value.
//
// NOTE(review): multi-byte loads go through @unaligned_load with no byte swapping, so the
// result presumably follows the host byte order - confirm if big-endian targets matter.
fn ulong hash(char[] data, ulong seed = 0)
45+
{
46+
// Both state words start from a fixed constant XORed with the input length.
ulong seed1 = 0x243F_6A88_85A3_08D3 ^ data.len;
47+
ulong seed2 = 0x4528_21E6_38D0_1377 ^ data.len;
48+
// Alternating-bit masks: val10 selects the odd bits of `seed`, val01 the even bits.
ulong val10 = 0xAAAA_AAAA_AAAA_AAAA;
49+
ulong val01 = 0x5555_5555_5555_5555;
50+
ulong a, b;
51+
52+
// Initial mixing round: each multiplicand receives a disjoint half of the seed bits.
@a5mul(seed2 ^ (seed & val10), seed1 ^ (seed & val01), seed1, seed2);
53+
54+
val10 ^= seed2;
55+
56+
if (@likely(data.len > 3))
57+
{
58+
if (data.len > 16)
59+
{
60+
// val01 is only perturbed on this long-input path.
val01 ^= seed1;
61+
62+
// Consume 16 bytes per round while MORE than 16 remain, so 1..16 bytes are left afterwards.
for (; data.len > 16; data = data[16..])
63+
{
64+
@a5mul(
65+
@unaligned_load(((ulong*)data.ptr)[0], 1) ^ seed1,
66+
@unaligned_load(((ulong*)data.ptr)[1], 1) ^ seed2,
67+
seed1, seed2
68+
);
69+
70+
seed1 += val01;
71+
seed2 += val10;
72+
}
73+
74+
// Load the last 16 bytes of the input. With at most 16 bytes remaining, these reads may start
// before data.ptr and overlap bytes the loop already consumed; that is in bounds because the
// loop above ran at least once.
a = @unaligned_load(*(ulong*)(data.ptr + (uptr)data.len - 16), 1);
75+
b = @unaligned_load(*(ulong*)(data.ptr + (uptr)data.len - 8), 1);
76+
}
77+
else
78+
{
79+
// 4..16 bytes: build a and b from four 4-byte loads that may overlap.
// a = first 4 bytes (high half) | last 4 bytes (low half).
a = ((ulong)@unaligned_load(*(uint*)&data[0], 1) << 32)
80+
| @unaligned_load(*(uint*)&data[^4], 1);
81+
82+
// b uses the same pattern moved inwards by (len >> 3) * 4 bytes from each end:
// 0 for len 4..7 (so b == a), 4 for len 8..15, 8 for len 16.
b = ((ulong)@unaligned_load(*(uint*)&data[(data.len >> 3) * 4], 1) << 32)
83+
| @unaligned_load(*(uint*)(data.ptr + data.len - 4 - (data.len >> 3) * 4), 1);
84+
}
85+
}
86+
else
87+
{
88+
// 0..3 bytes: pack the available bytes into a (data[0] lowest, then data[1] << 8, data[2] << 16).
a = data.len ? (data[0] | (data.len > 1 ? ((ulong)data[1] << 8) : 0) | (data.len > 2 ? ((ulong)data[2] << 16) : 0)) : 0;
89+
b = 0;
90+
}
91+
92+
// Finalization: fold the tail words into the state, then run one more multiply whose two
// halves are XORed together to form the result.
@a5mul(a ^ seed1, b ^ seed2, seed1, seed2);
93+
@a5mul(val01 ^ seed1, seed2, a, b);
94+
95+
return a ^ b;
96+
}

lib/std/hash/komi.c3

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
// Copyright (c) 2025 Zack Puhl <[email protected]>. All rights reserved.
2+
// Use of this source code is governed by the MIT license
3+
// a copy of which can be found in the LICENSE_STDLIB file.
4+
//
5+
// An implementation of Aleksey Vaneev's komihash, version 5.27, in C3:
6+
// https://github.com/avaneev/komihash
7+
//
8+
// The license for komihash from the above repository at the time of writing is as follows:
9+
//
10+
// >> MIT License
11+
// >>
12+
// >> Copyright (c) 2021-2025 Aleksey Vaneev
13+
// >>
14+
// >> Permission is hereby granted, free of charge, to any person obtaining a copy
15+
// >> of this software and associated documentation files (the "Software"), to deal
16+
// >> in the Software without restriction, including without limitation the rights
17+
// >> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18+
// >> copies of the Software, and to permit persons to whom the Software is
19+
// >> furnished to do so, subject to the following conditions:
20+
// >>
21+
// >> The above copyright notice and this permission notice shall be included in all
22+
// >> copies or substantial portions of the Software.
23+
// >>
24+
// >> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25+
// >> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26+
// >> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27+
// >> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28+
// >> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29+
// >> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30+
// >> SOFTWARE.
31+
//
32+
//
33+
module std::hash::komi;
34+
35+
36+
// Multiplies #u by #v as a full 128-bit product, then writes the low 64 bits to #lo and ADDS
// the high 64 bits to #hi (wrapping 64-bit accumulate; #hi is not overwritten). The product is
// held in a local before either output is touched, so an output may also appear inside an
// input expression, which is how hash() below calls it.
macro @komimul(#u, #v, #lo, #hi) @local
37+
{
38+
uint128 imd = (uint128)#u * (uint128)#v;
39+
#lo = (ulong)imd;
40+
#hi += (ulong)(imd >> 64);
41+
}
42+
43+
44+
// Computes the 64-bit komihash of `data`.
//
// data: the bytes to hash; may be empty.
// seed: optional seed value, 0 by default; its bits are mixed into the initial state.
// Returns the 64-bit hash value.
//
// The input is handled by one of three length classes (< 16, 16..31, >= 32 bytes). Each class
// leaves its final two words in r1h/r2h, which the shared HASHFIN step at the bottom consumes.
// In every class the partial tail word is built with a single 1 bit placed directly above the
// last message byte.
//
// NOTE(review): multi-byte loads go through @unaligned_load with no byte swapping, so the
// result presumably follows the host byte order - confirm if big-endian targets matter.
fn ulong hash(char[] data, ulong seed = 0)
45+
{
46+
// The even bits of `seed` go into seed1 and the odd bits into seed5.
ulong seed1 = 0x243F_6A88_85A3_08D3 ^ (seed & 0x5555_5555_5555_5555);
47+
ulong seed5 = 0x4528_21E6_38D0_1377 ^ (seed & 0xAAAA_AAAA_AAAA_AAAA);
48+
ulong r1h, r2h;
49+
50+
// HASHROUND
51+
@komimul(seed1, seed5, seed1, seed5);
52+
seed1 ^= seed5;
53+
54+
if (@likely(data.len < 16))
55+
{
56+
// 0..15 bytes: start from the seeds and XOR the message in; an empty input leaves them as-is.
r1h = seed1;
57+
r2h = seed5;
58+
59+
if (@likely(data.len >= 8))
60+
{
61+
// First 8 bytes go into r1h; the remaining 0..7 bytes (plus the marker bit) go into r2h.
r1h ^= @unaligned_load(*(ulong*)data.ptr, 1);
62+
63+
// len 8..11: read the last 3 bytes individually; (len * 8) ^ 88 equals 88 - len * 8 here, which
// shifts out the bytes that belong to the first 8. len 12..15: combine bytes 8..11 with the
// last 4 bytes, shifted so that only the bytes past offset 12 remain in the upper half.
r2h ^= (data.len < 12)
64+
? ((data[data.len - 3] | ((ulong)data[data.len - 2] << 8) | ((ulong)data[data.len - 1] << 16) | ((ulong)1 << 24)) >> ((data.len * 8) ^ 88))
65+
: (((@unaligned_load(*(uint*)&data[^4], 1) | ((ulong)1 << 32)) >> (128 - data.len * 8)) << 32 | @unaligned_load(*(uint*)&data[8], 1));
66+
}
67+
else if (data.len != 0)
68+
{
69+
// 1..7 bytes: everything fits in r1h. len 1..3 is assembled byte by byte; len 4..7 combines the
// first 4 and last 4 bytes (which may overlap) with the overlap shifted out.
r1h ^= (data.len < 4)
70+
? (((ulong)1 << (data.len * 8)) ^ data[0] ^ (data.len > 1 ? (ulong)data[1] << 8 : 0) ^ (data.len > 2 ? (ulong)data[2] << 16 : 0))
71+
: (((@unaligned_load(*(uint*)&data[^4], 1) | ((ulong)1 << 32)) >> (64 - data.len * 8)) << 32 | @unaligned_load(*(uint*)&data[0], 1));
72+
}
73+
}
74+
else if (data.len < 32)
75+
{
76+
// HASH16
77+
@komimul(
78+
@unaligned_load(*(ulong*)&data[0], 1) ^ seed1,
79+
@unaligned_load(*(ulong*)&data[8], 1) ^ seed5,
80+
seed1, seed5
81+
);
82+
seed1 ^= seed5;
83+
84+
// The first 16 bytes are mixed in above; 0..15 bytes remain. The tail word is taken from the
// last 8 bytes of the input; the XOR constants are chosen so that, for the lengths reaching
// each branch, (len * 8) ^ K equals 56 minus eight times the number of tail bytes.
if (data.len < 24)
85+
{
86+
// len 16..23: 0..7 tail bytes, all placed in r1h.
r1h = (((@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56)) >> (((int)(data.len * 8) ^ 184))) ^ seed1;
87+
r2h = seed5;
88+
}
89+
else
90+
{
91+
// len 24..31: bytes 16..23 go into r1h, the 0..7 bytes after them into r2h.
r1h = @unaligned_load(*(ulong*)&data[16], 1) ^ seed1;
92+
r2h = (((@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56)) >> (((int)(data.len * 8) ^ 248))) ^ seed5;
93+
}
94+
}
95+
else
96+
{
97+
if (data.len >= 64)
98+
{
99+
// Eight-word state for the 64-byte loop: words 0..3 derive from seed1, words 4..7 from seed5.
ulong[8] seeds = {
100+
seed1, 0x1319_8A2E_0370_7344 ^ seed1, 0xA409_3822_299F_31D0 ^ seed1, 0x082E_FA98_EC4E_6C89 ^ seed1,
101+
seed5, 0xBE54_66CF_34E9_0C6C ^ seed5, 0xC0AC_29B7_C97C_50DD ^ seed5, 0x3F84_D5B5_B547_0917 ^ seed5,
102+
};
103+
104+
// HASHLOOP64
105+
for (; data.len >= 64; data = data[64:^64])
106+
{
107+
// Four multiplies unrolled at compile time: multiply $x pairs the word at byte offset $x * 8
// with the word at offset 32 + $x * 8.
$for var $x = 0; $x < 4; ++$x :
108+
@komimul(
109+
@unaligned_load(*(ulong*)&data[0 + ($x * 8)], 1) ^ seeds[$x],
110+
@unaligned_load(*(ulong*)&data[32 + ($x * 8)], 1) ^ seeds[4 + $x],
111+
seeds[$x], seeds[4 + $x]
112+
);
113+
$endfor
114+
115+
// Cross-mix the low-half words with the high-half words.
seeds[3] ^= seeds[6];
116+
seeds[0] ^= seeds[7];
117+
seeds[2] ^= seeds[5];
118+
seeds[1] ^= seeds[4];
119+
}
120+
121+
// Collapse the eight words back into the two-word state.
seed1 = seeds[0] ^ seeds[1] ^ seeds[2] ^ seeds[3];
122+
seed5 = seeds[4] ^ seeds[5] ^ seeds[6] ^ seeds[7];
123+
}
124+
125+
// Mix the remaining input 16 bytes at a time; fewer than 16 bytes are left afterwards.
for (; data.len >= 16; data = data[16:^16])
126+
{
127+
@komimul(
128+
@unaligned_load(*(ulong*)&data[0], 1) ^ seed1,
129+
@unaligned_load(*(ulong*)&data[8], 1) ^ seed5,
130+
seed1, seed5
131+
);
132+
seed1 ^= seed5;
133+
}
134+
135+
if (data.len < 8)
136+
{
137+
// NOTE: This is translated from the original code. It loads the 8 bytes that END at the tail of the buffer even
138+
// though fewer than 8 bytes of `data` remain, so the load reaches back into bytes that were already consumed.
139+
// That stays in bounds because this branch is only reached when the original data slice length is >= 32.
140+
r1h = (((@unaligned_load(*(ulong*)(data.ptr + data.len - 8), 1) >> 8) | ((ulong)1 << 56)) >> ((data.len * 8) ^ 0x38)) ^ seed1;
141+
r2h = seed5;
142+
}
143+
else
144+
{
145+
// 8..15 bytes left: the first 8 go into r1h, the 0..7 bytes after them into r2h.
r1h = @unaligned_load(*(ulong*)data.ptr, 1) ^ seed1;
146+
r2h = (((@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56)) >> ((data.len * 8) ^ 0x78)) ^ seed5;
147+
}
148+
}
149+
150+
// HASHFIN
151+
@komimul(r1h, r2h, seed1, seed5);
152+
seed1 ^= seed5;
153+
@komimul(seed1, seed5, seed1, seed5);
154+
seed1 ^= seed5;
155+
return seed1;
156+
}

0 commit comments

Comments
 (0)