|
| 1 | +// Copyright (c) 2025 Zack Puhl <[email protected]>. All rights reserved. |
| 2 | +// Use of this source code is governed by the MIT license |
| 3 | +// a copy of which can be found in the LICENSE_STDLIB file. |
| 4 | +// |
| 5 | +// An implementation of Aleksey Vaneev's komihash, version 5.27, in C3: |
| 6 | +// https://github.com/avaneev/komihash |
| 7 | +// |
| 8 | +// The license for komihash from the above repository at the time of writing is as follows: |
| 9 | +// |
| 10 | +// >> MIT License |
| 11 | +// >> |
| 12 | +// >> Copyright (c) 2021-2025 Aleksey Vaneev |
| 13 | +// >> |
| 14 | +// >> Permission is hereby granted, free of charge, to any person obtaining a copy |
| 15 | +// >> of this software and associated documentation files (the "Software"), to deal |
| 16 | +// >> in the Software without restriction, including without limitation the rights |
| 17 | +// >> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 18 | +// >> copies of the Software, and to permit persons to whom the Software is |
| 19 | +// >> furnished to do so, subject to the following conditions: |
| 20 | +// >> |
| 21 | +// >> The above copyright notice and this permission notice shall be included in all |
| 22 | +// >> copies or substantial portions of the Software. |
| 23 | +// >> |
| 24 | +// >> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 25 | +// >> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 26 | +// >> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 27 | +// >> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 28 | +// >> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 29 | +// >> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 30 | +// >> SOFTWARE. |
| 31 | +// |
| 32 | +// |
| 33 | +module std::hash::komi; |
| 34 | + |
| 35 | + |
// Full-width multiply step shared by every round of the hash.
//
// #u and #v are multiplied as 128-bit values. The low 64 bits of the product
// overwrite #lo; the high 64 bits are added onto whatever #hi already holds.
// The product is formed before either output is written, so a caller may pass
// the same variables as both inputs and outputs.
macro @komimul(#u, #v, #lo, #hi) @local
{
	uint128 product = (uint128)#u * (uint128)#v;
	ulong low_half = (ulong)product;
	ulong high_half = (ulong)(product >> 64);
	#lo = low_half;
	#hi += high_half;
}
| 42 | + |
| 43 | + |
// Compute the 64-bit komihash of `data`, optionally perturbed by `seed`.
//
// The input is handled by length class: fewer than 16 bytes, 16 to 31 bytes,
// and 32 bytes or more (with an extra 64-byte-per-iteration loop once at least
// 64 bytes are available). Each class leaves two words in `fin1`/`fin5`, which
// the shared finalisation at the bottom folds into the result.
//
// Multi-byte words are read with unaligned native loads, so `data` needs no
// particular alignment.
fn ulong hash(char[] data, ulong seed = 0)
{
	// The two masks split the seed's bits between the two state words.
	ulong seed1 = 0x243F_6A88_85A3_08D3 ^ (seed & 0x5555_5555_5555_5555);
	ulong seed5 = 0x4528_21E6_38D0_1377 ^ (seed & 0xAAAA_AAAA_AAAA_AAAA);

	// Inputs to the first finalisation multiply.
	ulong fin1, fin5;

	// HASHROUND
	@komimul(seed1, seed5, seed1, seed5);
	seed1 ^= seed5;

	if (@likely(data.len < 16))
	{
		fin1 = seed1;
		fin5 = seed5;

		if (@likely(data.len >= 8))
		{
			// First 8 bytes go in whole; the 0..7 leftover bytes are packed
			// below a single marker bit.
			fin1 ^= @unaligned_load(*(ulong*)data.ptr, 1);

			if (data.len < 12)
			{
				// Gather the last three bytes under a marker at bit 24, then
				// shift out the bytes that belong to the first word.
				// (len * 8) ^ 88 equals 24, 16, 8, 0 for len 8, 9, 10, 11.
				ulong last3 = data[data.len - 3]
					| ((ulong)data[data.len - 2] << 8)
					| ((ulong)data[data.len - 1] << 16)
					| ((ulong)1 << 24);
				fin5 ^= last3 >> ((data.len * 8) ^ 88);
			}
			else
			{
				// 4..7 leftover bytes: low half is bytes 8..11, high half is
				// the final four bytes trimmed to the part not already
				// covered, with the marker bit directly above it.
				ulong top = (@unaligned_load(*(uint*)&data[^4], 1) | ((ulong)1 << 32)) >> (128 - data.len * 8);
				fin5 ^= (top << 32) | @unaligned_load(*(uint*)&data[8], 1);
			}
		}
		else if (data.len != 0)
		{
			if (data.len < 4)
			{
				// 1..3 bytes: assemble byte by byte, marker bit just above
				// the last byte present.
				ulong packed = ((ulong)1 << (data.len * 8)) ^ data[0];
				if (data.len > 1) packed ^= (ulong)data[1] << 8;
				if (data.len > 2) packed ^= (ulong)data[2] << 16;
				fin1 ^= packed;
			}
			else
			{
				// 4..7 bytes: same two-halves scheme as above, starting at
				// offset 0.
				ulong top = (@unaligned_load(*(uint*)&data[^4], 1) | ((ulong)1 << 32)) >> (64 - data.len * 8);
				fin1 ^= (top << 32) | @unaligned_load(*(uint*)&data[0], 1);
			}
		}
	}
	else if (data.len < 32)
	{
		// HASH16
		@komimul(
			@unaligned_load(*(ulong*)&data[0], 1) ^ seed1,
			@unaligned_load(*(ulong*)&data[8], 1) ^ seed5,
			seed1, seed5
		);
		seed1 ^= seed5;

		// Last 8 bytes of the input, pre-shifted by one byte so the marker
		// can sit at bit 56; the shifts below drop the bytes that were
		// already consumed.
		ulong tail = (@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56);

		if (data.len < 24)
		{
			// (len * 8) ^ 184 runs 56 down to 0 for len 16..23.
			fin1 = (tail >> ((int)(data.len * 8) ^ 184)) ^ seed1;
			fin5 = seed5;
		}
		else
		{
			// (len * 8) ^ 248 runs 56 down to 0 for len 24..31.
			fin1 = @unaligned_load(*(ulong*)&data[16], 1) ^ seed1;
			fin5 = (tail >> ((int)(data.len * 8) ^ 248)) ^ seed5;
		}
	}
	else
	{
		if (data.len >= 64)
		{
			// Eight state words: indices 0..3 derive from seed1, 4..7 from
			// seed5.
			ulong[8] lanes = {
				seed1, 0x1319_8A2E_0370_7344 ^ seed1, 0xA409_3822_299F_31D0 ^ seed1, 0x082E_FA98_EC4E_6C89 ^ seed1,
				seed5, 0xBE54_66CF_34E9_0C6C ^ seed5, 0xC0AC_29B7_C97C_50DD ^ seed5, 0x3F84_D5B5_B547_0917 ^ seed5,
			};

			// HASHLOOP64
			while (data.len >= 64)
			{
				// Word k of the first 32 bytes is paired with word k of the
				// second 32 bytes.
				@komimul(
					@unaligned_load(*(ulong*)&data[0], 1) ^ lanes[0],
					@unaligned_load(*(ulong*)&data[32], 1) ^ lanes[4],
					lanes[0], lanes[4]
				);
				@komimul(
					@unaligned_load(*(ulong*)&data[8], 1) ^ lanes[1],
					@unaligned_load(*(ulong*)&data[40], 1) ^ lanes[5],
					lanes[1], lanes[5]
				);
				@komimul(
					@unaligned_load(*(ulong*)&data[16], 1) ^ lanes[2],
					@unaligned_load(*(ulong*)&data[48], 1) ^ lanes[6],
					lanes[2], lanes[6]
				);
				@komimul(
					@unaligned_load(*(ulong*)&data[24], 1) ^ lanes[3],
					@unaligned_load(*(ulong*)&data[56], 1) ^ lanes[7],
					lanes[3], lanes[7]
				);

				// Cross-mix the two halves of the state.
				lanes[3] ^= lanes[6];
				lanes[0] ^= lanes[7];
				lanes[2] ^= lanes[5];
				lanes[1] ^= lanes[4];

				data = data[64:^64];
			}

			// Collapse the eight words back into the two-word state.
			seed1 = lanes[0] ^ lanes[1] ^ lanes[2] ^ lanes[3];
			seed5 = lanes[4] ^ lanes[5] ^ lanes[6] ^ lanes[7];
		}

		// Consume what is left 16 bytes at a time.
		while (data.len >= 16)
		{
			@komimul(
				@unaligned_load(*(ulong*)&data[0], 1) ^ seed1,
				@unaligned_load(*(ulong*)&data[8], 1) ^ seed5,
				seed1, seed5
			);
			seed1 ^= seed5;

			data = data[16:^16];
		}

		if (data.len < 8)
		{
			// NOTE: the load below starts before the beginning of the
			// remaining slice. That is in bounds of the caller's buffer
			// because this path is only taken for inputs of at least 32
			// bytes, so at least 32 bytes have already been consumed in
			// front of `data.ptr`.
			ulong tail = (@unaligned_load(*(ulong*)(data.ptr + data.len - 8), 1) >> 8) | ((ulong)1 << 56);
			fin1 = (tail >> ((data.len * 8) ^ 0x38)) ^ seed1;
			fin5 = seed5;
		}
		else
		{
			ulong tail = (@unaligned_load(*(ulong*)&data[^8], 1) >> 8) | ((ulong)1 << 56);
			fin1 = @unaligned_load(*(ulong*)data.ptr, 1) ^ seed1;
			fin5 = (tail >> ((data.len * 8) ^ 0x78)) ^ seed5;
		}
	}

	// HASHFIN
	@komimul(fin1, fin5, seed1, seed5);
	seed1 ^= seed5;
	@komimul(seed1, seed5, seed1, seed5);
	return seed1 ^ seed5;
}
0 commit comments