From 507ca362df996d798f5a301152cd865c463392e0 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 23 Mar 2025 19:15:53 +0100 Subject: [PATCH 01/12] add lookup or insert --- src/hash.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/hash.c b/src/hash.c index 5ac0c23129..5f5544cd4f 100644 --- a/src/hash.c +++ b/src/hash.c @@ -95,6 +95,30 @@ R_xlen_t hash_lookup(const hashtab * h, SEXP key, R_xlen_t ifnotfound) { return ifnotfound; // # nocov } +R_xlen_t hash_lookup_or_insert(hashtab *h, SEXP key, R_xlen_t value) { + struct hash_pair *cell = h->tb + hash_index(key, h->multiplier) % h->size, *end = h->tb + h->size - 1; + for (size_t i = 0; i < h->size; ++i, cell = (cell == end ? h->tb : cell + 1)) { + if (cell->key == key) { + cell->value = value; + return cell->value; + } else if (!cell->key) { + if (!h->free) internal_error( + __func__, "no free slots left (full size=%zu)", h->size + ); + --h->free; + *cell = (struct hash_pair){.key = key, .value = value}; + return value; // insert here + } + } + + internal_error( // # nocov + __func__, "did not find a free slot for key %p; size=%zu, free=%zu", + (void*)key, h->size, h->free + ); + // Should be impossible, but just in case: + return value; +} + typedef struct dhashtab_ { dhashtab public; // must be at offset 0 size_t size, used, limit; From 47319c3189ab3ee1b57530d5e896880fc91962c2 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 23 Mar 2025 19:16:55 +0100 Subject: [PATCH 02/12] use lookup or insert --- src/chmatch.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/chmatch.c b/src/chmatch.c index ac3851b1f1..5d7d75925c 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -59,8 +59,7 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch int nuniq=0; for (int i=0; i Date: Sun, 23 Mar 2025 19:38:48 +0100 Subject: [PATCH 03/12] use lookup_or_insert --- src/data.table.h | 2 ++ src/hash.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index f9e502be87..1456008ad5 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -294,6 +294,8 @@ hashtab * hash_create(size_t n); void hash_set(hashtab *, SEXP key, R_xlen_t value); // Returns the value corresponding to the key present in the hash, otherwise returns ifnotfound. R_xlen_t hash_lookup(const hashtab *, SEXP key, R_xlen_t ifnotfound); +// Returns the value corresponding to the key present in the hash, otherwise inserts value. +R_xlen_t hash_lookup_or_insert(hashtab *, SEXP key, R_xlen_t value); // The dynamically-allocated hash table has a public field for the R protection wrapper. // Keep it PROTECTed while the table is in use. diff --git a/src/hash.c b/src/hash.c index 5f5544cd4f..bce932cab6 100644 --- a/src/hash.c +++ b/src/hash.c @@ -99,8 +99,7 @@ R_xlen_t hash_lookup_or_insert(hashtab *h, SEXP key, R_xlen_t value) { struct hash_pair *cell = h->tb + hash_index(key, h->multiplier) % h->size, *end = h->tb + h->size - 1; for (size_t i = 0; i < h->size; ++i, cell = (cell == end ? h->tb : cell + 1)) { if (cell->key == key) { - cell->value = value; - return cell->value; + return cell->value; // found key, only lookup, no insert } else if (!cell->key) { if (!h->free) internal_error( __func__, "no free slots left (full size=%zu)", h->size From 337a0c2d508a31c59885416d7929ff6d6a4b0bda Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 23 Mar 2025 19:44:41 +0100 Subject: [PATCH 04/12] really use lookup or insert --- src/chmatch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chmatch.c b/src/chmatch.c index 5d7d75925c..5d39d6d217 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -59,7 +59,7 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch int nuniq=0; for (int i=0; i Date: Sun, 23 Mar 2025 20:20:12 +0100 Subject: [PATCH 05/12] use cuckoo hashing --- src/chmatch.c | 3 +- src/data.table.h | 2 - src/hash.c | 109 +++++++++++++++++++++-------------------------- 3 files changed, 51 insertions(+), 63 deletions(-) diff --git a/src/chmatch.c b/src/chmatch.c index 5d39d6d217..ac3851b1f1 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -59,7 +59,8 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch int nuniq=0; for (int i=0; i= 1) @@ -39,14 +40,19 @@ static hashtab * hash_create_(size_t n, double load_factor) { __func__, "n=%zu with load_factor=%g would overflow total allocation size", n, load_factor ); - hashtab * ret = (hashtab *)R_alloc(sizeof(hashtab) + sizeof(struct hash_pair[n_full]), 1); + hashtab *ret = (hashtab *)R_alloc(sizeof(hashtab), 1); ret->size = n_full; ret->free = n; // To compute floor(size * (A * key % 1)) in integer arithmetic with A < 1, use ((size * A) * key) % size. - ret->multiplier = n_full * hash_multiplier; + ret->multiplier1 = n_full * hash_multiplier1; + ret->multiplier2 = n_full * hash_multiplier2; + ret->tb1 = (struct hash_pair *)R_alloc(sizeof(struct hash_pair[n_full]), 1); + ret->tb2 = (struct hash_pair *)R_alloc(sizeof(struct hash_pair[n_full]), 1); // No valid SEXP is a null pointer, so it's a safe marker for empty cells. - for (size_t i = 0; i < n_full; ++i) - ret->tb[i].key = NULL; + for (size_t i = 0; i < n_full; ++i) { + ret->tb1[i].key = NULL; + ret->tb2[i].key = NULL; + } return ret; } @@ -54,70 +60,53 @@ hashtab * hash_create(size_t n) { return hash_create_(n, .5); } // Hashing for an open addressing hash table. See Cormen et al., Introduction to Algorithms, 3rd ed., section 11.4. // This is far from perfect. Make size a prime or a power of two and you'll be able to use double hashing. -static R_INLINE size_t hash_index(SEXP key, uintptr_t multiplier) { +static R_INLINE size_t hash_index1(SEXP key, uintptr_t multiplier) { // The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size. // Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees, // which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes. return ((((uintptr_t)key) >> 4) & 0x0fffffff) * multiplier; } -void hash_set(hashtab * h, SEXP key, R_xlen_t value) { - struct hash_pair *cell = h->tb + hash_index(key, h->multiplier) % h->size, *end = h->tb + h->size - 1; - for (size_t i = 0; i < h->size; ++i, cell = cell == end ? h->tb : cell+1) { - if (cell->key == key) { - cell->value = value; +static R_INLINE size_t hash_index2(SEXP key, uintptr_t multiplier) { + return ((((uintptr_t)key) >> 6) & 0x0fffffff) * multiplier; +} + + +void hash_set(hashtab *h, SEXP key, R_xlen_t value) { + size_t max_relocations = h->size; + struct hash_pair item = { .key = key, .value = value }; + for (size_t i = 0; i < max_relocations; ++i) { + size_t idx1 = hash_index1(item.key, h->multiplier1) % h->size; + if (!h->tb1[idx1].key) { + h->tb1[idx1] = item; return; - } else if (!cell->key) { - if (!h->free) internal_error( - __func__, "no free slots left (full size=%zu)", h->size - ); - --h->free; - *cell = (struct hash_pair){.key = key, .value = value}; + } + struct hash_pair temp = h->tb1[idx1]; + h->tb1[idx1] = item; + item = temp; + + size_t idx2 = hash_index2(item.key, h->multiplier2) % h->size; + if (!h->tb2[idx2].key) { + h->tb2[idx2] = item; return; } + temp = h->tb2[idx2]; + h->tb2[idx2] = item; + item = temp; } - internal_error( // # nocov - __func__, "did not find a free slot for key %p; size=%zu, free=%zu", - (void*)key, h->size, h->free - ); + internal_error(__func__, "Cuckoo hashing cycle detected, rehash needed"); } -R_xlen_t hash_lookup(const hashtab * h, SEXP key, R_xlen_t ifnotfound) { - const struct hash_pair * cell = h->tb + hash_index(key, h->multiplier) % h->size, *end = h->tb + h->size - 1; - for (size_t i = 0; i < h->size; ++i, cell = cell == end ? h->tb : cell+1) { - if (cell->key == key) { - return cell->value; - } else if (!cell->key) { - return ifnotfound; - } - } +R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) { + size_t idx1 = hash_index1(key, h->multiplier1) % h->size; + if (h->tb1[idx1].key == key) return h->tb1[idx1].value; + + size_t idx2 = hash_index2(key, h->multiplier2) % h->size; + if (h->tb2[idx2].key == key) return h->tb2[idx2].value; // Should be impossible with a load factor below 1, but just in case: return ifnotfound; // # nocov } -R_xlen_t hash_lookup_or_insert(hashtab *h, SEXP key, R_xlen_t value) { - struct hash_pair *cell = h->tb + hash_index(key, h->multiplier) % h->size, *end = h->tb + h->size - 1; - for (size_t i = 0; i < h->size; ++i, cell = (cell == end ? h->tb : cell + 1)) { - if (cell->key == key) { - return cell->value; // found key, only lookup, no insert - } else if (!cell->key) { - if (!h->free) internal_error( - __func__, "no free slots left (full size=%zu)", h->size - ); - --h->free; - *cell = (struct hash_pair){.key = key, .value = value}; - return value; // insert here - } - } - - internal_error( // # nocov - __func__, "did not find a free slot for key %p; size=%zu, free=%zu", - (void*)key, h->size, h->free - ); - // Should be impossible, but just in case: - return value; -} - typedef struct dhashtab_ { dhashtab public; // must be at offset 0 size_t size, used, limit; @@ -158,7 +147,7 @@ static dhashtab * dhash_create_(size_t n, double load_factor) { self->table = dhash_allocate(n_full); self->size = n_full; self->limit = n; - self->multiplier = n_full * hash_multiplier; + self->multiplier = n_full * hash_multiplier1; // this is the last time we're allowed to set the table parts piece by piece UNPROTECT(1); @@ -172,10 +161,10 @@ static void dhash_enlarge(dhashtab_ * self) { internal_error(__func__, "doubling %zu elements would overflow size_t", self->size); // # nocov size_t new_size = self->size * 2; struct hash_pair * new = dhash_allocate(new_size); - uintptr_t new_multiplier = new_size * hash_multiplier; + uintptr_t new_multiplier = new_size * hash_multiplier1; for (size_t i = 0; i < self->size; ++i) { for (size_t j = 0; j < new_size; ++j) { - size_t ii = (hash_index(self->table[i].key, new_multiplier) + j) % new_size; + size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) % new_size; if (!new[ii].key) { new[ii] = (struct hash_pair){ .key = self->table[i].key, @@ -208,7 +197,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) { dhashtab_ * self = (dhashtab_ *)h; struct hash_pair *cell, *end; again: - cell = self->table + hash_index(key, self->multiplier) % self->size; + cell = self->table + hash_index1(key, self->multiplier) % self->size; end = self->table + self->size - 1; for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) { if (cell->key == key) { @@ -234,7 +223,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) { #pragma omp flush // no locking or atomic access! this is bad dhashtab_ self = *(dhashtab_ *)h; R_xlen_t ret = ifnotfound; - const struct hash_pair * cell = self.table + hash_index(key, self.multiplier) % self.size; + const struct hash_pair * cell = self.table + hash_index1(key, self.multiplier) % self.size; const struct hash_pair * end = self.table + self.size - 1; for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) { if (cell->key == key) { From 09b3725acce257bbc6ef2cb55c36220528bc42e0 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 23 Mar 2025 20:27:52 +0100 Subject: [PATCH 06/12] add rehash --- src/hash.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/hash.c b/src/hash.c index 1a96866aa7..438d894722 100644 --- a/src/hash.c +++ b/src/hash.c @@ -71,6 +71,16 @@ static R_INLINE size_t hash_index2(SEXP key, uintptr_t multiplier) { return ((((uintptr_t)key) >> 6) & 0x0fffffff) * multiplier; } +void hash_rehash(hashtab *h) { + size_t new_size = h->size * 2; + hashtab *new_h = hash_create_(new_size, 0.5); + + for (size_t i = 0; i < h->size; ++i) { + if (h->tb1[i].key) hash_set(new_h, h->tb1[i].key, h->tb1[i].value); + if (h->tb2[i].key) hash_set(new_h, h->tb2[i].key, h->tb2[i].value); + } + *h = *new_h; +} void hash_set(hashtab *h, SEXP key, R_xlen_t value) { size_t max_relocations = h->size; @@ -94,7 +104,9 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) { h->tb2[idx2] = item; item = temp; } - internal_error(__func__, "Cuckoo hashing cycle detected, rehash needed"); + // need to rehash + hash_rehash(h); + hash_set(h, key, value); } R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) { From 7d4b0672cc03a701ce2a4699ff56b2244f6899bf Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 6 Nov 2025 11:10:17 +0100 Subject: [PATCH 07/12] use power of 2 and mask instead of modulo --- src/hash.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/hash.c b/src/hash.c index 438d894722..f4aa0e7907 100644 --- a/src/hash.c +++ b/src/hash.c @@ -26,7 +26,15 @@ static R_INLINE size_t get_full_size(size_t n_elements, double load_factor) { __func__, "n=%zu / load_factor=%g would overflow size_t", n_elements, load_factor ); - return ceil(n_elements / load_factor); + size_t min_size = ceil(n_elements / load_factor); + // Round up to next power of 2 for fast modulo using bitwise AND + size_t pow2 = 1; + while (pow2 < min_size) { + if (pow2 > SIZE_MAX / 2) + internal_error(__func__, "size %zu would overflow size_t", min_size); // # nocov + pow2 *= 2; + } + return pow2; } static hashtab * hash_create_(size_t n, double load_factor) { @@ -83,19 +91,20 @@ void hash_rehash(hashtab *h) { } void hash_set(hashtab *h, SEXP key, R_xlen_t value) { - size_t max_relocations = h->size; + size_t max_relocations = h->size; + size_t mask = h->size - 1; struct hash_pair item = { .key = key, .value = value }; for (size_t i = 0; i < max_relocations; ++i) { - size_t idx1 = hash_index1(item.key, h->multiplier1) % h->size; + size_t idx1 = hash_index1(item.key, h->multiplier1) & mask; if (!h->tb1[idx1].key) { h->tb1[idx1] = item; return; } struct hash_pair temp = h->tb1[idx1]; h->tb1[idx1] = item; - item = temp; - - size_t idx2 = hash_index2(item.key, h->multiplier2) % h->size; + item = temp; + + size_t idx2 = hash_index2(item.key, h->multiplier2) & mask; if (!h->tb2[idx2].key) { h->tb2[idx2] = item; return; @@ -110,10 +119,11 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) { } R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) { - size_t idx1 = hash_index1(key, h->multiplier1) % h->size; + size_t mask = h->size - 1; + size_t idx1 = hash_index1(key, h->multiplier1) & mask; if (h->tb1[idx1].key == key) return h->tb1[idx1].value; - - size_t idx2 = hash_index2(key, h->multiplier2) % h->size; + + size_t idx2 = hash_index2(key, h->multiplier2) & mask; if (h->tb2[idx2].key == key) return h->tb2[idx2].value; // Should be impossible with a load factor below 1, but just in case: return ifnotfound; // # nocov @@ -172,11 +182,13 @@ static void dhash_enlarge(dhashtab_ * self) { if (self->size > SIZE_MAX / 2) internal_error(__func__, "doubling %zu elements would overflow size_t", self->size); // # nocov size_t new_size = self->size * 2; + size_t new_mask = new_size - 1; struct hash_pair * new = dhash_allocate(new_size); uintptr_t new_multiplier = new_size * hash_multiplier1; for (size_t i = 0; i < self->size; ++i) { + if (!self->table[i].key) continue; for (size_t j = 0; j < new_size; ++j) { - size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) % new_size; + size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) & new_mask; if (!new[ii].key) { new[ii] = (struct hash_pair){ .key = self->table[i].key, @@ -209,7 +221,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) { dhashtab_ * self = (dhashtab_ *)h; struct hash_pair *cell, *end; again: - cell = self->table + hash_index1(key, self->multiplier) % self->size; + cell = self->table + (hash_index1(key, self->multiplier) & (self->size - 1)); end = self->table + self->size - 1; for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) { if (cell->key == key) { @@ -235,7 +247,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) { #pragma omp flush // no locking or atomic access! this is bad dhashtab_ self = *(dhashtab_ *)h; R_xlen_t ret = ifnotfound; - const struct hash_pair * cell = self.table + hash_index1(key, self.multiplier) % self.size; + const struct hash_pair * cell = self.table + (hash_index1(key, self.multiplier) & (self.size - 1)); const struct hash_pair * end = self.table + self.size - 1; for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) { if (cell->key == key) { From e169d2c4f1fdce613f8602967a4d7e48a98e2d10 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 6 Nov 2025 12:48:29 +0100 Subject: [PATCH 08/12] mix instead of multiplication --- src/hash.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/hash.c b/src/hash.c index f4aa0e7907..6f4d1e22af 100644 --- a/src/hash.c +++ b/src/hash.c @@ -68,15 +68,19 @@ hashtab * hash_create(size_t n) { return hash_create_(n, .5); } // Hashing for an open addressing hash table. See Cormen et al., Introduction to Algorithms, 3rd ed., section 11.4. // This is far from perfect. Make size a prime or a power of two and you'll be able to use double hashing. -static R_INLINE size_t hash_index1(SEXP key, uintptr_t multiplier) { +static R_INLINE size_t hash_index1(SEXP key, size_t mask) { // The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size. // Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees, // which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes. - return ((((uintptr_t)key) >> 4) & 0x0fffffff) * multiplier; + uintptr_t h = (uintptr_t)key >> 4; + return h & mask; } -static R_INLINE size_t hash_index2(SEXP key, uintptr_t multiplier) { - return ((((uintptr_t)key) >> 6) & 0x0fffffff) * multiplier; +static R_INLINE size_t hash_index2(SEXP key, size_t mask) { + // Use XOR folding to mix up the bits + uintptr_t h = (uintptr_t)key >> 4; + h ^= h >> 10; + return h & mask; } void hash_rehash(hashtab *h) { @@ -95,7 +99,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) { size_t mask = h->size - 1; struct hash_pair item = { .key = key, .value = value }; for (size_t i = 0; i < max_relocations; ++i) { - size_t idx1 = hash_index1(item.key, h->multiplier1) & mask; + size_t idx1 = hash_index1(item.key, mask); if (!h->tb1[idx1].key) { h->tb1[idx1] = item; return; @@ -104,7 +108,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) { h->tb1[idx1] = item; item = temp; - size_t idx2 = hash_index2(item.key, h->multiplier2) & mask; + size_t idx2 = hash_index2(item.key, mask); if (!h->tb2[idx2].key) { h->tb2[idx2] = item; return; @@ -120,10 +124,10 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) { R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) { size_t mask = h->size - 1; - size_t idx1 = hash_index1(key, h->multiplier1) & mask; + size_t idx1 = hash_index1(key, mask); if (h->tb1[idx1].key == key) return h->tb1[idx1].value; - size_t idx2 = hash_index2(key, h->multiplier2) & mask; + size_t idx2 = hash_index2(key, mask); if (h->tb2[idx2].key == key) return h->tb2[idx2].value; // Should be impossible with a load factor below 1, but just in case: return ifnotfound; // # nocov @@ -188,7 +192,7 @@ static void dhash_enlarge(dhashtab_ * self) { for (size_t i = 0; i < self->size; ++i) { if (!self->table[i].key) continue; for (size_t j = 0; j < new_size; ++j) { - size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) & new_mask; + size_t ii = (hash_index1(self->table[i].key, new_mask) + j) & new_mask; if (!new[ii].key) { new[ii] = (struct hash_pair){ .key = self->table[i].key, @@ -221,7 +225,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) { dhashtab_ * self = (dhashtab_ *)h; struct hash_pair *cell, *end; again: - cell = self->table + (hash_index1(key, self->multiplier) & (self->size - 1)); + cell = self->table + hash_index1(key, self->size - 1); end = self->table + self->size - 1; for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) { if (cell->key == key) { @@ -247,7 +251,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) { #pragma omp flush // no locking or atomic access! this is bad dhashtab_ self = *(dhashtab_ *)h; R_xlen_t ret = ifnotfound; - const struct hash_pair * cell = self.table + (hash_index1(key, self.multiplier) & (self.size - 1)); + const struct hash_pair * cell = self.table + hash_index1(key, self.size - 1); const struct hash_pair * end = self.table + self.size - 1; for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) { if (cell->key == key) { From b017013da3af9f8a494eb151e6b4a67621c4fa53 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 6 Nov 2025 13:07:27 +0100 Subject: [PATCH 09/12] use different mixes --- src/hash.c | 53 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/src/hash.c b/src/hash.c index 6f4d1e22af..ee4a2d16ca 100644 --- a/src/hash.c +++ b/src/hash.c @@ -12,9 +12,15 @@ struct hash_tab { struct hash_pair *tb1, *tb2; }; -// TAOCP vol. 3, section 6.4: for multiplication hashing, use A ~ 1/phi, the golden ratio. -static const double hash_multiplier1 = 0.618033988749895; -static const double hash_multiplier2 = 0.316227766016838; +// Fast integer hash multipliers based on golden ratio and other constants +// 0x9e3779b9 is 2^32 * phi (golden ratio) for 32-bit mixing +#if SIZE_MAX == UINT64_MAX + static const uintptr_t hash_multiplier1 = 0x9e3779b97f4a7c15ULL; + static const uintptr_t hash_multiplier2 = 0x85ebca77c2b2ae35ULL; +#else + static const uintptr_t hash_multiplier1 = 0x9e3779b9U; + static const uintptr_t hash_multiplier2 = 0x85ebca77U; +#endif static R_INLINE size_t get_full_size(size_t n_elements, double load_factor) { if (load_factor <= 0 || load_factor >= 1) @@ -51,7 +57,7 @@ static hashtab * hash_create_(size_t n, double load_factor) { hashtab *ret = (hashtab *)R_alloc(sizeof(hashtab), 1); ret->size = n_full; ret->free = n; - // To compute floor(size * (A * key % 1)) in integer arithmetic with A < 1, use ((size * A) * key) % size. + // Multiply by size to get different hash functions when rehashing ret->multiplier1 = n_full * hash_multiplier1; ret->multiplier2 = n_full * hash_multiplier2; ret->tb1 = (struct hash_pair *)R_alloc(sizeof(struct hash_pair[n_full]), 1); @@ -66,21 +72,22 @@ static hashtab * hash_create_(size_t n, double load_factor) { hashtab * hash_create(size_t n) { return hash_create_(n, .5); } -// Hashing for an open addressing hash table. See Cormen et al., Introduction to Algorithms, 3rd ed., section 11.4. -// This is far from perfect. Make size a prime or a power of two and you'll be able to use double hashing. -static R_INLINE size_t hash_index1(SEXP key, size_t mask) { - // The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size. - // Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees, - // which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes. +// Fast hash mixing using XOR-shift and integer multiplication +static R_INLINE size_t hash_index1(SEXP key, uintptr_t multiplier) { uintptr_t h = (uintptr_t)key >> 4; - return h & mask; + // XOR folding to mix high bits into low bits + h ^= h >> 16; + h *= multiplier; + h ^= h >> 13; + return h; } -static R_INLINE size_t hash_index2(SEXP key, size_t mask) { - // Use XOR folding to mix up the bits - uintptr_t h = (uintptr_t)key >> 4; - h ^= h >> 10; - return h & mask; +static R_INLINE size_t hash_index2(SEXP key, uintptr_t multiplier) { + uintptr_t h = (uintptr_t)key >> 6; + h ^= h >> 18; + h *= multiplier; + h ^= h >> 15; + return h; } void hash_rehash(hashtab *h) { @@ -99,7 +106,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) { size_t mask = h->size - 1; struct hash_pair item = { .key = key, .value = value }; for (size_t i = 0; i < max_relocations; ++i) { - size_t idx1 = hash_index1(item.key, mask); + size_t idx1 = hash_index1(item.key, h->multiplier1) & mask; if (!h->tb1[idx1].key) { h->tb1[idx1] = item; return; @@ -108,7 +115,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) { h->tb1[idx1] = item; item = temp; - size_t idx2 = hash_index2(item.key, mask); + size_t idx2 = hash_index2(item.key, h->multiplier2) & mask; if (!h->tb2[idx2].key) { h->tb2[idx2] = item; return; @@ -124,10 +131,10 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) { R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) { size_t mask = h->size - 1; - size_t idx1 = hash_index1(key, mask); + size_t idx1 = hash_index1(key, h->multiplier1) & mask; if (h->tb1[idx1].key == key) return h->tb1[idx1].value; - size_t idx2 = hash_index2(key, mask); + size_t idx2 = hash_index2(key, h->multiplier2) & mask; if (h->tb2[idx2].key == key) return h->tb2[idx2].value; // Should be impossible with a load factor below 1, but just in case: return ifnotfound; // # nocov @@ -192,7 +199,7 @@ static void dhash_enlarge(dhashtab_ * self) { for (size_t i = 0; i < self->size; ++i) { if (!self->table[i].key) continue; for (size_t j = 0; j < new_size; ++j) { - size_t ii = (hash_index1(self->table[i].key, new_mask) + j) & new_mask; + size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) & new_mask; if (!new[ii].key) { new[ii] = (struct hash_pair){ .key = self->table[i].key, @@ -225,7 +232,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) { dhashtab_ * self = (dhashtab_ *)h; struct hash_pair *cell, *end; again: - cell = self->table + hash_index1(key, self->size - 1); + cell = self->table + (hash_index1(key, self->multiplier) & (self->size - 1)); end = self->table + self->size - 1; for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) { if (cell->key == key) { @@ -251,7 +258,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) { #pragma omp flush // no locking or atomic access! this is bad dhashtab_ self = *(dhashtab_ *)h; R_xlen_t ret = ifnotfound; - const struct hash_pair * cell = self.table + hash_index1(key, self.size - 1); + const struct hash_pair * cell = self.table + (hash_index1(key, self.multiplier) & (self.size - 1)); const struct hash_pair * end = self.table + self.size - 1; for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) { if (cell->key == key) { From 5a474e0eb4b22a825b3416934793b73be9433a18 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 6 Nov 2025 13:20:54 +0100 Subject: [PATCH 10/12] change multipliers --- src/hash.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/hash.c b/src/hash.c index ee4a2d16ca..fff465c816 100644 --- a/src/hash.c +++ b/src/hash.c @@ -12,15 +12,10 @@ struct hash_tab { struct hash_pair *tb1, *tb2; }; -// Fast integer hash multipliers based on golden ratio and other constants -// 0x9e3779b9 is 2^32 * phi (golden ratio) for 32-bit mixing -#if SIZE_MAX == UINT64_MAX - static const uintptr_t hash_multiplier1 = 0x9e3779b97f4a7c15ULL; - static const uintptr_t hash_multiplier2 = 0x85ebca77c2b2ae35ULL; -#else - static const uintptr_t hash_multiplier1 = 0x9e3779b9U; - static const uintptr_t hash_multiplier2 = 0x85ebca77U; -#endif +// TAOCP vol. 3, section 6.4: for multiplication hashing, use A ~ 1/phi, the golden ratio. +// +static const double hash_multiplier1 = 0.618033988749895; +static const double hash_multiplier2 = 0.316227766016838; static R_INLINE size_t get_full_size(size_t n_elements, double load_factor) { if (load_factor <= 0 || load_factor >= 1) From 1d88ad4c9dd179fb815266fb3acb687a89b8b7c7 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 6 Nov 2025 13:35:20 +0100 Subject: [PATCH 11/12] use double hashing --- src/chmatch.c | 2 ++ src/hash.c | 61 ++++++++++++++++++++++++++++----------------------- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/src/chmatch.c b/src/chmatch.c index ac3851b1f1..dd474853fc 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -101,10 +101,12 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch free(counts); free(map); } else if (chin) { + #pragma omp parallel for if(xlen > 100000) schedule(static) num_threads(getDTthreads(xlen, false)) for (int i=0; i 100000) schedule(static) num_threads(getDTthreads(xlen, false)) for (int i=0; imultiplier1 = n_full * hash_multiplier1; ret->multiplier2 = n_full * hash_multiplier2; - ret->tb1 = (struct hash_pair *)R_alloc(sizeof(struct hash_pair[n_full]), 1); - ret->tb2 = (struct hash_pair *)R_alloc(sizeof(struct hash_pair[n_full]), 1); + ret->table = (struct hash_pair *)R_alloc(sizeof(struct hash_pair[n_full]), 1); // No valid SEXP is a null pointer, so it's a safe marker for empty cells. for (size_t i = 0; i < n_full; ++i) { - ret->tb1[i].key = NULL; - ret->tb2[i].key = NULL; + ret->table[i].key = NULL; } return ret; } @@ -90,35 +88,36 @@ void hash_rehash(hashtab *h) { hashtab *new_h = hash_create_(new_size, 0.5); for (size_t i = 0; i < h->size; ++i) { - if (h->tb1[i].key) hash_set(new_h, h->tb1[i].key, h->tb1[i].value); - if (h->tb2[i].key) hash_set(new_h, h->tb2[i].key, h->tb2[i].value); + if (h->table[i].key) hash_set(new_h, h->table[i].key, h->table[i].value); } - *h = *new_h; + *h = *new_h; } void hash_set(hashtab *h, SEXP key, R_xlen_t value) { - size_t max_relocations = h->size; size_t mask = h->size - 1; - struct hash_pair item = { .key = key, .value = value }; - for (size_t i = 0; i < max_relocations; ++i) { - size_t idx1 = hash_index1(item.key, h->multiplier1) & mask; - if (!h->tb1[idx1].key) { - h->tb1[idx1] = item; + size_t h1 = hash_index1(key, h->multiplier1) & mask; + size_t h2 = hash_index2(key, h->multiplier2) & mask; + + if (h2 == 0) h2 = 1; + else if ((h2 & 1) == 0) h2 |= 1; + + for (size_t i = 0; i < h->size; ++i) { + size_t idx = (h1 + i * h2) & mask; + + if (!h->table[idx].key) { + // Empty slot found + h->table[idx].key = key; + h->table[idx].value = value; + h->free--; return; } - struct hash_pair temp = h->tb1[idx1]; - h->tb1[idx1] = item; - item = temp; - size_t idx2 = hash_index2(item.key, h->multiplier2) & mask; - if (!h->tb2[idx2].key) { - h->tb2[idx2] = item; + if (h->table[idx].key == key) { + h->table[idx].value = value; return; } - temp = h->tb2[idx2]; - h->tb2[idx2] = item; - item = temp; } + // need to rehash hash_rehash(h); hash_set(h, key, value); @@ -126,12 +125,18 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) { R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) { size_t mask = h->size - 1; - size_t idx1 = hash_index1(key, h->multiplier1) & mask; - if (h->tb1[idx1].key == key) return h->tb1[idx1].value; + size_t h1 = hash_index1(key, h->multiplier1) & mask; + size_t h2 = hash_index2(key, h->multiplier2) & mask; + + if (h2 == 0) h2 = 1; + else if ((h2 & 1) == 0) h2 |= 1; + + for (size_t i = 0; i < h->size; ++i) { + size_t idx = (h1 + i * h2) & mask; + if (!h->table[idx].key) return ifnotfound; + if (h->table[idx].key == key) return h->table[idx].value; + } - size_t idx2 = hash_index2(key, h->multiplier2) & mask; - if (h->tb2[idx2].key == key) return h->tb2[idx2].value; - // Should be impossible with a load factor below 1, but just in case: return ifnotfound; // # nocov } From 48b19422dd331a9d7eaddff507563bcf9643ebdd Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 6 Nov 2025 15:52:05 +0100 Subject: [PATCH 12/12] remove xor folding --- src/chmatch.c | 4 ++-- src/hash.c | 23 +++++++++++------------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/chmatch.c b/src/chmatch.c index dd474853fc..34c50c22b3 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -101,12 +101,12 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch free(counts); free(map); } else if (chin) { - #pragma omp parallel for if(xlen > 100000) schedule(static) num_threads(getDTthreads(xlen, false)) + #pragma omp parallel for num_threads(getDTthreads(xlen, true)) for (int i=0; i 100000) schedule(static) num_threads(getDTthreads(xlen, false)) + #pragma omp parallel for num_threads(getDTthreads(xlen, true)) for (int i=0; i> 4; - // XOR folding to mix high bits into low bits - h ^= h >> 16; - h *= multiplier; - h ^= h >> 13; - return h; + // The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size. + // Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees, + // which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes. + return ((((uintptr_t)key) >> 4) & 0x0fffffff) * multiplier; } static R_INLINE size_t hash_index2(SEXP key, uintptr_t multiplier) { - uintptr_t h = (uintptr_t)key >> 6; - h ^= h >> 18; - h *= multiplier; - h ^= h >> 15; - return h; + // For double hashing, we need a different hash that's coprime with table size. + // We use higher-order bits that hash_index1 mostly ignores, and ensure + // the result is always odd (coprime with power-of-2 table sizes). + uintptr_t ptr = (uintptr_t)key; + ptr = (ptr >> 12) | (ptr << (sizeof(uintptr_t) * 8 - 12)); + return ((ptr & 0x0fffffff) * multiplier) | 1; } void hash_rehash(hashtab *h) {