Skip to content

Commit 5ff7fec

Browse files
committed
WIP MDEV-37482: Refactor btr_sea::hash_table
Prepare to push btr_sea::partition::latch down into the hash table itself. This is work in progress: it only refactors the hash table and does not yet move the latch.
1 parent 21868be commit 5ff7fec

File tree

3 files changed

+133
-23
lines changed

3 files changed

+133
-23
lines changed

storage/innobase/btr/btr0sea.cc

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,24 @@ struct ahi_node {
6363
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
6464
};
6565

66+
/** Find the first node of this chain that satisfies a predicate.
The walk starts at first and follows ahi_node::next; the predicate is
only invoked on non-null nodes.
@tparam UnaryPred  callable that accepts an ahi_node* and yields a truth value
@param u           predicate applied to each node in chain order
@return the first node for which u returned true
@retval nullptr if the end of the chain was reached without a match */
template<typename UnaryPred>
67+
inline ahi_node *btr_sea::hash_chain::find(UnaryPred u) const noexcept
68+
{
69+
ahi_node *node= first;
70+
while (node && !u(node))
71+
node= node->next;
72+
return node;
73+
}
74+
75+
/** Locate the link that refers to the first node satisfying a predicate.
The walk starts at &first and advances through ahi_node::next.
The loop does not test for the end of the chain by itself: u is also
invoked on a null pointer there, so u must return true for nullptr
unless a matching node is known to exist in the chain.
@tparam UnaryPred  callable that accepts an ahi_node* (possibly nullptr)
@param u           predicate that decides where the walk stops
@return address of the pointer (first or some ahi_node::next) whose
target satisfied u; the pointed-to value is nullptr if u accepted the
end of the chain */
template<typename UnaryPred>
76+
inline ahi_node **btr_sea::hash_chain::search(UnaryPred u) noexcept
77+
{
78+
ahi_node **prev= &first;
79+
/* u(*prev) is evaluated before *prev is dereferenced; see above */
while (!u(*prev))
80+
prev= &(*prev)->next;
81+
return prev;
82+
}
83+
6684
inline void btr_sea::partition::init() noexcept
6785
{
6886
latch.SRW_LOCK_INIT(btr_search_latch_key);
@@ -108,6 +126,16 @@ inline void btr_sea::partition::free() noexcept
108126
blocks_mutex.destroy();
109127
}
110128

129+
/** Allocate and zero-fill array[].
n_cells is set to ut_find_prime(n). The allocation covers pad(n_cells)
elements, with the byte size passed through
MY_ALIGN(..., CPU_LEVEL1_DCACHE_LINESIZE), and is requested with
CPU_LEVEL1_DCACHE_LINESIZE alignment.
NOTE(review): the result of aligned_malloc() is handed to memset_aligned()
without a null check - confirm that allocation failure cannot occur or is
handled elsewhere.
@param n  the lower bound of n_cells */
inline void btr_sea::hash_table::create(ulint n) noexcept
130+
{
131+
n_cells= ut_find_prime(n);
132+
/* pad() accounts for the extra slots interleaved with the payload */
const size_t size= MY_ALIGN(pad(n_cells) * sizeof *array,
133+
CPU_LEVEL1_DCACHE_LINESIZE);
134+
void *v= aligned_malloc(size, CPU_LEVEL1_DCACHE_LINESIZE);
135+
/* all-zero memory makes every hash_chain::first a null pointer */
memset_aligned<CPU_LEVEL1_DCACHE_LINESIZE>(v, 0, size);
136+
array= static_cast<hash_chain*>(v);
137+
}
138+
111139
inline void btr_sea::partition::alloc(ulint hash_size) noexcept
112140
{
113141
table.create(hash_size);
@@ -665,8 +693,8 @@ void btr_sea::partition::insert(uint32_t fold, const rec_t *rec) noexcept
665693
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
666694
ut_ad(btr_search.enabled);
667695

668-
ahi_node **prev= table.cell_get(fold)->
669-
search(&ahi_node::next, [fold](const ahi_node *node)
696+
ahi_node **prev= table.cell_get(fold).
697+
search([fold](const ahi_node *node)
670698
{ return !node || node->fold == fold; });
671699
ahi_node *node= *prev;
672700

@@ -761,8 +789,8 @@ buf_block_t *btr_sea::partition::cleanup_after_erase(ahi_node *erase) noexcept
761789
{
762790
/* Shrink the allocation by replacing the erased element with the top. */
763791
*erase= *top;
764-
ahi_node **prev= table.cell_get(top->fold)->
765-
search(&ahi_node::next, [top](const ahi_node *n) { return n == top; });
792+
ahi_node **prev= table.cell_get(top->fold).
793+
search([top](const ahi_node *n) { return n == top; });
766794
*prev= erase;
767795
}
768796

@@ -805,12 +833,12 @@ static void ha_remove_all_nodes_to_page(btr_sea::partition &part,
805833
uint32_t fold, const page_t *page)
806834
noexcept
807835
{
808-
hash_cell_t *cell= part.table.cell_get(fold);
836+
btr_sea::hash_chain &cell= part.table.cell_get(fold);
809837
const uintptr_t page_size{srv_page_size};
810838

811839
rewind:
812840
ahi_node **prev=
813-
cell->search(&ahi_node::next, [page,page_size](const ahi_node *node)
841+
cell.search([page,page_size](const ahi_node *node)
814842
{ return !node || (uintptr_t(node->rec) ^ uintptr_t(page)) < page_size; });
815843

816844
if (ahi_node *node= *prev)
@@ -824,7 +852,7 @@ static void ha_remove_all_nodes_to_page(btr_sea::partition &part,
824852
}
825853

826854
/* Check that all nodes really got deleted */
827-
ut_ad(!cell->find(&ahi_node::next, [page](const ahi_node* node)
855+
ut_ad(!cell.find([page](const ahi_node* node)
828856
{ return page_align(node->rec) == page; }));
829857
}
830858

@@ -835,8 +863,8 @@ inline bool btr_sea::partition::erase(uint32_t fold, const rec_t *rec) noexcept
835863
#endif
836864
ut_ad(btr_search.enabled);
837865

838-
ahi_node **prev= table.cell_get(fold)->
839-
search(&ahi_node::next, [rec](const ahi_node *node)
866+
ahi_node **prev= table.cell_get(fold).
867+
search([rec](const ahi_node *node)
840868
{ return !node || node->rec == rec; });
841869

842870
if (ahi_node *node= *prev)
@@ -862,7 +890,8 @@ updates the pointer to data if found.
862890
@param data pointer to the data
863891
@param new_data new pointer to the data
864892
@return whether the element was found */
865-
static bool ha_search_and_update_if_found(hash_table_t *table, uint32_t fold,
893+
static bool ha_search_and_update_if_found(btr_sea::hash_table *table,
894+
uint32_t fold,
866895
const rec_t *data,
867896
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
868897
/** block containing new_data */
@@ -875,9 +904,8 @@ static bool ha_search_and_update_if_found(hash_table_t *table, uint32_t fold,
875904
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
876905
ut_ad(btr_search.enabled);
877906

878-
if (ahi_node *node= table->cell_get(fold)->
879-
find(&ahi_node::next, [data](const ahi_node *node)
880-
{ return node->rec == data; }))
907+
if (ahi_node *node= table->cell_get(fold).
908+
find([data](const ahi_node *node){ return node->rec == data; }))
881909
{
882910
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
883911
if (node->block != new_block)
@@ -1027,9 +1055,8 @@ btr_search_guess_on_hash(
10271055
return false;
10281056
}
10291057

1030-
const ahi_node *node= part.table.cell_get(fold)->
1031-
find(&ahi_node::next, [fold](const ahi_node* node)
1032-
{ return node->fold == fold; });
1058+
const ahi_node *node= part.table.cell_get(fold).
1059+
find([fold](const ahi_node* node){ return node->fold == fold; });
10331060

10341061
if (!node)
10351062
{
@@ -1783,17 +1810,18 @@ void btr_search_update_hash_on_insert(btr_cur_t *cursor, bool reorg) noexcept
17831810
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
17841811
__attribute__((nonnull))
17851812
/** @return whether a range of the cells is valid */
1786-
static bool ha_validate(const hash_table_t *table,
1813+
static bool ha_validate(const btr_sea::hash_table *table,
17871814
ulint start_index, ulint end_index)
17881815
{
17891816
ut_a(start_index <= end_index);
17901817
ut_a(end_index < table->n_cells);
17911818

17921819
bool ok= true;
17931820

1821+
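/* FIXME: skip latches. hash_table::calc_hash() now returns padded indexes and array[] holds pad(n_cells) elements, but this loop still walks array[i] with raw cell indexes. */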
17941822
for (ulint i= start_index; i <= end_index; i++)
17951823
{
1796-
for (auto node= static_cast<const ahi_node*>(table->array[i].node); node;
1824+
for (auto node= static_cast<const ahi_node*>(table->array[i].first); node;
17971825
node= node->next)
17981826
{
17991827
if (table->calc_hash(node->fold) != i) {
@@ -1850,7 +1878,7 @@ static bool btr_search_hash_table_validate(THD *thd, ulint hash_table_id)
18501878
btr_sea::partition& part = btr_search.parts[hash_table_id];
18511879

18521880
cell_count = part.table.n_cells;
1853-
1881+
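/* FIXME: skip latches. array[] now holds pad(n_cells) elements including latch and padding slots, but this loop still walks array[i] for i < n_cells. */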
18541882
for (i = 0; i < cell_count; i++) {
18551883
/* We release search latches every once in a while to
18561884
give other queries a chance to run. */
@@ -1882,7 +1910,7 @@ static bool btr_search_hash_table_validate(THD *thd, ulint hash_table_id)
18821910
}
18831911
}
18841912

1885-
node = static_cast<ahi_node*>(part.table.array[i].node);
1913+
node = static_cast<ahi_node*>(part.table.array[i].first);
18861914

18871915
for (; node != NULL; node = node->next) {
18881916
const buf_block_t* block

storage/innobase/include/btr0sea.h

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,94 @@ struct btr_sea
117117
@param resize whether buf_pool_t::resize() is the caller */
118118
ATTRIBUTE_COLD void enable(bool resize= false) noexcept;
119119

120+
/** Chain of ahi_node elements linked through ahi_node::next;
the element type of hash_table::array */
121+
struct hash_chain
122+
{
123+
/** pointer to the first node of the chain; nullptr if the chain is empty */
124+
ahi_node *first;
125+
126+
/** Find an element.
127+
@tparam UnaryPred unary predicate; it is only invoked on non-null nodes
@param u unary predicate for searching the element
128+
@return the first matching element
129+
@retval nullptr if not found */
130+
template<typename UnaryPred>
131+
inline ahi_node *find(UnaryPred u) const noexcept;
132+
133+
/** Search for the pointer that refers to an element.
The end of the chain is not checked separately: u is also invoked on
nullptr there, so u must return true for nullptr unless a matching
element is guaranteed to exist.
134+
@tparam UnaryPred unary predicate
135+
@param u unary predicate for searching the element
136+
@return pointer to first or to the ahi_node::next that refers to the
first matching element;
137+
the pointed-to value is nullptr if u matched the end of the chain */
138+
template<typename UnaryPred>
139+
inline ahi_node **search(UnaryPred u) noexcept;
140+
};
141+
142+
/** Hash table with singly-linked overflow lists.
The payload chains in array[] are interleaved with slots reserved for
page_hash_latch objects and with padding; pad() converts a plain cell
number into the corresponding array[] index.
143+
FIXME: deduplicate with buf_pool_t::page_hash_table */
144+
struct hash_table
145+
{
146+
static_assert(CPU_LEVEL1_DCACHE_LINESIZE >= 64, "less than 64 bytes");
147+
static_assert(!(CPU_LEVEL1_DCACHE_LINESIZE & 63),
148+
"not a multiple of 64 bytes");
149+
150+
/** Number of array[] elements per page_hash_latch.
151+
Must be one less than a power of 2. */
152+
static constexpr size_t ELEMENTS_PER_LATCH= 64 / sizeof(void*) - 1;
153+
/** Number of additional unused array[] slots per latch; this is 0 when
CPU_LEVEL1_DCACHE_LINESIZE is exactly 64 and grows by 64 / sizeof(void*)
for every further 64 bytes of CPU_LEVEL1_DCACHE_LINESIZE */
static constexpr size_t EMPTY_SLOTS_PER_LATCH=
154+
((CPU_LEVEL1_DCACHE_LINESIZE / 64) - 1) * (64 / sizeof(void*));
155+
156+
/** number of payload elements in array[] */
157+
Atomic_relaxed<ulint> n_cells;
158+
/** the hash table, with pad(n_cells) elements, aligned to L1 cache size */
159+
hash_chain *array;
160+
161+
/** Create the hash table.
162+
@param n the lower bound of n_cells */
163+
inline void create(ulint n) noexcept;
164+
165+
/** Free the hash table.
array is reset to nullptr after being passed to aligned_free(). */
166+
void free() noexcept { aligned_free(array); array= nullptr; }
167+
168+
/** @return the padded array[] index for fold, based on the current n_cells */
169+
ulint calc_hash(ulint fold) const noexcept
170+
{ return calc_hash(fold, n_cells); }
171+
/** Convert a plain cell number to an array[] index.
One leading slot is skipped, and for every complete group of
ELEMENTS_PER_LATCH cells that precedes h, one more slot plus
EMPTY_SLOTS_PER_LATCH padding slots are skipped.
@param h raw array index
@return raw array index converted to padded index */
172+
static ulint pad(ulint h) noexcept
173+
{
174+
ulint latches= h / ELEMENTS_PER_LATCH;
175+
ulint empty_slots= latches * EMPTY_SLOTS_PER_LATCH;
176+
return 1 + latches + empty_slots + h;
177+
}
178+
private:
179+
/** Compute pad(fold % n_cells).
@param fold    hash value
@param n_cells number of payload elements; used as a divisor,
so it must be nonzero
@return the padded index of an array element */
180+
static ulint calc_hash(ulint fold, ulint n_cells) noexcept
181+
{
182+
return pad(fold % n_cells);
183+
}
184+
public:
185+
/** Derive the latch location from the address of a chain by clearing
the address bits in ELEMENTS_PER_LATCH * sizeof chain.
NOTE(review): this presumably yields the start of the 64-byte group
that contains the chain, assuming sizeof chain == sizeof(void*) and
a suitably aligned array[] - confirm.
@param chain an element of array[] other than a latch slot
@return the latch covering a hash table chain */
186+
static page_hash_latch &lock_get(hash_chain &chain) noexcept
187+
{
188+
static_assert(!((ELEMENTS_PER_LATCH + 1) & ELEMENTS_PER_LATCH),
189+
"must be one less than a power of 2");
190+
const size_t addr= reinterpret_cast<size_t>(&chain);
191+
/* if none of the masked bits were set, the computed latch address
would be the address of chain itself */
ut_ad(addr & (ELEMENTS_PER_LATCH * sizeof chain));
192+
return *reinterpret_cast<page_hash_latch*>
193+
(addr & ~(ELEMENTS_PER_LATCH * sizeof chain));
194+
}
195+
196+
/** Get a hash table slot.
@param fold hash value
@return the array[] element at calc_hash(fold, n_cells) */
197+
hash_chain &cell_get(ulint fold) const
198+
{ return array[calc_hash(fold, n_cells)]; }
199+
};
200+
120201
/** Partition of the hash table */
121202
struct partition
122203
{
123204
/** latch protecting table */
124-
alignas(CPU_LEVEL1_DCACHE_LINESIZE) srw_spin_lock latch;
205+
alignas(CPU_LEVEL1_DCACHE_LINESIZE) srw_spin_lock latch; /* FIXME: remove this */
125206
/** map of CRC-32C of rec prefix to rec_t* in buf_page_t::frame */
126-
hash_table_t table;
207+
hash_table table;
127208
/** latch protecting blocks, spare; may be acquired while holding latch */
128209
srw_mutex blocks_mutex;
129210
/** allocated blocks */

storage/innobase/srv/srv0srv.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,8 @@ srv_printf_innodb_monitor(
746746
part.blocks_mutex.wr_lock();
747747
fprintf(file, "Hash table size " ULINTPF
748748
", node heap has " ULINTPF " buffer(s)\n",
749-
part.table.n_cells, part.blocks.count + !!part.spare);
749+
size_t{part.table.n_cells},
750+
part.blocks.count + !!part.spare);
750751
part.blocks_mutex.wr_unlock();
751752
}
752753

0 commit comments

Comments
 (0)