Skip to content

Commit e931d20

Browse files
committed
MDEV-29930 Lock order inversion in ibuf_remove_free_page()
The function ibuf_remove_free_page() was waiting for ibuf_mutex while holding ibuf.index->lock. This constitutes a lock order inversion and may cause InnoDB to hang when innodb_change_buffering is enabled and ibuf_merge_or_delete_for_page() is being executed concurrently. In fact, there is no need for ibuf_remove_free_page() to reacquire ibuf_mutex if we make ibuf.seg_size and ibuf.free_list_len protected by the ibuf.index->lock as well as the root page latch rather than by ibuf_mutex. ibuf.seg_size, ibuf.free_list_len: Instead of ibuf_mutex, let the ibuf.index->lock and the root page latch protect these, like ibuf.empty. ibuf_init_at_db_start(): Acquire the root page latch before updating ibuf.seg_size. (The ibuf.index would be created later.) ibuf_data_enough_free_for_insert(), ibuf_data_too_much_free(): Assert also ibuf.index->lock.have_u_or_x(). ibuf_remove_free_page(): Acquire the ibuf.index->lock and the root page latch before accessing ibuf.free_list_len. Simplify the way how the root page latch is released and reacquired. Acquire and release ibuf_mutex only once. ibuf_free_excess_pages(), ibuf_insert_low(): Acquire also ibuf.index->lock before reading ibuf.free_list_len. ibuf_print(): Acquire ibuf.index->lock before reading ibuf.free_list_len and ibuf.seg_size. Reviewed by: Vladislav Lesin
1 parent 77a3d7a commit e931d20

File tree

2 files changed

+41
-35
lines changed

2 files changed

+41
-35
lines changed

storage/innobase/ibuf/ibuf0ibuf.cc

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ ibuf_size_update(
371371
const page_t* root) /*!< in: ibuf tree root */
372372
{
373373
mysql_mutex_assert_owner(&ibuf_mutex);
374+
ut_ad(!ibuf.index || ibuf.index->lock.have_u_or_x());
374375

375376
ibuf.free_list_len = flst_get_len(root + PAGE_HEADER
376377
+ PAGE_BTR_IBUF_FREE_LIST);
@@ -414,6 +415,15 @@ ibuf_init_at_db_start(void)
414415
return err;
415416
}
416417

418+
if (buf_block_t* block =
419+
buf_page_get_gen(page_id_t(IBUF_SPACE_ID,
420+
FSP_IBUF_TREE_ROOT_PAGE_NO),
421+
0, RW_X_LATCH, nullptr, BUF_GET, &mtr, &err)) {
422+
root = buf_block_get_frame(block);
423+
} else {
424+
goto err_exit;
425+
}
426+
417427
fseg_n_reserved_pages(*header_page,
418428
IBUF_HEADER + IBUF_TREE_SEG_HEADER
419429
+ header_page->page.frame, &ibuf.seg_size, &mtr);
@@ -424,15 +434,6 @@ ibuf_init_at_db_start(void)
424434
ut_ad(ibuf.seg_size >= 2);
425435
} while (0);
426436

427-
if (buf_block_t* block =
428-
buf_page_get_gen(page_id_t(IBUF_SPACE_ID,
429-
FSP_IBUF_TREE_ROOT_PAGE_NO),
430-
0, RW_X_LATCH, nullptr, BUF_GET, &mtr, &err)) {
431-
root = buf_block_get_frame(block);
432-
} else {
433-
goto err_exit;
434-
}
435-
436437
DBUG_EXECUTE_IF("ibuf_init_corrupt",
437438
err = DB_CORRUPTION;
438439
goto err_exit;);
@@ -1741,6 +1742,7 @@ dare to start a pessimistic insert to the insert buffer.
17411742
static inline bool ibuf_data_enough_free_for_insert()
17421743
{
17431744
mysql_mutex_assert_owner(&ibuf_mutex);
1745+
ut_ad(ibuf.index->lock.have_u_or_x());
17441746

17451747
/* We want a big margin of free pages, because a B-tree can sometimes
17461748
grow in size also if records are deleted from it, as the node pointers
@@ -1754,13 +1756,11 @@ static inline bool ibuf_data_enough_free_for_insert()
17541756
/*********************************************************************//**
17551757
Checks if there are enough pages in the free list of the ibuf tree that we
17561758
should remove them and free to the file space management.
1757-
@return TRUE if enough free pages in list */
1758-
UNIV_INLINE
1759-
ibool
1760-
ibuf_data_too_much_free(void)
1761-
/*=========================*/
1759+
@return whether enough free pages in list */
1760+
static inline bool ibuf_data_too_much_free()
17621761
{
17631762
mysql_mutex_assert_owner(&ibuf_mutex);
1763+
ut_ad(ibuf.index->lock.have_u_or_x());
17641764

17651765
return(ibuf.free_list_len >= 3 + (ibuf.size / 2) + 3 * ibuf.height);
17661766
}
@@ -1839,6 +1839,7 @@ static bool ibuf_add_free_page()
18391839
goto corrupted;
18401840
}
18411841

1842+
ut_ad(ibuf.index->lock.have_u_or_x());
18421843
ibuf.seg_size++;
18431844
ibuf.free_list_len++;
18441845

@@ -1876,7 +1877,7 @@ static dberr_t ibuf_remove_free_page(bool all = false)
18761877
mysql_mutex_lock(&ibuf_pessimistic_insert_mutex);
18771878
mysql_mutex_lock(&ibuf_mutex);
18781879

1879-
if (!header_page || (!all && !ibuf_data_too_much_free())) {
1880+
if (!header_page) {
18801881
early_exit:
18811882
mysql_mutex_unlock(&ibuf_mutex);
18821883
mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
@@ -1892,7 +1893,10 @@ static dberr_t ibuf_remove_free_page(bool all = false)
18921893
goto early_exit;
18931894
}
18941895

1895-
const auto root_savepoint = mtr.get_savepoint() - 1;
1896+
if (!all && !ibuf_data_too_much_free()) {
1897+
goto early_exit;
1898+
}
1899+
18961900
const uint32_t page_no = flst_get_last(PAGE_HEADER
18971901
+ PAGE_BTR_IBUF_FREE_LIST
18981902
+ root->page.frame).page;
@@ -1910,7 +1914,6 @@ static dberr_t ibuf_remove_free_page(bool all = false)
19101914
because in fseg_free_page we access level 1 pages, and the root
19111915
is a level 2 page. */
19121916
root->page.lock.u_unlock();
1913-
mtr.lock_register(root_savepoint, MTR_MEMO_BUF_FIX);
19141917
ibuf_exit(&mtr);
19151918

19161919
/* Since pessimistic inserts were prevented, we know that the
@@ -1926,15 +1929,14 @@ static dberr_t ibuf_remove_free_page(bool all = false)
19261929
header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
19271930
fil_system.sys_space, page_no, &mtr);
19281931

1932+
root->page.lock.u_lock();
1933+
19291934
if (err != DB_SUCCESS) {
19301935
goto func_exit;
19311936
}
19321937

19331938
ibuf_enter(&mtr);
19341939

1935-
mysql_mutex_lock(&ibuf_mutex);
1936-
mtr.upgrade_buffer_fix(root_savepoint, RW_X_LATCH);
1937-
19381940
/* Remove the page from the free list and update the ibuf size data */
19391941
if (buf_block_t* block =
19401942
buf_page_get_gen(page_id, 0, RW_X_LATCH, nullptr, BUF_GET,
@@ -1946,6 +1948,7 @@ static dberr_t ibuf_remove_free_page(bool all = false)
19461948
}
19471949

19481950
mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
1951+
ut_ad(ibuf.index->lock.have_u_or_x());
19491952

19501953
if (err == DB_SUCCESS) {
19511954
ibuf.seg_size--;
@@ -1954,8 +1957,6 @@ static dberr_t ibuf_remove_free_page(bool all = false)
19541957
}
19551958

19561959
func_exit:
1957-
mysql_mutex_unlock(&ibuf_mutex);
1958-
19591960
if (bitmap_page) {
19601961
/* Set the bit indicating that this page is no more an
19611962
ibuf tree page (level 2 page) */
@@ -1983,12 +1984,11 @@ ibuf_free_excess_pages(void)
19831984
requested service too much */
19841985

19851986
for (ulint i = 0; i < 4; i++) {
1986-
1987-
ibool too_much_free;
1988-
19891987
mysql_mutex_lock(&ibuf_mutex);
1990-
too_much_free = ibuf_data_too_much_free();
1988+
ibuf.index->lock.u_lock(SRW_LOCK_CALL);
1989+
bool too_much_free = ibuf_data_too_much_free();
19911990
mysql_mutex_unlock(&ibuf_mutex);
1991+
ibuf.index->lock.u_unlock();
19921992

19931993
if (!too_much_free) {
19941994
return;
@@ -3169,8 +3169,11 @@ ibuf_insert_low(
31693169
for (;;) {
31703170
mysql_mutex_lock(&ibuf_pessimistic_insert_mutex);
31713171
mysql_mutex_lock(&ibuf_mutex);
3172+
ibuf.index->lock.u_lock(SRW_LOCK_CALL);
3173+
bool enough = ibuf_data_enough_free_for_insert();
3174+
ibuf.index->lock.u_unlock();
31723175

3173-
if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
3176+
if (UNIV_LIKELY(enough)) {
31743177

31753178
break;
31763179
}
@@ -4486,8 +4489,11 @@ ibuf_print(
44864489
}
44874490

44884491
const uint32_t size= ibuf.size;
4492+
ibuf.index->lock.u_lock(SRW_LOCK_CALL);
44894493
const uint32_t free_list_len= ibuf.free_list_len;
44904494
const uint32_t seg_size= ibuf.seg_size;
4495+
ibuf.index->lock.u_unlock();
4496+
44914497
mysql_mutex_unlock(&ibuf_mutex);
44924498

44934499
fprintf(file,

storage/innobase/include/ibuf0ibuf.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,15 @@ struct ibuf_t{
6868
ibuf index tree, in pages */
6969
uint32_t seg_size; /*!< allocated pages of the file
7070
segment containing ibuf header and
71-
tree */
72-
bool empty; /*!< Protected by the page
73-
latch of the root page of the
74-
insert buffer tree
75-
(FSP_IBUF_TREE_ROOT_PAGE_NO). true
76-
if and only if the insert
77-
buffer tree is empty. */
71+
tree; protected by ibuf.index->lock
72+
and the root page latch */
73+
bool empty; /*!< whether the change buffer is
74+
empty; protected by ibuf.index->lock
75+
and the root page latch */
7876
uint8_t height; /*!< tree height */
79-
uint32_t free_list_len; /*!< length of the free list */
77+
uint32_t free_list_len; /*!< length of the free list;
78+
protected by ibuf.index->lock and
79+
the root page latch */
8080
dict_index_t* index; /*!< insert buffer index */
8181

8282
/** number of pages merged */

0 commit comments

Comments
 (0)