Skip to content

Commit e5c1970

Browse files
ayurchenAlexey Yurchenko
authored andcommitted
MDEV-37494 Diagnostics_area does not always contain apply error info
It appears that some error conditions don't store error information in the Diagnostics_area. For example, when the table_def::compatible_with() check fails, the error message is stored in Relay_log_info instead. This results in optimistically identical votes, and a zero error buffer size breaks wsrep-lib logic, as it relies on the error buffer size to decide whether voting took place. To account for this, first try to obtain error info from the Diagnostics_area, then fall back to Relay_log_info. If that fails, use some "random" data to distinguish this condition from success in production.
1 parent a1bba0e commit e5c1970

File tree

4 files changed

+196
-4
lines changed

4 files changed

+196
-4
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
connection node_2;
2+
connection node_1;
3+
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 BLOB) ENGINE=InnoDB;
4+
connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3;
5+
connection node_3;
6+
SET GLOBAL wsrep_on=OFF;
7+
ALTER TABLE t1 MODIFY f2 LONGTEXT;
8+
SET GLOBAL wsrep_on=ON;
9+
INSERT INTO t1 VALUES (3, 'a');
10+
connection node_1;
11+
SHOW STATUS LIKE 'wsrep_cluster_status';
12+
Variable_name Value
13+
wsrep_cluster_status Primary
14+
connection node_2;
15+
SHOW STATUS LIKE 'wsrep_cluster_status';
16+
Variable_name Value
17+
wsrep_cluster_status Primary
18+
INSERT INTO t1 VALUES (2, 'a');
19+
connection node_3;
20+
SET SESSION wsrep_sync_wait=0;
21+
SET SESSION wsrep_on=OFF;
22+
# restart
23+
SET SESSION wsrep_on=ON;
24+
INSERT INTO t1 VALUES (3, 'a');
25+
connection node_1;
26+
SHOW CREATE TABLE t1;
27+
Table Create Table
28+
t1 CREATE TABLE `t1` (
29+
`f1` int(11) NOT NULL,
30+
`f2` blob DEFAULT NULL,
31+
PRIMARY KEY (`f1`)
32+
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
33+
SELECT * FROM t1;
34+
f1 f2
35+
2 a
36+
3 a
37+
connection node_2;
38+
SHOW CREATE TABLE t1;
39+
Table Create Table
40+
t1 CREATE TABLE `t1` (
41+
`f1` int(11) NOT NULL,
42+
`f2` blob DEFAULT NULL,
43+
PRIMARY KEY (`f1`)
44+
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
45+
SELECT * FROM t1;
46+
f1 f2
47+
2 a
48+
3 a
49+
connection node_3;
50+
SHOW CREATE TABLE t1;
51+
Table Create Table
52+
t1 CREATE TABLE `t1` (
53+
`f1` int(11) NOT NULL,
54+
`f2` blob DEFAULT NULL,
55+
PRIMARY KEY (`f1`)
56+
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
57+
SELECT * FROM t1;
58+
f1 f2
59+
2 a
60+
3 a
61+
DROP TABLE t1;
62+
connection node_1;
63+
CALL mtr.add_suppression("Replica SQL: Column 1 of table 'test.t1' cannot be converted from type 'longblob' to type 'blob', Error_code: MY-013146");
64+
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 3, seqno");
65+
connection node_2;
66+
CALL mtr.add_suppression("Replica SQL: Column 1 of table 'test.t1' cannot be converted from type 'longblob' to type 'blob', Error_code: MY-013146");
67+
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 3, seqno");
68+
connection node_3;
69+
CALL mtr.add_suppression("Vote 0 \\(success\\) on (.*) is inconsistent with group. Leaving cluster.");
70+
CALL mtr.add_suppression("Plugin 'InnoDB' will be forced to shutdown");
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
!include ../galera_3nodes.cnf
2+
3+
[mysqld]
4+
wsrep-ignore-apply-errors=0
5+
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#
2+
# MDEV-37494: Inconsistency voting: create conditions where applying would
3+
# fail on replicas in table_def::compatible_with() and check that
4+
# replicas survive and the primary (trx source) bails out.
5+
#
6+
--source include/galera_cluster.inc
7+
8+
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 BLOB) ENGINE=InnoDB;
9+
10+
--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3
11+
--connection node_3
12+
SET GLOBAL wsrep_on=OFF;
13+
ALTER TABLE t1 MODIFY f2 LONGTEXT; # Introducing schema inconsistency
14+
SET GLOBAL wsrep_on=ON;
15+
INSERT INTO t1 VALUES (3, 'a'); # Nodes 1 and 2 should fail to apply this
16+
17+
--connection node_1
18+
# Wait until node #3 leaves the cluster
19+
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_size'
20+
--source include/wait_condition.inc
21+
SHOW STATUS LIKE 'wsrep_cluster_status';
22+
23+
--connection node_2
24+
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_size'
25+
# Wait until node #3 leaves the cluster
26+
--source include/wait_condition.inc
27+
SHOW STATUS LIKE 'wsrep_cluster_status';
28+
29+
INSERT INTO t1 VALUES (2, 'a'); # Nodes 1 and 2 should successfully apply this
30+
31+
--connection node_3
32+
SET SESSION wsrep_sync_wait=0;
33+
--let $wait_condition = SELECT VARIABLE_VALUE = 'Disconnected' FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_status';
34+
--source include/wait_condition.inc
35+
SET SESSION wsrep_on=OFF;
36+
--source include/restart_mysqld.inc
37+
--source include/wait_wsrep_ready.inc
38+
SET SESSION wsrep_on=ON;
39+
40+
INSERT INTO t1 VALUES (3, 'a'); # All nodes should successfully apply this
41+
42+
# Check that consistency is restored
43+
--connection node_1
44+
SHOW CREATE TABLE t1;
45+
SELECT * FROM t1;
46+
47+
--connection node_2
48+
SHOW CREATE TABLE t1;
49+
SELECT * FROM t1;
50+
51+
--connection node_3
52+
SHOW CREATE TABLE t1;
53+
SELECT * FROM t1;
54+
55+
DROP TABLE t1;
56+
57+
--connection node_1
58+
CALL mtr.add_suppression("Replica SQL: Column 1 of table 'test.t1' cannot be converted from type 'longblob' to type 'blob', Error_code: MY-013146");
59+
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 3, seqno");
60+
61+
--connection node_2
62+
CALL mtr.add_suppression("Replica SQL: Column 1 of table 'test.t1' cannot be converted from type 'longblob' to type 'blob', Error_code: MY-013146");
63+
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 3, seqno");
64+
65+
--connection node_3
66+
CALL mtr.add_suppression("Vote 0 \\(success\\) on (.*) is inconsistent with group. Leaving cluster.");
67+
CALL mtr.add_suppression("Plugin 'InnoDB' will be forced to shutdown");

sql/wsrep_applier.cc

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,35 @@ wsrep_get_apply_format(THD* thd)
8383
return thd->wsrep_rgi->rli->relay_log.description_event_for_exec;
8484
}
8585

86-
void wsrep_store_error(const THD* const thd,
87-
wsrep::mutable_buffer& dst,
88-
bool const include_msg)
86+
/* store error from rli */
87+
static void wsrep_store_error_rli(const THD* const thd,
88+
wsrep::mutable_buffer& dst,
89+
bool const include_msg)
90+
{
91+
Slave_reporting_capability* const rli= thd->wsrep_rgi->rli;
92+
if (rli && rli->last_error().number != 0)
93+
{
94+
auto error= rli->last_error();
95+
std::ostringstream os;
96+
if (include_msg)
97+
{
98+
os << error.message << ",";
99+
}
100+
os << " Error_code: " << error.number << ';';
101+
std::string const err_str= os.str();
102+
dst.resize(err_str.length() + 1);
103+
sprintf(dst.data(), "%s", err_str.c_str());
104+
105+
WSREP_DEBUG("Error buffer (RLI) for thd %u seqno %lld, %zu bytes: '%s'",
106+
thd->thread_id, (long long)wsrep_thd_trx_seqno(thd),
107+
dst.size(), dst.size() ? dst.data() : "(null)");
108+
}
109+
}
110+
111+
/* store error from diagnostic area */
112+
static void wsrep_store_error_da(const THD* const thd,
113+
wsrep::mutable_buffer& dst,
114+
bool const include_msg)
89115
{
90116
Diagnostics_area::Sql_condition_iterator it=
91117
thd->get_stmt_da()->sql_conditions();
@@ -123,11 +149,35 @@ void wsrep_store_error(const THD* const thd,
123149

124150
dst.resize(slider - dst.data());
125151

126-
WSREP_DEBUG("Error buffer for thd %llu seqno %lld, %zu bytes: '%s'",
152+
WSREP_DEBUG("Error buffer (DA) for thd %llu seqno %lld, %zu bytes: '%s'",
127153
thd->thread_id, (long long)wsrep_thd_trx_seqno(thd),
128154
dst.size(), dst.size() ? dst.data() : "(null)");
129155
}
130156

157+
/* store error info after applying error */
158+
void wsrep_store_error(const THD* const thd,
159+
wsrep::mutable_buffer& dst,
160+
bool const include_msg)
161+
{
162+
dst.clear();
163+
wsrep_store_error_da(thd, dst, include_msg);
164+
if (dst.size() == 0)
165+
{
166+
wsrep_store_error_rli(thd, dst, include_msg);
167+
}
168+
if (dst.size() == 0)
169+
{
170+
WSREP_WARN("Failed to get apply error description from either "
171+
"Relay_log_info or Diagnostics_area, will use random data.");
172+
assert(0);
173+
uintptr_t const n1= reinterpret_cast<uintptr_t>(&dst);
174+
uintptr_t const n2= reinterpret_cast<uintptr_t>(thd);
175+
uintptr_t const data= n1 ^ (n2 < 1);
176+
const char* const data_ptr= reinterpret_cast<const char*>(&data);
177+
dst.push_back(data_ptr, data_ptr + sizeof(data));
178+
}
179+
}
180+
131181
int wsrep_apply_events(THD* thd,
132182
Relay_log_info* rli,
133183
const void* events_buf,

0 commit comments

Comments
 (0)