From 868c2ceb013e06c29ba37d4634f2d543b96539aa Mon Sep 17 00:00:00 2001 From: Nirbhay Choubey <nirbhay@mariadb.com> Date: Sun, 12 Jun 2016 19:28:56 -0400 Subject: [PATCH 1/4] MDEV-9083: Slave IO thread does not handle autoreconnect to restarting Galera Cluster node Cherry-picked commits from codership/mysql-wsrep. MW-284: Slave I/O retry on ER_UNKNOWN_COM_ERROR Slave would treat ER_UNKNOWN_COM_ERROR as a fatal error and stop. The fix here is to treat it as a network error and rely on the built-in mechanism to retry. MW-284: Add an MTR test --- mysql-test/suite/galera/r/MW-284.result | 13 ++++++ mysql-test/suite/galera/t/MW-284.cnf | 1 + mysql-test/suite/galera/t/MW-284.test | 57 +++++++++++++++++++++++++ sql/slave.cc | 4 ++ 4 files changed, 75 insertions(+) create mode 100644 mysql-test/suite/galera/r/MW-284.result create mode 100644 mysql-test/suite/galera/t/MW-284.cnf create mode 100644 mysql-test/suite/galera/t/MW-284.test diff --git a/mysql-test/suite/galera/r/MW-284.result b/mysql-test/suite/galera/r/MW-284.result new file mode 100644 index 0000000000000..8b5119663ce35 --- /dev/null +++ b/mysql-test/suite/galera/r/MW-284.result @@ -0,0 +1,13 @@ +CREATE TABLE t1 (f1 INTEGER) ENGINE=InnoDB; +SET GLOBAL wsrep_provider_options='gmcast.isolate=1'; +SET SESSION wsrep_on = OFF; +SET SESSION wsrep_on = ON; +START SLAVE; +include/wait_for_slave_param.inc [Slave_IO_Running] +SET GLOBAL wsrep_provider_options='gmcast.isolate=0'; +include/wait_for_slave_to_start.inc +INSERT INTO t1 VALUES (1); +DROP TABLE t1; +STOP SLAVE; +RESET SLAVE ALL; +CALL mtr.add_suppression('failed registering on master'); diff --git a/mysql-test/suite/galera/t/MW-284.cnf b/mysql-test/suite/galera/t/MW-284.cnf new file mode 100644 index 0000000000000..52fd3093931d6 --- /dev/null +++ b/mysql-test/suite/galera/t/MW-284.cnf @@ -0,0 +1 @@ +!include ../galera_2nodes_as_master.cnf diff --git a/mysql-test/suite/galera/t/MW-284.test b/mysql-test/suite/galera/t/MW-284.test new file mode 100644 index 
0000000000000..5998e22ed1eb4 --- /dev/null +++ b/mysql-test/suite/galera/t/MW-284.test @@ -0,0 +1,57 @@ +# +# MW-284 Slave I/O retry on ER_COM_UNKNOWN_ERROR +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc + +--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 +--disable_query_log +--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_PORT=$NODE_MYPORT_1, MASTER_USER='root', MASTER_CONNECT_RETRY=1; +--enable_query_log + +--connection node_1 +CREATE TABLE t1 (f1 INTEGER) ENGINE=InnoDB; +SET GLOBAL wsrep_provider_options='gmcast.isolate=1'; +SET SESSION wsrep_on = OFF; +--let $wait_condition = SELECT VARIABLE_VALUE = 'non-Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status' +--source include/wait_condition.inc +SET SESSION wsrep_on = ON; + +--connection node_3 +START SLAVE; +--sleep 1 +--let $slave_param= Slave_IO_Running +--let $slave_param_value= Connecting +--source include/wait_for_slave_param.inc + +--connection node_1 +SET GLOBAL wsrep_provider_options='gmcast.isolate=0'; + +# We expect the slave to reconnect and resume replication + +--connection node_3 +--source include/wait_for_slave_to_start.inc + +--connection node_1 +INSERT INTO t1 VALUES (1); + +--connection node_3 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1' +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) > 0 FROM t1 +--source include/wait_condition.inc + +# Cleanup + +--connection node_1 +DROP TABLE t1; + +--connection node_3 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1' +--source include/wait_condition.inc + +STOP SLAVE; +RESET SLAVE ALL; + +CALL mtr.add_suppression('failed registering on master'); diff --git a/sql/slave.cc b/sql/slave.cc index 4bc4cd4ab838a..e9db77a09240c 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1361,6 +1361,10 @@ bool is_network_error(uint errorno) errorno == 
ER_NET_READ_INTERRUPTED || errorno == ER_SERVER_SHUTDOWN) return TRUE; +#ifdef WITH_WSREP + if (errorno == ER_UNKNOWN_COM_ERROR) + return TRUE; +#endif return FALSE; } From fc799e317d3f4e758c2c7196c2d860f3c20cc98e Mon Sep 17 00:00:00 2001 From: Nirbhay Choubey <nirbhay@mariadb.com> Date: Fri, 24 Jun 2016 13:54:04 -0400 Subject: [PATCH 2/4] Fix build failure. --- storage/xtradb/srv/srv0srv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 603b8b8af4fef..ed7ef96cfe508 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -2226,7 +2226,7 @@ DECLARE_THREAD(srv_error_monitor_thread)( if (sync_array_print_long_waits(&waiter, &sema) && sema == old_sema && os_thread_eq(waiter, old_waiter)) { #if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) - if (srv_allow_writes_event->is_set) { + if (srv_allow_writes_event->is_set()) { #endif /* WITH_WSREP */ fatal_cnt++; #if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) From 94a61838fe0ed2bd7a26b1b53d49d179d3013c45 Mon Sep 17 00:00:00 2001 From: Nirbhay Choubey <nirbhay@mariadb.com> Date: Sun, 26 Jun 2016 21:15:36 -0400 Subject: [PATCH 3/4] Fix galera,wsrep tests. 
--- mysql-test/suite/galera/r/mysql-wsrep#237.result | 1 + mysql-test/suite/galera/suite.pm | 1 + mysql-test/suite/galera/t/mysql-wsrep#237.test | 5 +++++ mysql-test/suite/wsrep/suite.pm | 2 ++ 4 files changed, 9 insertions(+) diff --git a/mysql-test/suite/galera/r/mysql-wsrep#237.result b/mysql-test/suite/galera/r/mysql-wsrep#237.result index 3fd9aed1480db..1889a8feca094 100644 --- a/mysql-test/suite/galera/r/mysql-wsrep#237.result +++ b/mysql-test/suite/galera/r/mysql-wsrep#237.result @@ -8,3 +8,4 @@ SLEEP(1) 0 SET DEBUG_SYNC= 'now SIGNAL continue'; DROP TABLE t; +SET DEBUG_SYNC= 'RESET'; diff --git a/mysql-test/suite/galera/suite.pm b/mysql-test/suite/galera/suite.pm index d6cd4273014e3..8a25434f2e9e5 100644 --- a/mysql-test/suite/galera/suite.pm +++ b/mysql-test/suite/galera/suite.pm @@ -68,6 +68,7 @@ push @::global_suppressions, qr|WSREP: gcs_caused\(\) returned .*|, qr|WSREP: Protocol violation. JOIN message sender .* is not in state transfer \(SYNCED\). Message ignored.|, qr(WSREP: Action message in non-primary configuration from member [0-9]*), + qr(InnoDB: Some resources were not cleaned up in shutdown.*), ); diff --git a/mysql-test/suite/galera/t/mysql-wsrep#237.test b/mysql-test/suite/galera/t/mysql-wsrep#237.test index 7a65cb52ae970..4a539e1ba1568 100644 --- a/mysql-test/suite/galera/t/mysql-wsrep#237.test +++ b/mysql-test/suite/galera/t/mysql-wsrep#237.test @@ -28,4 +28,9 @@ SET DEBUG_SYNC= 'now SIGNAL continue'; --connection node_1 --reap +# Cleanup DROP TABLE t; + +--connection node_1a +SET DEBUG_SYNC= 'RESET'; + diff --git a/mysql-test/suite/wsrep/suite.pm b/mysql-test/suite/wsrep/suite.pm index ec7a3e374f525..a1b5fddc628de 100644 --- a/mysql-test/suite/wsrep/suite.pm +++ b/mysql-test/suite/wsrep/suite.pm @@ -25,9 +25,11 @@ return "No my_print_defaults" unless $epath; push @::global_suppressions, ( qr(WSREP: Could not open saved state file for reading: ), + qr(WSREP: Could not open state file for reading: .*), qr(WSREP: option --wsrep-causal-reads 
is deprecated), qr(WSREP: --wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=0), qr|WSREP: access file\(.*gvwstate.dat\) failed\(No such file or directory\)|, + qr(InnoDB: Some resources were not cleaned up in shutdown.*), ); $ENV{PATH}="$epath:$ENV{PATH}"; From f629f5354850728d85d6b915d2a3fe7192f00cd5 Mon Sep 17 00:00:00 2001 From: Nirbhay Choubey <nirbhay@mariadb.com> Date: Mon, 27 Jun 2016 14:44:07 -0400 Subject: [PATCH 4/4] Fix memory leak in XtraDB. --- mysql-test/suite/galera/suite.pm | 1 - mysql-test/suite/wsrep/suite.pm | 1 - storage/xtradb/srv/srv0srv.cc | 3 +++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mysql-test/suite/galera/suite.pm b/mysql-test/suite/galera/suite.pm index 8a25434f2e9e5..d6cd4273014e3 100644 --- a/mysql-test/suite/galera/suite.pm +++ b/mysql-test/suite/galera/suite.pm @@ -68,7 +68,6 @@ push @::global_suppressions, qr|WSREP: gcs_caused\(\) returned .*|, qr|WSREP: Protocol violation. JOIN message sender .* is not in state transfer \(SYNCED\). 
Message ignored.|, qr(WSREP: Action message in non-primary configuration from member [0-9]*), - qr(InnoDB: Some resources were not cleaned up in shutdown.*), ); diff --git a/mysql-test/suite/wsrep/suite.pm b/mysql-test/suite/wsrep/suite.pm index a1b5fddc628de..31d9faeb0ddd0 100644 --- a/mysql-test/suite/wsrep/suite.pm +++ b/mysql-test/suite/wsrep/suite.pm @@ -29,7 +29,6 @@ push @::global_suppressions, qr(WSREP: option --wsrep-causal-reads is deprecated), qr(WSREP: --wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=0), qr|WSREP: access file\(.*gvwstate.dat\) failed\(No such file or directory\)|, - qr(InnoDB: Some resources were not cleaned up in shutdown.*), ); $ENV{PATH}="$epath:$ENV{PATH}"; diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index ed7ef96cfe508..73fc93f9597a0 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -1209,6 +1209,9 @@ srv_free(void) os_event_free(srv_buf_dump_event); os_event_free(srv_checkpoint_completed_event); os_event_free(srv_redo_log_tracked_event); +#ifdef WITH_INNODB_DISALLOW_WRITES + os_event_free(srv_allow_writes_event); +#endif /* WITH_INNODB_DISALLOW_WRITES */ mutex_free(&srv_sys->mutex); mutex_free(&srv_sys->tasks_mutex); }