@@ -81,6 +81,7 @@ enum raft_failure_test {
FT_STOP_RAFT_RPC,
FT_TRANSFER_LEADERSHIP,
FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ,
+ FT_TRANSFER_LEADERSHIP_AFTER_STARTING_TO_ADD,
};
static enum raft_failure_test failure_test;
@@ -2740,15 +2741,22 @@ raft_become_follower(struct raft *raft)
* new configuration. Our AppendEntries processing will properly update
* the server configuration later, if necessary.
*
+ * However, since we're sending replies about a failure to add, those new
+ * servers has to be cleaned up. Otherwise, they will stuck in a 'CATCHUP'
+ * phase in case this server regains leadership before they join through
+ * the current new leader. They are not yet in 'raft->servers', so not
+ * part of the shared configuration.
+ *
* Also we do not complete commands here, as they can still be completed
* if their log entries have already been replicated to other servers.
* If the entries were actually committed according to the new leader, our
* AppendEntries processing will complete the corresponding commands.
*/
struct raft_server *s;
- HMAP_FOR_EACH (s, hmap_node, &raft->add_servers) {
+ HMAP_FOR_EACH_POP (s, hmap_node, &raft->add_servers) {
raft_send_add_server_reply__(raft, &s->sid, s->address, false,
RAFT_SERVER_LOST_LEADERSHIP);
+ raft_server_destroy(s);
}
if (raft->remove_server) {
raft_send_remove_server_reply__(raft, &raft->remove_server->sid,
@@ -4023,6 +4031,10 @@ raft_handle_add_server_request(struct raft *raft,
"to cluster "CID_FMT, s->nickname, SID_ARGS(&s->sid),
rq->address, CID_ARGS(&raft->cid));
raft_send_append_request(raft, s, 0, "initialize new server");
+
+ if (failure_test == FT_TRANSFER_LEADERSHIP_AFTER_STARTING_TO_ADD) {
+ failure_test = FT_TRANSFER_LEADERSHIP;
+ }
}
static void
@@ -5148,6 +5160,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
} else if (!strcmp(test,
"transfer-leadership-after-sending-append-request")) {
failure_test = FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ;
+ } else if (!strcmp(test, "transfer-leadership-after-starting-to-add")) {
+ failure_test = FT_TRANSFER_LEADERSHIP_AFTER_STARTING_TO_ADD;
} else if (!strcmp(test, "transfer-leadership")) {
failure_test = FT_TRANSFER_LEADERSHIP;
} else if (!strcmp(test, "clear")) {
@@ -525,6 +525,59 @@ for i in $(seq $n); do
OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s$i], [s$i.pid])
done
+AT_CLEANUP
+
+AT_SETUP([OVSDB cluster - leadership change before replication while joining])
+AT_KEYWORDS([ovsdb server negative unix cluster join])
+
+n=5
+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db dnl
+ $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
+cid=$(ovsdb-tool db-cid s1.db)
+schema_name=$(ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema)
+for i in $(seq 2 $n); do
+ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
+done
+
+on_exit 'kill $(cat *.pid)'
+on_exit "
+ for i in \$(ls $(pwd)/s[[0-$n]]); do
+ ovs-appctl --timeout 1 -t \$i cluster/status $schema_name;
+ done
+"
+
+dnl Starting servers one by one asking all exisitng servers to transfer
+dnl leadership right after starting to add a server. Joining server will
+dnl need to find a new leader that will also transfer leadership.
+dnl This will continue until the same server will not become a leader
+dnl for the second time and will be able to add a new server.
+for i in $(seq $n); do
+ dnl Make sure that all already started servers joined the cluster.
+ for j in $(seq $((i - 1)) ); do
+ AT_CHECK([ovsdb_client_wait unix:s$j.ovsdb $schema_name connected])
+ done
+ for j in $(seq $((i - 1)) ); do
+ OVS_WAIT_UNTIL([ovs-appctl -t "$(pwd)"/s$j \
+ cluster/failure-test \
+ transfer-leadership-after-starting-to-add \
+ | grep -q "engaged"])
+ done
+
+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off \
+ --detach --no-chdir --log-file=s$i.log \
+ --pidfile=s$i.pid --unixctl=s$i \
+ --remote=punix:s$i.ovsdb s$i.db])
+done
+
+dnl Make sure that all servers joined the cluster.
+for i in $(seq $n); do
+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+for i in $(seq $n); do
+ OVS_APP_EXIT_AND_WAIT_BY_TARGET([$(pwd)/s$i], [s$i.pid])
+done
+
AT_CLEANUP