diff mbox series

[ovs-dev] ovn-controller: fix a crash when deleting a port claimed when sb was ro

Message ID 20220916084156.884513-1-xsimonar@redhat.com
State Accepted
Headers show
Series [ovs-dev] ovn-controller: fix a crash when deleting a port claimed when sb was ro | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Xavier Simonart Sept. 16, 2022, 8:41 a.m. UTC
When SB is read-only when a port needs to be claimed, the claim of the port
is delayed until SB becomes writable.
However, if that port gets deleted while ovn-controller is waiting for SB to
become writable, the claim will crash ovn-controller

Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
---
 controller/ovn-controller.c |  3 +-
 tests/ovn.at                | 92 +++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 1 deletion(-)

Comments

Mark Michelson Sept. 16, 2022, 3:25 p.m. UTC | #1
Thanks Xavier,

Acked-by: Mark Michelson <mmichels@redhat.com>

In the interest of getting 22.09 released today, I have merged this to 
main, branch-22.09, branch-22.06, and branch-22.03 already.

On 9/16/22 04:41, Xavier Simonart wrote:
> When SB is read-only when a port needs to be claimed, the claim of the port
> is delayed until SB becomes writable.
> However, if that port gets deleted while ovn-controller is waiting for SB to
> become writable, the claim will crash ovn-controller
> 
> Signed-off-by: Xavier Simonart <xsimonar@redhat.com>
> ---
>   controller/ovn-controller.c |  3 +-
>   tests/ovn.at                | 92 +++++++++++++++++++++++++++++++++++++
>   2 files changed, 94 insertions(+), 1 deletion(-)
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 89a495a04..07e9b07cf 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -3799,7 +3799,6 @@ main(int argc, char *argv[])
>       engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
>                        ovs_interface_shadow_ovs_interface_handler);
>   
> -    engine_add_input(&en_runtime_data, &en_sb_ro, runtime_data_sb_ro_handler);
>       engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL);
>   
>       engine_add_input(&en_runtime_data, &en_ovs_open_vswitch, NULL);
> @@ -3814,6 +3813,8 @@ main(int argc, char *argv[])
>       /* Reuse the same handler for any previously postponed ports. */
>       engine_add_input(&en_runtime_data, &en_postponed_ports,
>                        runtime_data_sb_port_binding_handler);
> +    /* Run sb_ro_handler after port_binding_handler in case port get deleted */
> +    engine_add_input(&en_runtime_data, &en_sb_ro, runtime_data_sb_ro_handler);
>   
>       /* The OVS interface handler for runtime_data changes MUST be executed
>        * after the sb_port_binding_handler as port_binding deletes must be
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 4f4783461..80e9192ca 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -32797,3 +32797,95 @@ check ovn-nbctl lrp-del lr0-sw1
>   OVN_CLEANUP([hv1],[hv2])
>   AT_CLEANUP
>   ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn-controller: deleting a port being claimed])
> +ovn_start
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +check ovs-vsctl add-port br-int p0
> +check ovs-vsctl add-port br-int p1
> +
> +ovn-nbctl ls-add sw0
> +ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2"
> +ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3"
> +check ovn-nbctl --wait=hv sync
> +
> +# Pause SB
> +AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)])
> +
> +# Make us claim psw0-port0. This will make SB ro
> +ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0
> +sleep 0.5
> +# Make us claim sw0-port1. Claim should be delayed
> +ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1
> +# Delete sw0-port1
> +ovn-nbctl lsp-del sw0-port1
> +sleep 0.5
> +# Restart SB
> +AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)])
> +check ovn-nbctl --wait=hv sync
> +ovn-nbctl ls-del sw0
> +check ovn-nbctl --wait=hv sync
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn-controller: checking flows after delaying port claims])
> +ovn_start
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +check ovs-vsctl add-port br-int p0
> +check ovs-vsctl add-port br-int p1
> +
> +ovn-nbctl ls-add sw0
> +ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2"
> +check ovn-nbctl --wait=hv sync
> +ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3"
> +ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0
> +ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1
> +check ovn-nbctl --wait=hv sync
> +as hv1 ovs-ofctl dump-flows br-int  | sed 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | sed 's/load:0x.->NXM_NX_REG1/load:0x?->NXM_NX_REG1/g' | sed 's/idle_age=[[0-9]], //g' | sort > offlows1
> +
> +ovs-vsctl set interface p0 external-ids:iface-id=foo0
> +ovs-vsctl set interface p1 external-ids:iface-id=foo1
> +ovn-nbctl lsp-del sw0-port0
> +ovn-nbctl lsp-del sw0-port1
> +ovn-nbctl ls-del sw0
> +check ovn-nbctl --wait=hv sync
> +
> +ovn-nbctl ls-add sw0
> +ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2"
> +check ovn-nbctl --wait=hv sync
> +ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3"
> +check ovn-nbctl --wait=hv sync
> +
> +# Pause SB
> +AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)])
> +
> +# Make us claim psw0-port0. This will make SB ro
> +ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0
> +sleep 0.5
> +# Make us claim sw0-port1. Claim should be delayed
> +ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1
> +sleep 0.5
> +# Restart SB
> +AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)])
> +check ovn-nbctl --wait=hv sync
> +as hv1 ovs-ofctl dump-flows br-int | sed 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | sed 's/load:0x.->NXM_NX_REG1/load:0x?->NXM_NX_REG1/g' | sed 's/idle_age=[[0-9]], //g' | sort > offlows2
> +AT_CHECK([diff  offlows1 offlows2])
> +
> +ovn-nbctl ls-del sw0
> +check ovn-nbctl --wait=hv sync
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])
diff mbox series

Patch

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 89a495a04..07e9b07cf 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -3799,7 +3799,6 @@  main(int argc, char *argv[])
     engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
                      ovs_interface_shadow_ovs_interface_handler);
 
-    engine_add_input(&en_runtime_data, &en_sb_ro, runtime_data_sb_ro_handler);
     engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL);
 
     engine_add_input(&en_runtime_data, &en_ovs_open_vswitch, NULL);
@@ -3814,6 +3813,8 @@  main(int argc, char *argv[])
     /* Reuse the same handler for any previously postponed ports. */
     engine_add_input(&en_runtime_data, &en_postponed_ports,
                      runtime_data_sb_port_binding_handler);
+    /* Run sb_ro_handler after port_binding_handler in case port get deleted */
+    engine_add_input(&en_runtime_data, &en_sb_ro, runtime_data_sb_ro_handler);
 
     /* The OVS interface handler for runtime_data changes MUST be executed
      * after the sb_port_binding_handler as port_binding deletes must be
diff --git a/tests/ovn.at b/tests/ovn.at
index 4f4783461..80e9192ca 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -32797,3 +32797,95 @@  check ovn-nbctl lrp-del lr0-sw1
 OVN_CLEANUP([hv1],[hv2])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-controller: deleting a port being claimed])
+ovn_start
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+check ovs-vsctl add-port br-int p0
+check ovs-vsctl add-port br-int p1
+
+ovn-nbctl ls-add sw0
+ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2"
+ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3"
+check ovn-nbctl --wait=hv sync
+
+# Pause SB
+AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)])
+
+# Make us claim psw0-port0. This will make SB ro
+ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0
+sleep 0.5
+# Make us claim sw0-port1. Claim should be delayed
+ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1
+# Delete sw0-port1
+ovn-nbctl lsp-del sw0-port1
+sleep 0.5
+# Restart SB
+AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)])
+check ovn-nbctl --wait=hv sync
+ovn-nbctl ls-del sw0
+check ovn-nbctl --wait=hv sync
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-controller: checking flows after delaying port claims])
+ovn_start
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+check ovs-vsctl add-port br-int p0
+check ovs-vsctl add-port br-int p1
+
+ovn-nbctl ls-add sw0
+ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2"
+check ovn-nbctl --wait=hv sync
+ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3"
+ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0
+ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1
+check ovn-nbctl --wait=hv sync
+as hv1 ovs-ofctl dump-flows br-int  | sed 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | sed 's/load:0x.->NXM_NX_REG1/load:0x?->NXM_NX_REG1/g' | sed 's/idle_age=[[0-9]], //g' | sort > offlows1
+
+ovs-vsctl set interface p0 external-ids:iface-id=foo0
+ovs-vsctl set interface p1 external-ids:iface-id=foo1
+ovn-nbctl lsp-del sw0-port0
+ovn-nbctl lsp-del sw0-port1
+ovn-nbctl ls-del sw0
+check ovn-nbctl --wait=hv sync
+
+ovn-nbctl ls-add sw0
+ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2"
+check ovn-nbctl --wait=hv sync
+ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3"
+check ovn-nbctl --wait=hv sync
+
+# Pause SB
+AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)])
+
+# Make us claim psw0-port0. This will make SB ro
+ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0
+sleep 0.5
+# Make us claim sw0-port1. Claim should be delayed
+ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1
+sleep 0.5
+# Restart SB
+AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)])
+check ovn-nbctl --wait=hv sync
+as hv1 ovs-ofctl dump-flows br-int | sed 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | sed 's/load:0x.->NXM_NX_REG1/load:0x?->NXM_NX_REG1/g' | sed 's/idle_age=[[0-9]], //g' | sort > offlows2
+AT_CHECK([diff  offlows1 offlows2])
+
+ovn-nbctl ls-del sw0
+check ovn-nbctl --wait=hv sync
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])