Message ID | 20220916084156.884513-1-xsimonar@redhat.com |
---|---|
State | Accepted |
Headers | show |
Series | [ovs-dev] ovn-controller: fix a crash when deleting a port claimed when sb was ro | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/github-robot-_ovn-kubernetes | success | github build: passed |
Thanks Xavier, Acked-by: Mark Michelson <mmichels@redhat.com> In the interest of getting 22.09 released today, I have merged this to main, branch-22.09, branch-22.06, and branch-22.03 already. On 9/16/22 04:41, Xavier Simonart wrote: > When SB is read-only when a port needs to be claimed, the claim of the port > is delayed until SB becomes writable. > However, if that port gets deleted while ovn-controller is waiting for SB to > become writable, the claim will crash ovn-controller > > Signed-off-by: Xavier Simonart <xsimonar@redhat.com> > --- > controller/ovn-controller.c | 3 +- > tests/ovn.at | 92 +++++++++++++++++++++++++++++++++++++ > 2 files changed, 94 insertions(+), 1 deletion(-) > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index 89a495a04..07e9b07cf 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -3799,7 +3799,6 @@ main(int argc, char *argv[]) > engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface, > ovs_interface_shadow_ovs_interface_handler); > > - engine_add_input(&en_runtime_data, &en_sb_ro, runtime_data_sb_ro_handler); > engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL); > > engine_add_input(&en_runtime_data, &en_ovs_open_vswitch, NULL); > @@ -3814,6 +3813,8 @@ main(int argc, char *argv[]) > /* Reuse the same handler for any previously postponed ports. */ > engine_add_input(&en_runtime_data, &en_postponed_ports, > runtime_data_sb_port_binding_handler); > + /* Run sb_ro_handler after port_binding_handler in case port get deleted */ > + engine_add_input(&en_runtime_data, &en_sb_ro, runtime_data_sb_ro_handler); > > /* The OVS interface handler for runtime_data changes MUST be executed > * after the sb_port_binding_handler as port_binding deletes must be > diff --git a/tests/ovn.at b/tests/ovn.at > index 4f4783461..80e9192ca 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -32797,3 +32797,95 @@ check ovn-nbctl lrp-del lr0-sw1 > OVN_CLEANUP([hv1],[hv2]) > AT_CLEANUP > ]) > + > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ovn-controller: deleting a port being claimed]) > +ovn_start > +net_add n1 > + > +sim_add hv1 > +as hv1 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.1 > +check ovs-vsctl add-port br-int p0 > +check ovs-vsctl add-port br-int p1 > + > +ovn-nbctl ls-add sw0 > +ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2" > +ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3" > +check ovn-nbctl --wait=hv sync > + > +# Pause SB > +AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)]) > + > +# Make us claim psw0-port0. This will make SB ro > +ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0 > +sleep 0.5 > +# Make us claim sw0-port1. Claim should be delayed > +ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1 > +# Delete sw0-port1 > +ovn-nbctl lsp-del sw0-port1 > +sleep 0.5 > +# Restart SB > +AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)]) > +check ovn-nbctl --wait=hv sync > +ovn-nbctl ls-del sw0 > +check ovn-nbctl --wait=hv sync > +OVN_CLEANUP([hv1]) > +AT_CLEANUP > +]) > + > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ovn-controller: checking flows after delaying port claims]) > +ovn_start > +net_add n1 > + > +sim_add hv1 > +as hv1 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.1 > +check ovs-vsctl add-port br-int p0 > +check ovs-vsctl add-port br-int p1 > + > +ovn-nbctl ls-add sw0 > +ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2" > +check ovn-nbctl --wait=hv sync > +ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3" > +ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0 > +ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1 > +check ovn-nbctl --wait=hv sync > +as hv1 ovs-ofctl dump-flows br-int | sed 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | sed 's/load:0x.->NXM_NX_REG1/load:0x?->NXM_NX_REG1/g' | sed 's/idle_age=[[0-9]], //g' | sort > offlows1 > + > +ovs-vsctl set interface p0 external-ids:iface-id=foo0 > +ovs-vsctl set interface p1 external-ids:iface-id=foo1 > +ovn-nbctl lsp-del sw0-port0 > +ovn-nbctl lsp-del sw0-port1 > +ovn-nbctl ls-del sw0 > +check ovn-nbctl --wait=hv sync > + > +ovn-nbctl ls-add sw0 > +ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2" > +check ovn-nbctl --wait=hv sync > +ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3" > +check ovn-nbctl --wait=hv sync > + > +# Pause SB > +AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)]) > + > +# Make us claim psw0-port0. This will make SB ro > +ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0 > +sleep 0.5 > +# Make us claim sw0-port1. Claim should be delayed > +ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1 > +sleep 0.5 > +# Restart SB > +AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)]) > +check ovn-nbctl --wait=hv sync > +as hv1 ovs-ofctl dump-flows br-int | sed 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | sed 's/load:0x.->NXM_NX_REG1/load:0x?->NXM_NX_REG1/g' | sed 's/idle_age=[[0-9]], //g' | sort > offlows2 > +AT_CHECK([diff offlows1 offlows2]) > + > +ovn-nbctl ls-del sw0 > +check ovn-nbctl --wait=hv sync > +OVN_CLEANUP([hv1]) > +AT_CLEANUP > +])
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 89a495a04..07e9b07cf 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -3799,7 +3799,6 @@ main(int argc, char *argv[]) engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface, ovs_interface_shadow_ovs_interface_handler); - engine_add_input(&en_runtime_data, &en_sb_ro, runtime_data_sb_ro_handler); engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL); engine_add_input(&en_runtime_data, &en_ovs_open_vswitch, NULL); @@ -3814,6 +3813,8 @@ main(int argc, char *argv[]) /* Reuse the same handler for any previously postponed ports. */ engine_add_input(&en_runtime_data, &en_postponed_ports, runtime_data_sb_port_binding_handler); + /* Run sb_ro_handler after port_binding_handler in case port get deleted */ + engine_add_input(&en_runtime_data, &en_sb_ro, runtime_data_sb_ro_handler); /* The OVS interface handler for runtime_data changes MUST be executed * after the sb_port_binding_handler as port_binding deletes must be diff --git a/tests/ovn.at b/tests/ovn.at index 4f4783461..80e9192ca 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -32797,3 +32797,95 @@ check ovn-nbctl lrp-del lr0-sw1 OVN_CLEANUP([hv1],[hv2]) AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ovn-controller: deleting a port being claimed]) +ovn_start +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +check ovs-vsctl add-port br-int p0 +check ovs-vsctl add-port br-int p1 + +ovn-nbctl ls-add sw0 +ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2" +ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3" +check ovn-nbctl --wait=hv sync + +# Pause SB +AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)]) + +# Make us claim psw0-port0. This will make SB ro +ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0 +sleep 0.5 +# Make us claim sw0-port1. Claim should be delayed +ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1 +# Delete sw0-port1 +ovn-nbctl lsp-del sw0-port1 +sleep 0.5 +# Restart SB +AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)]) +check ovn-nbctl --wait=hv sync +ovn-nbctl ls-del sw0 +check ovn-nbctl --wait=hv sync +OVN_CLEANUP([hv1]) +AT_CLEANUP +]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ovn-controller: checking flows after delaying port claims]) +ovn_start +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +check ovs-vsctl add-port br-int p0 +check ovs-vsctl add-port br-int p1 + +ovn-nbctl ls-add sw0 +ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2" +check ovn-nbctl --wait=hv sync +ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3" +ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0 +ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1 +check ovn-nbctl --wait=hv sync +as hv1 ovs-ofctl dump-flows br-int | sed 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | sed 's/load:0x.->NXM_NX_REG1/load:0x?->NXM_NX_REG1/g' | sed 's/idle_age=[[0-9]], //g' | sort > offlows1 + +ovs-vsctl set interface p0 external-ids:iface-id=foo0 +ovs-vsctl set interface p1 external-ids:iface-id=foo1 +ovn-nbctl lsp-del sw0-port0 +ovn-nbctl lsp-del sw0-port1 +ovn-nbctl ls-del sw0 +check ovn-nbctl --wait=hv sync + +ovn-nbctl ls-add sw0 +ovn-nbctl lsp-add sw0 sw0-port0 -- lsp-set-addresses sw0-port0 "50:54:00:00:00:01 192.168.0.2" +check ovn-nbctl --wait=hv sync +ovn-nbctl lsp-add sw0 sw0-port1 -- lsp-set-addresses sw0-port1 "50:54:00:00:00:02 192.168.0.3" +check ovn-nbctl --wait=hv sync + +# Pause SB +AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)]) + +# Make us claim psw0-port0. This will make SB ro +ovs-vsctl set interface p0 external-ids:iface-id=sw0-port0 +sleep 0.5 +# Make us claim sw0-port1. Claim should be delayed +ovs-vsctl set interface p1 external-ids:iface-id=sw0-port1 +sleep 0.5 +# Restart SB +AT_CHECK([kill -CONT $(cat ovn-sb/ovsdb-server.pid)]) +check ovn-nbctl --wait=hv sync +as hv1 ovs-ofctl dump-flows br-int | sed 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | sed 's/load:0x.->NXM_NX_REG1/load:0x?->NXM_NX_REG1/g' | sed 's/idle_age=[[0-9]], //g' | sort > offlows2 +AT_CHECK([diff offlows1 offlows2]) + +ovn-nbctl ls-del sw0 +check ovn-nbctl --wait=hv sync +OVN_CLEANUP([hv1]) +AT_CLEANUP +])
When SB is read-only when a port needs to be claimed, the claim of the port is delayed until SB becomes writable. However, if that port gets deleted while ovn-controller is waiting for SB to become writable, the claim will crash ovn-controller Signed-off-by: Xavier Simonart <xsimonar@redhat.com> --- controller/ovn-controller.c | 3 +- tests/ovn.at | 92 +++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-)