Message ID | 20220825144710.4149344-1-xsimonar@redhat.com |
---|---|
State | Superseded |
Headers | show |
Series | [ovs-dev,v2] controller: fix potential segmentation violation when removing ports | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/github-robot-_ovn-kubernetes | success | github build: passed |
On Thu, Aug 25, 2022 at 7:47 AM Xavier Simonart <xsimonar@redhat.com> wrote: > > If a logical switch port is added and connected to a logical router > port (through options: router-port) before the router port is > created, then this might cause further issues such as segmentation > violation when the switch and router ports are deleted. > > Signed-off-by: Xavier Simonart <xsimonar@redhat.com> > > --- > v2: handled Han's comments (avoid wasting CPU cycles searching for peer_ld) > --- > controller/local_data.c | 36 ++++++++++++++---------------------- > tests/ovn.at | 30 ++++++++++++++++++++++++++++++ > 2 files changed, 44 insertions(+), 22 deletions(-) > > diff --git a/controller/local_data.c b/controller/local_data.c > index 7f874fc19..669e686ab 100644 > --- a/controller/local_data.c > +++ b/controller/local_data.c > @@ -34,7 +34,7 @@ > > VLOG_DEFINE_THIS_MODULE(ldata); > > -static void add_local_datapath__( > +static struct local_datapath *add_local_datapath__( > struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > struct ovsdb_idl_index *sbrec_port_binding_by_datapath, > struct ovsdb_idl_index *sbrec_port_binding_by_name, > @@ -194,17 +194,7 @@ add_local_datapath_peer_port( > return; > } > > - bool present = false; > - for (size_t i = 0; i < ld->n_peer_ports; i++) { > - if (ld->peer_ports[i].local == pb) { > - present = true; > - break; > - } > - } > - > - if (!present) { > - local_datapath_peer_port_add(ld, pb, peer); > - } > + local_datapath_peer_port_add(ld, pb, peer); > > struct local_datapath *peer_ld = > get_local_datapath(local_datapaths, > @@ -218,12 +208,6 @@ add_local_datapath_peer_port( > return; > } > > - for (size_t i = 0; i < peer_ld->n_peer_ports; i++) { > - if (peer_ld->peer_ports[i].local == peer) { > - return; > - } > - } > - > local_datapath_peer_port_add(peer_ld, peer, pb); > } > > @@ -541,7 +525,7 @@ chassis_tunnel_find(const struct hmap *chassis_tunnels, const char *chassis_id, > } > > /* static functions. */ > -static void > +static struct local_datapath * > add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > struct ovsdb_idl_index *sbrec_port_binding_by_datapath, > struct ovsdb_idl_index *sbrec_port_binding_by_name, > @@ -553,7 +537,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > uint32_t dp_key = dp->tunnel_key; > struct local_datapath *ld = get_local_datapath(local_datapaths, dp_key); > if (ld) { > - return; > + return ld; > } > > ld = local_datapath_alloc(dp); > @@ -568,7 +552,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > if (depth >= 100) { > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > VLOG_WARN_RL(&rl, "datapaths nested too deep"); > - return; > + return ld; > } > > struct sbrec_port_binding *target = > @@ -589,12 +573,14 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > if (peer && peer->datapath) { > if (need_add_patch_peer_to_local( > sbrec_port_binding_by_name, pb, chassis)) { > - add_local_datapath__(sbrec_datapath_binding_by_key, > + struct local_datapath *peer_ld = > + add_local_datapath__(sbrec_datapath_binding_by_key, > sbrec_port_binding_by_datapath, > sbrec_port_binding_by_name, > depth + 1, peer->datapath, > chassis, local_datapaths, > tracked_datapaths); > + local_datapath_peer_port_add(peer_ld, peer, pb); Thanks Xavier for the refactor. Now that it is cleaner, it reminds me of a potential problem that when the peer DP doesn't need to be added to local_datapaths, we may end up with the same crash because the peer is added as peer of the pb but the pb is not added as peer of the peer (I am sorry that this reads confusing). So when the peer is deleted, it won't remove itself from the pb's peer, and pb's peer would be a dangling pointer. I would be safe only if we make sure they are always added as peers from both sides, or not added at all. However, if we move the below local_datapath_peer_port_add(ld, pb, peer); under this if condition, it is possible that when we do need the peer port information it is unavailable from the local_datapath. I am going through all use cases of the peer port structure before concluding. Han > } > local_datapath_peer_port_add(ld, pb, peer); > } > @@ -602,6 +588,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > } > } > sbrec_port_binding_index_destroy_row(target); > + return ld; > } > > static struct tracked_datapath * > @@ -622,6 +609,11 @@ local_datapath_peer_port_add(struct local_datapath *ld, > const struct sbrec_port_binding *local, > const struct sbrec_port_binding *remote) > { > + for (size_t i = 0; i < ld->n_peer_ports; i++) { > + if (ld->peer_ports[i].local == local) { > + return; > + } > + } > ld->n_peer_ports++; > if (ld->n_peer_ports > ld->n_allocated_peer_ports) { > size_t old_n_ports = ld->n_allocated_peer_ports; > diff --git a/tests/ovn.at b/tests/ovn.at > index bba2c9c1d..ae0918d55 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -32580,3 +32580,33 @@ check ovn-nbctl --wait=hv sync > OVN_CLEANUP([hv1]) > AT_CLEANUP > ]) > + > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([router port add then remove - lsp first]) > +ovn_start > +net_add n1 > + > +sim_add hv1 > +as hv1 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.1 > +ovs-vsctl -- add-port br-int hv1-vif1 -- \ > + set interface hv1-vif1 external-ids:iface-id=sw0-p1 \ > + options:tx_pcap=hv1/vif1-tx.pcap \ > + options:rxq_pcap=hv1/vif1-rx.pcap \ > + ofport-request=1 > + > +check ovn-nbctl ls-add sw0 > +check ovn-nbctl lr-add ro0 > +check ovn-nbctl lsp-add sw0 sw0-p1 > +check ovn-nbctl lsp-add sw0 lsp > +check ovn-nbctl lsp-set-type lsp router > +check ovn-nbctl lsp-set-options lsp router-port=lrp > +check ovn-nbctl lsp-set-addresses lsp 00:00:00:00:00:1 > +check ovn-nbctl --wait=hv lrp-add ro0 lrp 00:00:00:00:00:1 aef0:0:0:0:0:0:0:1/64 > +check ovn-nbctl --wait=hv lsp-del lsp > +check ovn-nbctl lrp-del lrp > +check ovn-nbctl --wait=hv sync > +OVN_CLEANUP([hv1]) > +AT_CLEANUP > +]) > -- > 2.31.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On Thu, Aug 25, 2022 at 10:05 AM Han Zhou <zhouhan@gmail.com> wrote: > > > > On Thu, Aug 25, 2022 at 7:47 AM Xavier Simonart <xsimonar@redhat.com> wrote: > > > > If a logical switch port is added and connected to a logical router > > port (through options: router-port) before the router port is > > created, then this might cause further issues such as segmentation > > violation when the switch and router ports are deleted. > > > > Signed-off-by: Xavier Simonart <xsimonar@redhat.com> > > > > --- > > v2: handled Han's comments (avoid wasting CPU cycles searching for peer_ld) > > --- > > controller/local_data.c | 36 ++++++++++++++---------------------- > > tests/ovn.at | 30 ++++++++++++++++++++++++++++++ > > 2 files changed, 44 insertions(+), 22 deletions(-) > > > > diff --git a/controller/local_data.c b/controller/local_data.c > > index 7f874fc19..669e686ab 100644 > > --- a/controller/local_data.c > > +++ b/controller/local_data.c > > @@ -34,7 +34,7 @@ > > > > VLOG_DEFINE_THIS_MODULE(ldata); > > > > -static void add_local_datapath__( > > +static struct local_datapath *add_local_datapath__( > > struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > > struct ovsdb_idl_index *sbrec_port_binding_by_datapath, > > struct ovsdb_idl_index *sbrec_port_binding_by_name, > > @@ -194,17 +194,7 @@ add_local_datapath_peer_port( > > return; > > } > > > > - bool present = false; > > - for (size_t i = 0; i < ld->n_peer_ports; i++) { > > - if (ld->peer_ports[i].local == pb) { > > - present = true; > > - break; > > - } > > - } > > - > > - if (!present) { > > - local_datapath_peer_port_add(ld, pb, peer); > > - } > > + local_datapath_peer_port_add(ld, pb, peer); > > > > struct local_datapath *peer_ld = > > get_local_datapath(local_datapaths, > > @@ -218,12 +208,6 @@ add_local_datapath_peer_port( > > return; > > } > > > > - for (size_t i = 0; i < peer_ld->n_peer_ports; i++) { > > - if (peer_ld->peer_ports[i].local == peer) { > > - return; > > - } > > - } > > - > > local_datapath_peer_port_add(peer_ld, peer, pb); > > } > > > > @@ -541,7 +525,7 @@ chassis_tunnel_find(const struct hmap *chassis_tunnels, const char *chassis_id, > > } > > > > /* static functions. */ > > -static void > > +static struct local_datapath * > > add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > > struct ovsdb_idl_index *sbrec_port_binding_by_datapath, > > struct ovsdb_idl_index *sbrec_port_binding_by_name, > > @@ -553,7 +537,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > > uint32_t dp_key = dp->tunnel_key; > > struct local_datapath *ld = get_local_datapath(local_datapaths, dp_key); > > if (ld) { > > - return; > > + return ld; > > } > > > > ld = local_datapath_alloc(dp); > > @@ -568,7 +552,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > > if (depth >= 100) { > > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > > VLOG_WARN_RL(&rl, "datapaths nested too deep"); > > - return; > > + return ld; > > } > > > > struct sbrec_port_binding *target = > > @@ -589,12 +573,14 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > > if (peer && peer->datapath) { > > if (need_add_patch_peer_to_local( > > sbrec_port_binding_by_name, pb, chassis)) { > > - add_local_datapath__(sbrec_datapath_binding_by_key, > > + struct local_datapath *peer_ld = > > + add_local_datapath__(sbrec_datapath_binding_by_key, > > sbrec_port_binding_by_datapath, > > sbrec_port_binding_by_name, > > depth + 1, peer->datapath, > > chassis, local_datapaths, > > tracked_datapaths); > > + local_datapath_peer_port_add(peer_ld, peer, pb); > > Thanks Xavier for the refactor. Now that it is cleaner, it reminds me of a potential problem that when the peer DP doesn't need to be added to local_datapaths, we may end up with the same crash because the peer is added as peer of the pb but the pb is not added as peer of the peer (I am sorry that this reads confusing). So when the peer is deleted, it won't remove itself from the pb's peer, and pb's peer would be a dangling pointer. I would be safe only if we make sure they are always added as peers from both sides, or not added at all. However, if we move the below local_datapath_peer_port_add(ld, pb, peer); under this if condition, it is possible that when we do need the peer port information it is unavailable from the local_datapath. I am going through all use cases of the peer port structure before concluding. > Hi Xavier, After going through the use cases of the ld->peer_ports, for what I can tell, the data is used by pinctrl.c and physical.c for DGPs when both the LR and the LS are local, so I think we should move the below "local_datapath_peer_port_add(ld, pb, peer);" into the above "if" condition. Would you like to update with v3? Thanks, Han > Han > > > } > > local_datapath_peer_port_add(ld, pb, peer); > > } > > @@ -602,6 +588,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > > } > > } > > sbrec_port_binding_index_destroy_row(target); > > + return ld; > > } > > > > static struct tracked_datapath * > > @@ -622,6 +609,11 @@ local_datapath_peer_port_add(struct local_datapath *ld, > > const struct sbrec_port_binding *local, > > const struct sbrec_port_binding *remote) > > { > > + for (size_t i = 0; i < ld->n_peer_ports; i++) { > > + if (ld->peer_ports[i].local == local) { > > + return; > > + } > > + } > > ld->n_peer_ports++; > > if (ld->n_peer_ports > ld->n_allocated_peer_ports) { > > size_t old_n_ports = ld->n_allocated_peer_ports; > > diff --git a/tests/ovn.at b/tests/ovn.at > > index bba2c9c1d..ae0918d55 100644 > > --- a/tests/ovn.at > > +++ b/tests/ovn.at > > @@ -32580,3 +32580,33 @@ check ovn-nbctl --wait=hv sync > > OVN_CLEANUP([hv1]) > > AT_CLEANUP > > ]) > > + > > +OVN_FOR_EACH_NORTHD([ > > +AT_SETUP([router port add then remove - lsp first]) > > +ovn_start > > +net_add n1 > > + > > +sim_add hv1 > > +as hv1 > > +ovs-vsctl add-br br-phys > > +ovn_attach n1 br-phys 192.168.0.1 > > +ovs-vsctl -- add-port br-int hv1-vif1 -- \ > > + set interface hv1-vif1 external-ids:iface-id=sw0-p1 \ > > + options:tx_pcap=hv1/vif1-tx.pcap \ > > + options:rxq_pcap=hv1/vif1-rx.pcap \ > > + ofport-request=1 > > + > > +check ovn-nbctl ls-add sw0 > > +check ovn-nbctl lr-add ro0 > > +check ovn-nbctl lsp-add sw0 sw0-p1 > > +check ovn-nbctl lsp-add sw0 lsp > > +check ovn-nbctl lsp-set-type lsp router > > +check ovn-nbctl lsp-set-options lsp router-port=lrp > > +check ovn-nbctl lsp-set-addresses lsp 00:00:00:00:00:1 > > +check ovn-nbctl --wait=hv lrp-add ro0 lrp 00:00:00:00:00:1 aef0:0:0:0:0:0:0:1/64 > > +check ovn-nbctl --wait=hv lsp-del lsp > > +check ovn-nbctl lrp-del lrp > > +check ovn-nbctl --wait=hv sync > > +OVN_CLEANUP([hv1]) > > +AT_CLEANUP > > +]) > > -- > > 2.31.1 > > > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Hi Han Agreed that v2 is not correct as we might have cases where the peers are not added symmetrically (I'll add a test case highlighting this), and we'll hit the same kind of issue/crash. However, I am not sure that I can simply move "local_datapath_peer_port_add(ld, pb, peer);" into the above "if" (if I understood your proposal). If I do it, some other test cases start to fail, such as "ip-buffering" I'll continue looking into this on Monday Thanks Xavier On Fri, Aug 26, 2022 at 7:44 AM Han Zhou <hzhou@ovn.org> wrote: > > > On Thu, Aug 25, 2022 at 10:05 AM Han Zhou <zhouhan@gmail.com> wrote: > > > > > > > > On Thu, Aug 25, 2022 at 7:47 AM Xavier Simonart <xsimonar@redhat.com> > wrote: > > > > > > If a logical switch port is added and connected to a logical router > > > port (through options: router-port) before the router port is > > > created, then this might cause further issues such as segmentation > > > violation when the switch and router ports are deleted. > > > > > > Signed-off-by: Xavier Simonart <xsimonar@redhat.com> > > > > > > --- > > > v2: handled Han's comments (avoid wasting CPU cycles searching for > peer_ld) > > > --- > > > controller/local_data.c | 36 ++++++++++++++---------------------- > > > tests/ovn.at | 30 ++++++++++++++++++++++++++++++ > > > 2 files changed, 44 insertions(+), 22 deletions(-) > > > > > > diff --git a/controller/local_data.c b/controller/local_data.c > > > index 7f874fc19..669e686ab 100644 > > > --- a/controller/local_data.c > > > +++ b/controller/local_data.c > > > @@ -34,7 +34,7 @@ > > > > > > VLOG_DEFINE_THIS_MODULE(ldata); > > > > > > -static void add_local_datapath__( > > > +static struct local_datapath *add_local_datapath__( > > > struct ovsdb_idl_index *sbrec_datapath_binding_by_key, > > > struct ovsdb_idl_index *sbrec_port_binding_by_datapath, > > > struct ovsdb_idl_index *sbrec_port_binding_by_name, > > > @@ -194,17 +194,7 @@ add_local_datapath_peer_port( > > > return; > > > } > > > > > > - bool present = false; > > > - for (size_t i = 0; i < ld->n_peer_ports; i++) { > > > - if (ld->peer_ports[i].local == pb) { > > > - present = true; > > > - break; > > > - } > > > - } > > > - > > > - if (!present) { > > > - local_datapath_peer_port_add(ld, pb, peer); > > > - } > > > + local_datapath_peer_port_add(ld, pb, peer); > > > > > > struct local_datapath *peer_ld = > > > get_local_datapath(local_datapaths, > > > @@ -218,12 +208,6 @@ add_local_datapath_peer_port( > > > return; > > > } > > > > > > - for (size_t i = 0; i < peer_ld->n_peer_ports; i++) { > > > - if (peer_ld->peer_ports[i].local == peer) { > > > - return; > > > - } > > > - } > > > - > > > local_datapath_peer_port_add(peer_ld, peer, pb); > > > } > > > > > > @@ -541,7 +525,7 @@ chassis_tunnel_find(const struct hmap > *chassis_tunnels, const char *chassis_id, > > > } > > > > > > /* static functions. */ > > > -static void > > > +static struct local_datapath * > > > add_local_datapath__(struct ovsdb_idl_index > *sbrec_datapath_binding_by_key, > > > struct ovsdb_idl_index > *sbrec_port_binding_by_datapath, > > > struct ovsdb_idl_index > *sbrec_port_binding_by_name, > > > @@ -553,7 +537,7 @@ add_local_datapath__(struct ovsdb_idl_index > *sbrec_datapath_binding_by_key, > > > uint32_t dp_key = dp->tunnel_key; > > > struct local_datapath *ld = get_local_datapath(local_datapaths, > dp_key); > > > if (ld) { > > > - return; > > > + return ld; > > > } > > > > > > ld = local_datapath_alloc(dp); > > > @@ -568,7 +552,7 @@ add_local_datapath__(struct ovsdb_idl_index > *sbrec_datapath_binding_by_key, > > > if (depth >= 100) { > > > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > > > VLOG_WARN_RL(&rl, "datapaths nested too deep"); > > > - return; > > > + return ld; > > > } > > > > > > struct sbrec_port_binding *target = > > > @@ -589,12 +573,14 @@ add_local_datapath__(struct ovsdb_idl_index > *sbrec_datapath_binding_by_key, > > > if (peer && peer->datapath) { > > > if (need_add_patch_peer_to_local( > > > sbrec_port_binding_by_name, pb, chassis)) > { > > > - > add_local_datapath__(sbrec_datapath_binding_by_key, > > > + struct local_datapath *peer_ld = > > > + > add_local_datapath__(sbrec_datapath_binding_by_key, > > > > sbrec_port_binding_by_datapath, > > > > sbrec_port_binding_by_name, > > > depth + 1, > peer->datapath, > > > chassis, local_datapaths, > > > tracked_datapaths); > > > + local_datapath_peer_port_add(peer_ld, peer, > pb); > > > > Thanks Xavier for the refactor. Now that it is cleaner, it reminds me of > a potential problem that when the peer DP doesn't need to be added to > local_datapaths, we may end up with the same crash because the peer is > added as peer of the pb but the pb is not added as peer of the peer (I am > sorry that this reads confusing). So when the peer is deleted, it won't > remove itself from the pb's peer, and pb's peer would be a dangling > pointer. I would be safe only if we make sure they are always added as > peers from both sides, or not added at all. However, if we move the below > local_datapath_peer_port_add(ld, pb, peer); under this if condition, it is > possible that when we do need the peer port information it is unavailable > from the local_datapath. I am going through all use cases of the peer port > structure before concluding. > > > Hi Xavier, > > After going through the use cases of the ld->peer_ports, for what I can > tell, the data is used by pinctrl.c and physical.c for DGPs when both the > LR and the LS are local, so I think we should move the below > "local_datapath_peer_port_add(ld, pb, peer);" into the above "if" > condition. Would you like to update with v3? > > Thanks, > Han > > > Han > > > > > } > > > local_datapath_peer_port_add(ld, pb, peer); > > > } > > > @@ -602,6 +588,7 @@ add_local_datapath__(struct ovsdb_idl_index > *sbrec_datapath_binding_by_key, > > > } > > > } > > > sbrec_port_binding_index_destroy_row(target); > > > + return ld; > > > } > > > > > > static struct tracked_datapath * > > > @@ -622,6 +609,11 @@ local_datapath_peer_port_add(struct > local_datapath *ld, > > > const struct sbrec_port_binding *local, > > > const struct sbrec_port_binding *remote) > > > { > > > + for (size_t i = 0; i < ld->n_peer_ports; i++) { > > > + if (ld->peer_ports[i].local == local) { > > > + return; > > > + } > > > + } > > > ld->n_peer_ports++; > > > if (ld->n_peer_ports > ld->n_allocated_peer_ports) { > > > size_t old_n_ports = ld->n_allocated_peer_ports; > > > diff --git a/tests/ovn.at b/tests/ovn.at > > > index bba2c9c1d..ae0918d55 100644 > > > --- a/tests/ovn.at > > > +++ b/tests/ovn.at > > > @@ -32580,3 +32580,33 @@ check ovn-nbctl --wait=hv sync > > > OVN_CLEANUP([hv1]) > > > AT_CLEANUP > > > ]) > > > + > > > +OVN_FOR_EACH_NORTHD([ > > > +AT_SETUP([router port add then remove - lsp first]) > > > +ovn_start > > > +net_add n1 > > > + > > > +sim_add hv1 > > > +as hv1 > > > +ovs-vsctl add-br br-phys > > > +ovn_attach n1 br-phys 192.168.0.1 > > > +ovs-vsctl -- add-port br-int hv1-vif1 -- \ > > > + set interface hv1-vif1 external-ids:iface-id=sw0-p1 \ > > > + options:tx_pcap=hv1/vif1-tx.pcap \ > > > + options:rxq_pcap=hv1/vif1-rx.pcap \ > > > + ofport-request=1 > > > + > > > +check ovn-nbctl ls-add sw0 > > > +check ovn-nbctl lr-add ro0 > > > +check ovn-nbctl lsp-add sw0 sw0-p1 > > > +check ovn-nbctl lsp-add sw0 lsp > > > +check ovn-nbctl lsp-set-type lsp router > > > +check ovn-nbctl lsp-set-options lsp router-port=lrp > > > +check ovn-nbctl lsp-set-addresses lsp 00:00:00:00:00:1 > > > +check ovn-nbctl --wait=hv lrp-add ro0 lrp 00:00:00:00:00:1 > aef0:0:0:0:0:0:0:1/64 > > > +check ovn-nbctl --wait=hv lsp-del lsp > > > +check ovn-nbctl lrp-del lrp > > > +check ovn-nbctl --wait=hv sync > > > +OVN_CLEANUP([hv1]) > > > +AT_CLEANUP > > > +]) > > > -- > > > 2.31.1 > > > > > > _______________________________________________ > > > dev mailing list > > > dev@openvswitch.org > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
On Fri, Aug 26, 2022 at 10:27 AM Xavier Simonart <xsimonar@redhat.com> wrote: > > Hi Han > > Agreed that v2 is not correct as we might have cases where the peers are not added symmetrically (I'll add a test case highlighting this), and we'll hit the same kind of issue/crash. > However, I am not sure that I can simply move "local_datapath_peer_port_add(ld, pb, peer);" into the above "if" (if I understood your proposal). If I do it, some other test cases start to fail, such as "ip-buffering" > Thanks Xavier, you are right! Sorry that I missed the peer_ports usage in run_buffered_binding(), which doesn't really need the peer port, but just uses the data structure to get the ports of type "patch" for convenience, and uses only the "local" part. So we can simply change that by lookup in the SB IDL using the sbrec_port_binding_by_datapath index. I do notice a similar use case of peer_ports in fill_ipv6_prefix_state(), but that is ok because it requires the peer to be local before calling the function. So I tried the below incremental patch which passed all tests. --------------------------------------------------------------------------- diff --git a/controller/local_data.c b/controller/local_data.c index 669e686ab..9eee568d1 100644 --- a/controller/local_data.c +++ b/controller/local_data.c @@ -581,8 +581,8 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, chassis, local_datapaths, tracked_datapaths); local_datapath_peer_port_add(peer_ld, peer, pb); + local_datapath_peer_port_add(ld, pb, peer); } - local_datapath_peer_port_add(ld, pb, peer); } } } diff --git a/controller/pinctrl.c b/controller/pinctrl.c index eeb6f7527..3f5d0af79 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -181,6 +181,7 @@ static void init_buffered_packets_map(void); static void destroy_buffered_packets_map(void); static void run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip, + struct ovsdb_idl_index *sbrec_port_binding_by_datapath, const struct hmap *local_datapaths) OVS_REQUIRES(pinctrl_mutex); @@ -3584,6 +3585,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, sbrec_igmp_groups, sbrec_ip_multicast_opts); run_buffered_binding(sbrec_mac_binding_by_lport_ip, + sbrec_port_binding_by_datapath, local_datapaths); sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name, chassis); @@ -4354,6 +4356,7 @@ run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn, static void run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip, + struct ovsdb_idl_index *sbrec_port_binding_by_datapath, const struct hmap *local_datapaths) OVS_REQUIRES(pinctrl_mutex) { @@ -4369,9 +4372,15 @@ run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip, continue; } - for (size_t i = 0; i < ld->n_peer_ports; i++) { - - const struct sbrec_port_binding *pb = ld->peer_ports[i].local; + struct sbrec_port_binding *target = + sbrec_port_binding_index_init_row(sbrec_port_binding_by_datapath); + sbrec_port_binding_index_set_datapath(target, ld->datapath); + const struct sbrec_port_binding *pb; + SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target, + sbrec_port_binding_by_datapath) { + if (strcmp(pb->type, "patch")) { + continue; + } struct buffered_packets *cur_qp; HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) { struct ds ip_s = DS_EMPTY_INITIALIZER; @@ -4388,6 +4397,7 @@ run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip, ds_destroy(&ip_s); } } + sbrec_port_binding_index_destroy_row(target); } buffered_packets_map_gc(); ----------------------------------------------------------------------------------- Please let me know if this helps or if you see any other issues. Thanks, Han > I'll continue looking into this on Monday > Thanks > Xavier > > On Fri, Aug 26, 2022 at 7:44 AM Han Zhou <hzhou@ovn.org> wrote: >> >> >> >> On Thu, Aug 25, 2022 at 10:05 AM Han Zhou <zhouhan@gmail.com> wrote: >> > >> > >> > >> > On Thu, Aug 25, 2022 at 7:47 AM Xavier Simonart <xsimonar@redhat.com> wrote: >> > > >> > > If a logical switch port is added and connected to a logical router >> > > port (through options: router-port) before the router port is >> > > created, then this might cause further issues such as segmentation >> > > violation when the switch and router ports are deleted. >> > > >> > > Signed-off-by: Xavier Simonart <xsimonar@redhat.com> >> > > >> > > --- >> > > v2: handled Han's comments (avoid wasting CPU cycles searching for peer_ld) >> > > --- >> > > controller/local_data.c | 36 ++++++++++++++---------------------- >> > > tests/ovn.at | 30 ++++++++++++++++++++++++++++++ >> > > 2 files changed, 44 insertions(+), 22 deletions(-) >> > > >> > > diff --git a/controller/local_data.c b/controller/local_data.c >> > > index 7f874fc19..669e686ab 100644 >> > > --- a/controller/local_data.c >> > > +++ b/controller/local_data.c >> > > @@ -34,7 +34,7 @@ >> > > >> > > VLOG_DEFINE_THIS_MODULE(ldata); >> > > >> > > -static void add_local_datapath__( >> > > +static struct local_datapath *add_local_datapath__( >> > > struct ovsdb_idl_index *sbrec_datapath_binding_by_key, >> > > struct ovsdb_idl_index *sbrec_port_binding_by_datapath, >> > > struct ovsdb_idl_index *sbrec_port_binding_by_name, >> > > @@ -194,17 +194,7 @@ add_local_datapath_peer_port( >> > > return; >> > > } >> > > >> > > - bool present = false; >> > > - for (size_t i = 0; i < ld->n_peer_ports; i++) { >> > > - if (ld->peer_ports[i].local == pb) { >> > > - present = true; >> > > - break; >> > > - } >> > > - } >> > > - >> > > - if (!present) { >> > > - local_datapath_peer_port_add(ld, pb, peer); >> > > - } >> > > + local_datapath_peer_port_add(ld, pb, peer); >> > > >> > > struct local_datapath *peer_ld = >> > > get_local_datapath(local_datapaths, >> > > @@ -218,12 +208,6 @@ add_local_datapath_peer_port( >> > > return; >> > > } >> > > >> > > - for (size_t i = 0; i < peer_ld->n_peer_ports; i++) { >> > > - if (peer_ld->peer_ports[i].local == peer) { >> > > - return; >> > > - } >> > > - } >> > > - >> > > local_datapath_peer_port_add(peer_ld, peer, pb); >> > > } >> > > >> > > @@ -541,7 +525,7 @@ chassis_tunnel_find(const struct hmap *chassis_tunnels, const char *chassis_id, >> > > } >> > > >> > > /* static functions. */ >> > > -static void >> > > +static struct local_datapath * >> > > add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, >> > > struct ovsdb_idl_index *sbrec_port_binding_by_datapath, >> > > struct ovsdb_idl_index *sbrec_port_binding_by_name, >> > > @@ -553,7 +537,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, >> > > uint32_t dp_key = dp->tunnel_key; >> > > struct local_datapath *ld = get_local_datapath(local_datapaths, dp_key); >> > > if (ld) { >> > > - return; >> > > + return ld; >> > > } >> > > >> > > ld = local_datapath_alloc(dp); >> > > @@ -568,7 +552,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, >> > > if (depth >= 100) { >> > > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); >> > > VLOG_WARN_RL(&rl, "datapaths nested too deep"); >> > > - return; >> > > + return ld; >> > > } >> > > >> > > struct sbrec_port_binding *target = >> > > @@ -589,12 +573,14 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, >> > > if (peer && peer->datapath) { >> > > if (need_add_patch_peer_to_local( >> > > sbrec_port_binding_by_name, pb, chassis)) { >> > > - add_local_datapath__(sbrec_datapath_binding_by_key, >> > > + struct local_datapath *peer_ld = >> > > + add_local_datapath__(sbrec_datapath_binding_by_key, >> > > sbrec_port_binding_by_datapath, >> > > sbrec_port_binding_by_name, >> > > depth + 1, peer->datapath, >> > > chassis, local_datapaths, >> > > tracked_datapaths); >> > > + local_datapath_peer_port_add(peer_ld, peer, pb); >> > >> > Thanks Xavier for the refactor. Now that it is cleaner, it reminds me of a potential problem that when the peer DP doesn't need to be added to local_datapaths, we may end up with the same crash because the peer is added as peer of the pb but the pb is not added as peer of the peer (I am sorry that this reads confusing). So when the peer is deleted, it won't remove itself from the pb's peer, and pb's peer would be a dangling pointer. I would be safe only if we make sure they are always added as peers from both sides, or not added at all. However, if we move the below local_datapath_peer_port_add(ld, pb, peer); under this if condition, it is possible that when we do need the peer port information it is unavailable from the local_datapath. I am going through all use cases of the peer port structure before concluding. >> > >> Hi Xavier, >> >> After going through the use cases of the ld->peer_ports, for what I can tell, the data is used by pinctrl.c and physical.c for DGPs when both the LR and the LS are local, so I think we should move the below "local_datapath_peer_port_add(ld, pb, peer);" into the above "if" condition. Would you like to update with v3? >> >> Thanks, >> Han >> >> > Han >> > >> > > } >> > > local_datapath_peer_port_add(ld, pb, peer); >> > > } >> > > @@ -602,6 +588,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, >> > > } >> > > } >> > > sbrec_port_binding_index_destroy_row(target); >> > > + return ld; >> > > } >> > > >> > > static struct tracked_datapath * >> > > @@ -622,6 +609,11 @@ local_datapath_peer_port_add(struct local_datapath *ld, >> > > const struct sbrec_port_binding *local, >> > > const struct sbrec_port_binding *remote) >> > > { >> > > + for (size_t i = 0; i < ld->n_peer_ports; i++) { >> > > + if (ld->peer_ports[i].local == local) { >> > > + return; >> > > + } >> > > + } >> > > ld->n_peer_ports++; >> > > if (ld->n_peer_ports > ld->n_allocated_peer_ports) { >> > > size_t old_n_ports = ld->n_allocated_peer_ports; >> > > diff --git a/tests/ovn.at b/tests/ovn.at >> > > index bba2c9c1d..ae0918d55 100644 >> > > --- a/tests/ovn.at >> > > +++ b/tests/ovn.at >> > > @@ -32580,3 +32580,33 @@ check ovn-nbctl --wait=hv sync >> > > OVN_CLEANUP([hv1]) >> > > AT_CLEANUP >> > > ]) >> > > + >> > > +OVN_FOR_EACH_NORTHD([ >> > > +AT_SETUP([router port add then remove - lsp first]) >> > > +ovn_start >> > > +net_add n1 >> > > + >> > > +sim_add hv1 >> > > +as hv1 >> > > +ovs-vsctl add-br br-phys >> > > +ovn_attach n1 br-phys 192.168.0.1 >> > > +ovs-vsctl -- add-port br-int hv1-vif1 -- \ >> > > + set interface hv1-vif1 external-ids:iface-id=sw0-p1 \ >> > > + options:tx_pcap=hv1/vif1-tx.pcap \ >> > > + options:rxq_pcap=hv1/vif1-rx.pcap \ >> > > + ofport-request=1 >> > > + >> > > +check ovn-nbctl ls-add sw0 >> > > +check ovn-nbctl lr-add ro0 >> > > +check ovn-nbctl lsp-add sw0 sw0-p1 >> > > +check ovn-nbctl lsp-add sw0 lsp >> > > +check ovn-nbctl lsp-set-type lsp router >> > > +check ovn-nbctl lsp-set-options lsp router-port=lrp >> > > +check ovn-nbctl lsp-set-addresses lsp 00:00:00:00:00:1 >> > > +check ovn-nbctl --wait=hv lrp-add ro0 lrp 00:00:00:00:00:1 aef0:0:0:0:0:0:0:1/64 >> > > +check ovn-nbctl --wait=hv lsp-del lsp >> > > +check ovn-nbctl lrp-del lrp >> > > +check ovn-nbctl --wait=hv sync >> > > +OVN_CLEANUP([hv1]) >> > > +AT_CLEANUP >> > > +]) >> > > -- >> > > 2.31.1 >> > > >> > > _______________________________________________ >> > > dev mailing list >> > > dev@openvswitch.org >> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff --git a/controller/local_data.c b/controller/local_data.c index 7f874fc19..669e686ab 100644 --- a/controller/local_data.c +++ b/controller/local_data.c @@ -34,7 +34,7 @@ VLOG_DEFINE_THIS_MODULE(ldata); -static void add_local_datapath__( +static struct local_datapath *add_local_datapath__( struct ovsdb_idl_index *sbrec_datapath_binding_by_key, struct ovsdb_idl_index *sbrec_port_binding_by_datapath, struct ovsdb_idl_index *sbrec_port_binding_by_name, @@ -194,17 +194,7 @@ add_local_datapath_peer_port( return; } - bool present = false; - for (size_t i = 0; i < ld->n_peer_ports; i++) { - if (ld->peer_ports[i].local == pb) { - present = true; - break; - } - } - - if (!present) { - local_datapath_peer_port_add(ld, pb, peer); - } + local_datapath_peer_port_add(ld, pb, peer); struct local_datapath *peer_ld = get_local_datapath(local_datapaths, @@ -218,12 +208,6 @@ add_local_datapath_peer_port( return; } - for (size_t i = 0; i < peer_ld->n_peer_ports; i++) { - if (peer_ld->peer_ports[i].local == peer) { - return; - } - } - local_datapath_peer_port_add(peer_ld, peer, pb); } @@ -541,7 +525,7 @@ chassis_tunnel_find(const struct hmap *chassis_tunnels, const char *chassis_id, } /* static functions. */ -static void +static struct local_datapath * add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, struct ovsdb_idl_index *sbrec_port_binding_by_datapath, struct ovsdb_idl_index *sbrec_port_binding_by_name, @@ -553,7 +537,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, uint32_t dp_key = dp->tunnel_key; struct local_datapath *ld = get_local_datapath(local_datapaths, dp_key); if (ld) { - return; + return ld; } ld = local_datapath_alloc(dp); @@ -568,7 +552,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, if (depth >= 100) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_WARN_RL(&rl, "datapaths nested too deep"); - return; + return ld; } struct sbrec_port_binding *target = @@ -589,12 +573,14 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, if (peer && peer->datapath) { if (need_add_patch_peer_to_local( sbrec_port_binding_by_name, pb, chassis)) { - add_local_datapath__(sbrec_datapath_binding_by_key, + struct local_datapath *peer_ld = + add_local_datapath__(sbrec_datapath_binding_by_key, sbrec_port_binding_by_datapath, sbrec_port_binding_by_name, depth + 1, peer->datapath, chassis, local_datapaths, tracked_datapaths); + local_datapath_peer_port_add(peer_ld, peer, pb); } local_datapath_peer_port_add(ld, pb, peer); } @@ -602,6 +588,7 @@ add_local_datapath__(struct ovsdb_idl_index *sbrec_datapath_binding_by_key, } } sbrec_port_binding_index_destroy_row(target); + return ld; } static struct tracked_datapath * @@ -622,6 +609,11 @@ local_datapath_peer_port_add(struct local_datapath *ld, const struct sbrec_port_binding *local, const struct sbrec_port_binding *remote) { + for (size_t i = 0; i < ld->n_peer_ports; i++) { + if (ld->peer_ports[i].local == local) { + return; + } + } ld->n_peer_ports++; if (ld->n_peer_ports > ld->n_allocated_peer_ports) { size_t old_n_ports = ld->n_allocated_peer_ports; diff --git a/tests/ovn.at b/tests/ovn.at index bba2c9c1d..ae0918d55 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -32580,3 +32580,33 @@ check ovn-nbctl --wait=hv sync OVN_CLEANUP([hv1]) AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([router port add then remove - lsp first]) +ovn_start +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl -- add-port br-int hv1-vif1 -- \ + set interface hv1-vif1 external-ids:iface-id=sw0-p1 \ + options:tx_pcap=hv1/vif1-tx.pcap \ + options:rxq_pcap=hv1/vif1-rx.pcap \ + ofport-request=1 + +check ovn-nbctl ls-add sw0 +check ovn-nbctl lr-add ro0 +check ovn-nbctl lsp-add sw0 sw0-p1 +check ovn-nbctl lsp-add sw0 lsp +check ovn-nbctl lsp-set-type lsp router +check ovn-nbctl lsp-set-options lsp router-port=lrp +check ovn-nbctl lsp-set-addresses lsp 00:00:00:00:00:1 +check ovn-nbctl --wait=hv lrp-add ro0 lrp 00:00:00:00:00:1 aef0:0:0:0:0:0:0:1/64 +check ovn-nbctl --wait=hv lsp-del lsp +check ovn-nbctl lrp-del lrp +check ovn-nbctl --wait=hv sync +OVN_CLEANUP([hv1]) +AT_CLEANUP +])
If a logical switch port is added and connected to a logical router port (through options: router-port) before the router port is created, then this might cause further issues such as segmentation violation when the switch and router ports are deleted. Signed-off-by: Xavier Simonart <xsimonar@redhat.com> --- v2: handled Han's comments (avoid wasting CPU cycles searching for peer_ld) --- controller/local_data.c | 36 ++++++++++++++---------------------- tests/ovn.at | 30 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 22 deletions(-)