Message ID | 20200907124320.830247-1-numans@ovn.org |
---|---|
State | Superseded |
Headers | show |
Series | [ovs-dev] northd: Add lflows to send all pkts to conntrack if LB is configured on a lswitch. | expand |
On 9/7/20 2:43 PM, numans@ovn.org wrote: > From: Numan Siddique <numans@ovn.org> > > Prior to this patch, if a load balancer is configured on a logical switch but with no > ACLs with allow-related configured, then in the ingress pipeline only the packets > with ip.dst = VIP will be sent to conntrack using the zone id of the source logical port. > > If the backend of the load balancer, sends an invalid packet (for example invalid tcp > sequence number), then such packets will be delivered to the source logical port VIF > without unDNATting. This causes the source to reset the connection. > > This patch fixes this issue by sending all the packets to conntrack if a load balancer > is configured on the logical switch. Because of this, any invalid (ct.inv) packets will > be dropped in the ingress pipeline itself. > > Unfortunately this impacts the performance as now there will be extra recirculations > because of ct and ct_commit (for new connections) actions. > > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1870359 > Reported-by: Tim Rozet (trozet@redhat.com) > Signed-off-by: Numan Siddique <numans@ovn.org> Hi Numan, Overall the patch looks ok to me and sadly I think this is the only way (send everything to conntrack) to fix the issue reported in the bugzilla. I do have some minor comments below. > --- > northd/ovn-northd.8.xml | 14 +++--- > northd/ovn-northd.c | 99 +++++++++++++++++++++++++++-------------- > 2 files changed, 72 insertions(+), 41 deletions(-) > > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > index 989e3643b8..89711c5a6c 100644 > --- a/northd/ovn-northd.8.xml > +++ b/northd/ovn-northd.8.xml > @@ -340,15 +340,12 @@ > it contains a priority-110 flow to move IPv6 Neighbor Discovery traffic > to the next table. If load balancing rules with virtual IP addresses > (and ports) are configured in <code>OVN_Northbound</code> database for a > - logical switch datapath, a priority-100 flow is added for each configured > - virtual IP address <var>VIP</var>. For IPv4 <var>VIPs</var>, the match is > - <code>ip && ip4.dst == <var>VIP</var></code>. For IPv6 > - <var>VIPs</var>, the match is <code>ip && > - ip6.dst == <var>VIP</var></code>. The flow sets an action > + logical switch datapath, a priority-100 flow is added with the match > + <code>ip</code> to match on IP packets and sets the action > <code>reg0[0] = 1; next;</code> to act as a hint for table > <code>Pre-stateful</code> to send IP packets to the connection tracker > - for packet de-fragmentation before eventually advancing to ingress table > - <code>LB</code>. > + for packet de-fragmentation before eventually advancing to ingress > + table <code>LB</code>. > If controller_event has been enabled and load balancing rules with > empty backends have been added in <code>OVN_Northbound</code>, a 130 flow > is added to trigger ovn-controller events whenever the chassis receives a > @@ -430,7 +427,8 @@ > <p> > This table also contains a priority 0 flow with action > <code>next;</code>, so that ACLs allow packets by default. If the > - logical datapath has a statetful ACL, the following flows will > + logical datapath has a statetful ACL or a load balancer with VIP > + configured, the following flows will > also be added: Nit: this can go on the line above. > </p> > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index 3de71612b8..c322558051 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -5031,6 +5031,23 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows, > free(action); > } > > +static bool > +has_lb_vip(struct ovn_datapath *od, struct hmap *lbs) > +{ > + for (int i = 0; i < od->nbs->n_load_balancer; i++) { > + struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; > + struct ovn_lb *lb = > + ovn_lb_find(lbs, &nb_lb->header_.uuid); > + ovs_assert(lb); > + > + if (lb->n_vips) { > + return true; > + } > + } > + > + return false; > +} > + > static void > build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, > struct shash *meter_groups, struct hmap *lbs) > @@ -5069,8 +5086,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, > 110, lflows); > } > > - struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); > - struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); > bool vip_configured = false; > for (int i = 0; i < od->nbs->n_load_balancer; i++) { > struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; > @@ -5080,12 +5095,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, > > for (size_t j = 0; j < lb->n_vips; j++) { > struct lb_vip *lb_vip = &lb->vips[j]; > - if (lb_vip->addr_family == AF_INET) { > - sset_add(&all_ips_v4, lb_vip->vip); > - } else { > - sset_add(&all_ips_v6, lb_vip->vip); > - } > - > build_empty_lb_event_flow(od, lflows, lb_vip, nb_lb, > S_SWITCH_IN_PRE_LB, meter_groups); > > @@ -5099,26 +5108,37 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, > } > > /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send > - * packet to conntrack for defragmentation. */ > - const char *ip_address; > - SSET_FOR_EACH (ip_address, &all_ips_v4) { > - char *match = xasprintf("ip && ip4.dst == %s", ip_address); > - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, > - 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > - free(match); > - } > - > - SSET_FOR_EACH (ip_address, &all_ips_v6) { > - char *match = xasprintf("ip && ip6.dst == %s", ip_address); > - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, > - 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > - free(match); > - } > - > - sset_destroy(&all_ips_v4); > - sset_destroy(&all_ips_v6); > - > + * packet to conntrack for defragmentation. > + * > + * Send all the packets to conntrack in the ingress pipeline if the > + * logical switch has a load balancer with VIP configured. Earlier > + * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress pipeline > + * if the IP destination matches the VIP. But this causes few issues when > + * a logical switch has no ACLs configured with allow-related. > + * To understand the issue, lets a take a TCP load balancer - > + * 10.0.0.10:80=10.0.0.3:80. > + * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection with > + * the VIP - 10.0.0.10, then the packet in the ingress pipeline of 'p1' > + * is sent to the p1's conntrack zone id and the packet is load balanced > + * to the backend - 10.0.0.3. For the reply packet from the backend lport, > + * it is not sent to the conntrack of backend lport's zone id. This is fine > + * as long as the packet is valid. Suppose the backend lport sends an > + * invalid TCP packet (like incorrect sequence number), the packet gets > + * delivered to the lport 'p1' without unDNATing the packet to the > + * VIP - 10.0.0.10. And this causes the connection to be reset by the > + * lport p1's VIF. > + * Thanks for thoroughly explaining the problem, it's really helpful! However, I think this would be more useful somewhere else, maybe in the man page where we describe the flow that sets REGBIT_CONNTRACK_DEFRAG? > + * We can't fix this issue by adding a logical flow to drop ct.inv packets > + * in the egress pipeline since it will drop all other connections not > + * destined to the load balancers. > + * > + * To fix this issue, we send all the packets to the conntrack in the > + * ingress pipeline if a load balancer is configured. We can now > + * add a lflow to drop ct.inv packets. > + */ > if (vip_configured) { > + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, > + 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, > 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > } > @@ -5477,9 +5497,9 @@ build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs, > > static void > build_acls(struct ovn_datapath *od, struct hmap *lflows, > - struct hmap *port_groups) > + struct hmap *port_groups, struct hmap *lbs) > { > - bool has_stateful = has_stateful_acl(od); > + bool has_stateful = has_stateful_acl(od) || has_lb_vip(od, lbs); Nit: I would add parenthesis here: bool has_stateful = (has_stateful_acl(od) || has_lb_vip(od, lbs)); > > /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by > * default. A related rule at priority 1 is added below if there > @@ -5736,19 +5756,32 @@ build_qos(struct ovn_datapath *od, struct hmap *lflows) { > } > > static void > -build_lb(struct ovn_datapath *od, struct hmap *lflows) > +build_lb(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs) > { > /* Ingress and Egress LB Table (Priority 0): Packets are allowed by > * default. */ > ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;"); > ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;"); > > - if (od->nbs->load_balancer) { > + if (od->nbs->n_load_balancer) { > for (size_t i = 0; i < od->n_router_ports; i++) { > skip_port_from_conntrack(od, od->router_ports[i], > S_SWITCH_IN_LB, S_SWITCH_OUT_LB, > UINT16_MAX, lflows); > } > + } > + > + bool vip_configured = false; > + for (int i = 0; i < od->nbs->n_load_balancer; i++) { > + struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; > + struct ovn_lb *lb = > + ovn_lb_find(lbs, &nb_lb->header_.uuid); > + ovs_assert(lb); > + > + vip_configured = (vip_configured || lb->n_vips); > + } > + > + if (vip_configured) { We should use the new function you added, "has_lb_vip()". And almost the same code is duplicated in build_pre_lb() where we also call build_empty_lb_event_flow(). It would be nice to unify this a bit. Thanks, Dumitru > /* Ingress and Egress LB Table (Priority 65534). > * > * Send established traffic through conntrack for just NAT. */ > @@ -6622,9 +6655,9 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, > build_pre_acls(od, lflows); > build_pre_lb(od, lflows, meter_groups, lbs); > build_pre_stateful(od, lflows); > - build_acls(od, lflows, port_groups); > + build_acls(od, lflows, port_groups, lbs); > build_qos(od, lflows); > - build_lb(od, lflows); > + build_lb(od, lflows, lbs); > build_stateful(od, lflows, lbs); > } > >
On 9/7/20 8:43 AM, numans@ovn.org wrote: > From: Numan Siddique <numans@ovn.org> > > Prior to this patch, if a load balancer is configured on a logical switch but with no > ACLs with allow-related configured, then in the ingress pipeline only the packets > with ip.dst = VIP will be sent to conntrack using the zone id of the source logical port. > > If the backend of the load balancer, sends an invalid packet (for example invalid tcp > sequence number), then such packets will be delivered to the source logical port VIF > without unDNATting. This causes the source to reset the connection. > > This patch fixes this issue by sending all the packets to conntrack if a load balancer > is configured on the logical switch. Because of this, any invalid (ct.inv) packets will > be dropped in the ingress pipeline itself. > > Unfortunately this impacts the performance as now there will be extra recirculations > because of ct and ct_commit (for new connections) actions. > > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1870359 > Reported-by: Tim Rozet (trozet@redhat.com) > Signed-off-by: Numan Siddique <numans@ovn.org> > --- > northd/ovn-northd.8.xml | 14 +++--- > northd/ovn-northd.c | 99 +++++++++++++++++++++++++++-------------- > 2 files changed, 72 insertions(+), 41 deletions(-) > > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > index 989e3643b8..89711c5a6c 100644 > --- a/northd/ovn-northd.8.xml > +++ b/northd/ovn-northd.8.xml > @@ -340,15 +340,12 @@ > it contains a priority-110 flow to move IPv6 Neighbor Discovery traffic > to the next table. If load balancing rules with virtual IP addresses > (and ports) are configured in <code>OVN_Northbound</code> database for a > - logical switch datapath, a priority-100 flow is added for each configured > - virtual IP address <var>VIP</var>. For IPv4 <var>VIPs</var>, the match is > - <code>ip && ip4.dst == <var>VIP</var></code>. For IPv6 > - <var>VIPs</var>, the match is <code>ip && > - ip6.dst == <var>VIP</var></code>. The flow sets an action > + logical switch datapath, a priority-100 flow is added with the match > + <code>ip</code> to match on IP packets and sets the action > <code>reg0[0] = 1; next;</code> to act as a hint for table > <code>Pre-stateful</code> to send IP packets to the connection tracker > - for packet de-fragmentation before eventually advancing to ingress table > - <code>LB</code>. > + for packet de-fragmentation before eventually advancing to ingress > + table <code>LB</code>. > If controller_event has been enabled and load balancing rules with > empty backends have been added in <code>OVN_Northbound</code>, a 130 flow > is added to trigger ovn-controller events whenever the chassis receives a > @@ -430,7 +427,8 @@ > <p> > This table also contains a priority 0 flow with action > <code>next;</code>, so that ACLs allow packets by default. If the > - logical datapath has a statetful ACL, the following flows will > + logical datapath has a statetful ACL or a load balancer with VIP > + configured, the following flows will > also be added: > </p> > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index 3de71612b8..c322558051 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -5031,6 +5031,23 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows, > free(action); > } > > +static bool > +has_lb_vip(struct ovn_datapath *od, struct hmap *lbs) > +{ > + for (int i = 0; i < od->nbs->n_load_balancer; i++) { > + struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; > + struct ovn_lb *lb = > + ovn_lb_find(lbs, &nb_lb->header_.uuid); > + ovs_assert(lb); > + > + if (lb->n_vips) { > + return true; > + } I'm not sure why you need to look up the ovn_lb here. Why not just return true if smap_count(&nb_lb->vips) > 0 ? lb is guaranteed to be found in the lbs hmap, and lb->n_vips is always set to smap_count(&nb_lb->vips) (see ovn_lb_create). > + } > + > + return false; > +} > + > static void > build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, > struct shash *meter_groups, struct hmap *lbs) > @@ -5069,8 +5086,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, > 110, lflows); > } > > - struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); > - struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); > bool vip_configured = false; > for (int i = 0; i < od->nbs->n_load_balancer; i++) { > struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; > @@ -5080,12 +5095,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, > > for (size_t j = 0; j < lb->n_vips; j++) { > struct lb_vip *lb_vip = &lb->vips[j]; > - if (lb_vip->addr_family == AF_INET) { > - sset_add(&all_ips_v4, lb_vip->vip); > - } else { > - sset_add(&all_ips_v6, lb_vip->vip); > - } > - > build_empty_lb_event_flow(od, lflows, lb_vip, nb_lb, > S_SWITCH_IN_PRE_LB, meter_groups); > > @@ -5099,26 +5108,37 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, > } > > /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send > - * packet to conntrack for defragmentation. */ > - const char *ip_address; > - SSET_FOR_EACH (ip_address, &all_ips_v4) { > - char *match = xasprintf("ip && ip4.dst == %s", ip_address); > - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, > - 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > - free(match); > - } > - > - SSET_FOR_EACH (ip_address, &all_ips_v6) { > - char *match = xasprintf("ip && ip6.dst == %s", ip_address); > - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, > - 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > - free(match); > - } > - > - sset_destroy(&all_ips_v4); > - sset_destroy(&all_ips_v6); > - > + * packet to conntrack for defragmentation. > + * > + * Send all the packets to conntrack in the ingress pipeline if the > + * logical switch has a load balancer with VIP configured. Earlier > + * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress pipeline > + * if the IP destination matches the VIP. But this causes few issues when > + * a logical switch has no ACLs configured with allow-related. > + * To understand the issue, lets a take a TCP load balancer - > + * 10.0.0.10:80=10.0.0.3:80. > + * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection with > + * the VIP - 10.0.0.10, then the packet in the ingress pipeline of 'p1' > + * is sent to the p1's conntrack zone id and the packet is load balanced > + * to the backend - 10.0.0.3. For the reply packet from the backend lport, > + * it is not sent to the conntrack of backend lport's zone id. This is fine > + * as long as the packet is valid. Suppose the backend lport sends an > + * invalid TCP packet (like incorrect sequence number), the packet gets > + * delivered to the lport 'p1' without unDNATing the packet to the > + * VIP - 10.0.0.10. And this causes the connection to be reset by the > + * lport p1's VIF. > + * > + * We can't fix this issue by adding a logical flow to drop ct.inv packets > + * in the egress pipeline since it will drop all other connections not > + * destined to the load balancers. > + * > + * To fix this issue, we send all the packets to the conntrack in the > + * ingress pipeline if a load balancer is configured. We can now > + * add a lflow to drop ct.inv packets. > + */ > if (vip_configured) { > + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, > + 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, > 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > } > @@ -5477,9 +5497,9 @@ build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs, > > static void > build_acls(struct ovn_datapath *od, struct hmap *lflows, > - struct hmap *port_groups) > + struct hmap *port_groups, struct hmap *lbs) > { > - bool has_stateful = has_stateful_acl(od); > + bool has_stateful = has_stateful_acl(od) || has_lb_vip(od, lbs); > > /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by > * default. A related rule at priority 1 is added below if there > @@ -5736,19 +5756,32 @@ build_qos(struct ovn_datapath *od, struct hmap *lflows) { > } > > static void > -build_lb(struct ovn_datapath *od, struct hmap *lflows) > +build_lb(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs) > { > /* Ingress and Egress LB Table (Priority 0): Packets are allowed by > * default. */ > ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;"); > ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;"); > > - if (od->nbs->load_balancer) { > + if (od->nbs->n_load_balancer) { > for (size_t i = 0; i < od->n_router_ports; i++) { > skip_port_from_conntrack(od, od->router_ports[i], > S_SWITCH_IN_LB, S_SWITCH_OUT_LB, > UINT16_MAX, lflows); > } > + } > + > + bool vip_configured = false; > + for (int i = 0; i < od->nbs->n_load_balancer; i++) { > + struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; > + struct ovn_lb *lb = > + ovn_lb_find(lbs, &nb_lb->header_.uuid); > + ovs_assert(lb); > + > + vip_configured = (vip_configured || lb->n_vips); As Dumitru stated, you can use the new has_lb_vip() function here, but it's another instance where ovn_lb_find() seems unnecessary. > + } > + > + if (vip_configured) { > /* Ingress and Egress LB Table (Priority 65534). > * > * Send established traffic through conntrack for just NAT. */ > @@ -6622,9 +6655,9 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, > build_pre_acls(od, lflows); > build_pre_lb(od, lflows, meter_groups, lbs); > build_pre_stateful(od, lflows); > - build_acls(od, lflows, port_groups); > + build_acls(od, lflows, port_groups, lbs); > build_qos(od, lflows); > - build_lb(od, lflows); > + build_lb(od, lflows, lbs); > build_stateful(od, lflows, lbs); > } > >
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 989e3643b8..89711c5a6c 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -340,15 +340,12 @@ it contains a priority-110 flow to move IPv6 Neighbor Discovery traffic to the next table. If load balancing rules with virtual IP addresses (and ports) are configured in <code>OVN_Northbound</code> database for a - logical switch datapath, a priority-100 flow is added for each configured - virtual IP address <var>VIP</var>. For IPv4 <var>VIPs</var>, the match is - <code>ip && ip4.dst == <var>VIP</var></code>. For IPv6 - <var>VIPs</var>, the match is <code>ip && - ip6.dst == <var>VIP</var></code>. The flow sets an action + logical switch datapath, a priority-100 flow is added with the match + <code>ip</code> to match on IP packets and sets the action <code>reg0[0] = 1; next;</code> to act as a hint for table <code>Pre-stateful</code> to send IP packets to the connection tracker - for packet de-fragmentation before eventually advancing to ingress table - <code>LB</code>. + for packet de-fragmentation before eventually advancing to ingress + table <code>LB</code>. If controller_event has been enabled and load balancing rules with empty backends have been added in <code>OVN_Northbound</code>, a 130 flow is added to trigger ovn-controller events whenever the chassis receives a @@ -430,7 +427,8 @@ <p> This table also contains a priority 0 flow with action <code>next;</code>, so that ACLs allow packets by default. If the - logical datapath has a statetful ACL, the following flows will + logical datapath has a statetful ACL or a load balancer with VIP + configured, the following flows will also be added: </p> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 3de71612b8..c322558051 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -5031,6 +5031,23 @@ build_empty_lb_event_flow(struct ovn_datapath *od, struct hmap *lflows, free(action); } +static bool +has_lb_vip(struct ovn_datapath *od, struct hmap *lbs) +{ + for (int i = 0; i < od->nbs->n_load_balancer; i++) { + struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; + struct ovn_lb *lb = + ovn_lb_find(lbs, &nb_lb->header_.uuid); + ovs_assert(lb); + + if (lb->n_vips) { + return true; + } + } + + return false; +} + static void build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, struct shash *meter_groups, struct hmap *lbs) @@ -5069,8 +5086,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, 110, lflows); } - struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4); - struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6); bool vip_configured = false; for (int i = 0; i < od->nbs->n_load_balancer; i++) { struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; @@ -5080,12 +5095,6 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, for (size_t j = 0; j < lb->n_vips; j++) { struct lb_vip *lb_vip = &lb->vips[j]; - if (lb_vip->addr_family == AF_INET) { - sset_add(&all_ips_v4, lb_vip->vip); - } else { - sset_add(&all_ips_v6, lb_vip->vip); - } - build_empty_lb_event_flow(od, lflows, lb_vip, nb_lb, S_SWITCH_IN_PRE_LB, meter_groups); @@ -5099,26 +5108,37 @@ build_pre_lb(struct ovn_datapath *od, struct hmap *lflows, } /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send - * packet to conntrack for defragmentation. */ - const char *ip_address; - SSET_FOR_EACH (ip_address, &all_ips_v4) { - char *match = xasprintf("ip && ip4.dst == %s", ip_address); - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, - 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); - free(match); - } - - SSET_FOR_EACH (ip_address, &all_ips_v6) { - char *match = xasprintf("ip && ip6.dst == %s", ip_address); - ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, - 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); - free(match); - } - - sset_destroy(&all_ips_v4); - sset_destroy(&all_ips_v6); - + * packet to conntrack for defragmentation. + * + * Send all the packets to conntrack in the ingress pipeline if the + * logical switch has a load balancer with VIP configured. Earlier + * we used to set the REGBIT_CONNTRACK_DEFRAG flag in the ingress pipeline + * if the IP destination matches the VIP. But this causes few issues when + * a logical switch has no ACLs configured with allow-related. + * To understand the issue, lets a take a TCP load balancer - + * 10.0.0.10:80=10.0.0.3:80. + * If a logical port - p1 with IP - 10.0.0.5 opens a TCP connection with + * the VIP - 10.0.0.10, then the packet in the ingress pipeline of 'p1' + * is sent to the p1's conntrack zone id and the packet is load balanced + * to the backend - 10.0.0.3. For the reply packet from the backend lport, + * it is not sent to the conntrack of backend lport's zone id. This is fine + * as long as the packet is valid. Suppose the backend lport sends an + * invalid TCP packet (like incorrect sequence number), the packet gets + * delivered to the lport 'p1' without unDNATing the packet to the + * VIP - 10.0.0.10. And this causes the connection to be reset by the + * lport p1's VIF. + * + * We can't fix this issue by adding a logical flow to drop ct.inv packets + * in the egress pipeline since it will drop all other connections not + * destined to the load balancers. + * + * To fix this issue, we send all the packets to the conntrack in the + * ingress pipeline if a load balancer is configured. We can now + * add a lflow to drop ct.inv packets. + */ if (vip_configured) { + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, + 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); } @@ -5477,9 +5497,9 @@ build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs, static void build_acls(struct ovn_datapath *od, struct hmap *lflows, - struct hmap *port_groups) + struct hmap *port_groups, struct hmap *lbs) { - bool has_stateful = has_stateful_acl(od); + bool has_stateful = has_stateful_acl(od) || has_lb_vip(od, lbs); /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by * default. A related rule at priority 1 is added below if there @@ -5736,19 +5756,32 @@ build_qos(struct ovn_datapath *od, struct hmap *lflows) { } static void -build_lb(struct ovn_datapath *od, struct hmap *lflows) +build_lb(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs) { /* Ingress and Egress LB Table (Priority 0): Packets are allowed by * default. */ ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;"); ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;"); - if (od->nbs->load_balancer) { + if (od->nbs->n_load_balancer) { for (size_t i = 0; i < od->n_router_ports; i++) { skip_port_from_conntrack(od, od->router_ports[i], S_SWITCH_IN_LB, S_SWITCH_OUT_LB, UINT16_MAX, lflows); } + } + + bool vip_configured = false; + for (int i = 0; i < od->nbs->n_load_balancer; i++) { + struct nbrec_load_balancer *nb_lb = od->nbs->load_balancer[i]; + struct ovn_lb *lb = + ovn_lb_find(lbs, &nb_lb->header_.uuid); + ovs_assert(lb); + + vip_configured = (vip_configured || lb->n_vips); + } + + if (vip_configured) { /* Ingress and Egress LB Table (Priority 65534). * * Send established traffic through conntrack for just NAT. */ @@ -6622,9 +6655,9 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, build_pre_acls(od, lflows); build_pre_lb(od, lflows, meter_groups, lbs); build_pre_stateful(od, lflows); - build_acls(od, lflows, port_groups); + build_acls(od, lflows, port_groups, lbs); build_qos(od, lflows); - build_lb(od, lflows); + build_lb(od, lflows, lbs); build_stateful(od, lflows, lbs); }