Message ID | 20221212092743.25556-1-venugopali@nvidia.com |
---|---|
State | Accepted |
Headers | show |
Series | [ovs-dev,v2] northd: bypass connection tracking for stateless flows when there are LB flows present | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | warning | apply and check: warning |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/github-robot-_ovn-kubernetes | success | github build: passed |
On Mon, Dec 12, 2022 at 1:28 AM venu iyer <venugopali@nvidia.com> wrote: > > Currently, even stateless flows are subject to connection tracking when there are > LB rules (for DNAT). However, if a flow needs to be subjected to LB, then it shouldn't > be configured as stateless. > > Stateless flow means we should not track it, and this change exempts stateless > flows from being tracked regardless of whether LB rules are present or not. > > Signed-off-by: venu iyer <venugopali@nvidia.com> > Acked-by: Han Zhou <hzhou@ovn.org> > --- > northd/northd.c | 25 +++- > northd/ovn-northd.8.xml | 57 ++++---- > ovn-nb.xml | 3 + > tests/ovn-northd.at | 76 +++++------ > tests/ovn.at | 4 +- > tests/system-ovn.at | 296 ++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 383 insertions(+), 78 deletions(-) > > diff --git a/northd/northd.c b/northd/northd.c > index 7c48bb3b4..5d8ef612f 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -140,8 +140,8 @@ enum ovn_stage { > PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 26, "ls_in_l2_unknown") \ > \ > /* Logical switch egress stages. 
*/ \ > - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ > - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ > + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \ > + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") \ > PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ > PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") \ > PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ > @@ -215,6 +215,7 @@ enum ovn_stage { > #define REGBIT_ACL_LABEL "reg0[13]" > #define REGBIT_FROM_RAMP "reg0[14]" > #define REGBIT_PORT_SEC_DROP "reg0[15]" > +#define REGBIT_ACL_STATELESS "reg0[16]" > > #define REG_ORIG_DIP_IPV4 "reg1" > #define REG_ORIG_DIP_IPV6 "xxreg1" > @@ -290,7 +291,7 @@ enum ovn_stage { > * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | | > * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | | > * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | | > - * | | REGBIT_ACL_LABEL | X | | > + * | | REGBIT_ACL_{LABEL/STATELESS} | X | | > * +----+----------------------------------------------+ X | | > * | R5 | UNUSED | X | LB_L2_AFF_BACKEND_IP6 | > * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | | > @@ -5693,17 +5694,18 @@ build_stateless_filter(struct ovn_datapath *od, > const struct nbrec_acl *acl, > struct hmap *lflows) > { > + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; > if (!strcmp(acl->direction, "from-lport")) { > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, > acl->priority + OVN_ACL_PRI_OFFSET, > acl->match, > - "next;", > + action, > &acl->header_); > } else { > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, > acl->priority + OVN_ACL_PRI_OFFSET, > acl->match, > - "next;", > + action, > &acl->header_); > } > } > @@ -5795,6 +5797,10 @@ build_pre_acls(struct ovn_datapath *od, const struct hmap *port_groups, > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > + } else if (od->has_lb_vip) { > 
+ /* We'll build stateless filters if there are LB rules so that > + * the stateless flows are not tracked in pre-lb. */ > + build_stateless_filters(od, port_groups, lflows); > } > } > > @@ -5930,6 +5936,12 @@ build_pre_lb(struct ovn_datapath *od, const struct shash *meter_groups, > 110, lflows); > } > > + /* Do not send stateless flows via conntrack */ > + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, > + REGBIT_ACL_STATELESS" == 1", "next;"); > + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, > + REGBIT_ACL_STATELESS" == 1", "next;"); > + > /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send > * packet to conntrack for defragmentation and possibly for unNATting. > * > @@ -6935,7 +6947,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, struct ovn_northd_lb *lb, > } > ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" : "ct_lb"); > > - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); > + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", > + ip_match, lb_vip->vip_str); > if (lb_vip->port_str) { > ds_put_format(match, " && %s.dst == %s", proto, lb_vip->port_str); > } > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > index dffbba96d..ce5603169 100644 > --- a/northd/ovn-northd.8.xml > +++ b/northd/ovn-northd.8.xml > @@ -474,7 +474,9 @@ > priority-110 flow is added to skip over stateful ACLs. Multicast, IPv6 > Neighbor Discovery and MLD traffic also skips stateful ACLs. For > "allow-stateless" ACLs, a flow is added to bypass setting the hint for > - connection tracker processing. > + connection tracker processing when there are stateful ACLs or LB rules; > + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching stateless > + ACL flows. > </p> > > <p> > @@ -494,8 +496,10 @@ > in ingress table <code>LB</code> and <code>Stateful</code>. It contains > a priority-0 flow that simply moves traffic to the next table. 
Moreover > it contains two priority-110 flows to move multicast, IPv6 Neighbor > - Discovery and MLD traffic to the next table. If load balancing rules with > - virtual IP addresses (and ports) are configured in > + Discovery and MLD traffic to the next table. It also contains two > + priority-110 flows to move stateless traffic, i.e. traffic for which > + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If load > + balancing rules with virtual IP addresses (and ports) are configured in > <code>OVN_Northbound</code> database for a logical switch datapath, a > priority-100 flow is added with the match <code>ip</code> to match on IP > packets and sets the action <code>reg0[2] = 1; next;</code> to act as a > @@ -1973,19 +1977,11 @@ output; > </li> > </ul> > > - <h3>Egress Table 0: Pre-LB</h3> > + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> > > <p> > - This table is similar to ingress table <code>Pre-LB</code>. It > - contains a priority-0 flow that simply moves traffic to the next table. > - Moreover it contains two priority-110 flows to move multicast, IPv6 > - Neighbor Discovery and MLD traffic to the next table. If any load > - balancing rules exist for the datapath, a priority-100 flow is added with > - a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> > - to act as a hint for table <code>Pre-stateful</code> to send IP packets > - to the connection tracker for packet de-fragmentation and possibly DNAT > - the destination VIP to one of the selected backend for already committed > - load balanced traffic. > + This is similar to ingress table <code>Pre-ACLs</code> except for > + <code>to-lport</code> traffic. > </p> > > <p> > @@ -1998,11 +1994,29 @@ output; > db="OVN_Northbound"/> table. 
> </p> > > - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> > + <p> > + This table also has a priority-110 flow with the match > + <code>outport == <var>I</var></code> for all logical switch > + datapaths to move traffic to the next table. Where <var>I</var> > + is the peer of a logical router port. This flow is added to > + skip the connection tracking of packets which will be entering > + logical router datapath from logical switch datapath for routing. > + </p> > + > + > + <h3>Egress Table 1: Pre-LB</h3> > > <p> > - This is similar to ingress table <code>Pre-ACLs</code> except for > - <code>to-lport</code> traffic. > + This table is similar to ingress table <code>Pre-LB</code>. It > + contains a priority-0 flow that simply moves traffic to the next table. > + Moreover it contains two priority-110 flows to move multicast, IPv6 > + Neighbor Discovery and MLD traffic to the next table. If any load > + balancing rules exist for the datapath, a priority-100 flow is added with > + a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> > + to act as a hint for table <code>Pre-stateful</code> to send IP packets > + to the connection tracker for packet de-fragmentation and possibly DNAT > + the destination VIP to one of the selected backend for already committed > + load balanced traffic. > </p> > > <p> > @@ -2015,15 +2029,6 @@ output; > db="OVN_Northbound"/> table. > </p> > > - <p> > - This table also has a priority-110 flow with the match > - <code>outport == <var>I</var></code> for all logical switch > - datapaths to move traffic to the next table. Where <var>I</var> > - is the peer of a logical router port. This flow is added to > - skip the connection tracking of packets which will be entering > - logical router datapath from logical switch datapath for routing. 
> - </p> > - > <h3>Egress Table 2: Pre-stateful</h3> > > <p> > diff --git a/ovn-nb.xml b/ovn-nb.xml > index 0edc3da96..3ac7785e1 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -2159,6 +2159,9 @@ or > outgoing TCP traffic directed to an IP address, then you probably > also want to define another rule to allow incoming TCP traffic coming > from this same IP address. > + In addition, traffic that matches stateless ACLs will bypass > + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be > + used instead if the traffic is supposed to be load-balanced. > </li> > > <li> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index ca4263eac..a2f8e8a20 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -2024,7 +2024,7 @@ AT_CLEANUP > > # This test case tests that when a logical switch has load balancers associated > # (with VIPs configured), the below logical flow is added by ovn-northd. > -# table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) > +# table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) > # This test case is added for the BZ - > # https://bugzilla.redhat.com/show_bug.cgi?id=1849162 > # > @@ -2063,27 +2063,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl ls-lb-add sw0 lb2 > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), 
priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb1 vips > check ovn-nbctl clear load_balancer $lb3 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb2 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb4 vips > @@ -2098,7 +2098,7 @@ check ovn-nbctl set load_balancer $lb4 vips:"10.0.0.13"="10.0.0.6" > > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > # Now reverse the order of clearing the vip. 
> @@ -2106,13 +2106,13 @@ check ovn-nbctl clear load_balancer $lb2 vips > check ovn-nbctl clear load_balancer $lb4 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb1 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb3 vips > @@ -3057,18 +3057,10 @@ for direction in from to; do > done > ovn-nbctl --wait=sb sync > > -# TCP packets should go to conntrack for load balancing. > +# TCP packets should not go to conntrack for load balancing. > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl > -ct_lb_mark { > - ct_lb_mark { > - reg0[[6]] = 0; > - reg0[[12]] = 0; > - ct_lb_mark /* default (use --ct to customize) */ { > - output("lsp2"); > - }; > - }; > -}; > +output("lsp2"); > ]) > > # UDP packets still go to conntrack. > @@ -3201,18 +3193,10 @@ for direction in from to; do > done > ovn-nbctl --wait=sb sync > > -# TCP packets should go to conntrack for load balancing. > +# TCP packets should not go to conntrack for load balancing. 
> flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl > -ct_lb_mark { > - ct_lb_mark { > - reg0[[6]] = 0; > - reg0[[12]] = 0; > - ct_lb_mark /* default (use --ct to customize) */ { > - output("lsp2"); > - }; > - }; > -}; > +output("lsp2"); > ]) > > # UDP packets still go to conntrack. > @@ -4026,14 +4010,15 @@ check_stateful_flows() { > table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == "sw0-lr0"), action=(next;) > table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=? (ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) > ]) > > AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed 's/table=./table=?/'], [0], [dnl > table=? (ls_in_pre_stateful ), priority=0 , match=(1), action=(next;) > table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), action=(ct_next;) > table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > + table=? 
(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) > ]) > > AT_CHECK([grep "ls_in_lb " sw0flows | sort | sed 's/table=../table=??/'], [0], [dnl > @@ -4049,12 +4034,13 @@ check_stateful_flows() { > ]) > > AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=1 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) > ]) > > AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl > @@ -4094,6 +4080,7 @@ AT_CHECK([grep "ls_in_pre_lb" sw0flows | sort | sed 's/table=./table=?/'], [0], > table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == "sw0-lr0"), action=(next;) > table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=? 
(ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) > ]) > > AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed 's/table=./table=?/'], [0], [dnl > @@ -4113,11 +4100,12 @@ AT_CHECK([grep "ls_in_stateful" sw0flows | sort | sed 's/table=../table=??/'], [ > ]) > > AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) > - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=1 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) > + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) > ]) > > AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl > @@ -7677,7 +7665,7 @@ check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > table=7 (lr_in_dnat ), priority=110 , match=(ct.est && !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > table=7 (lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 
&& ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > table=12(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > @@ -7689,7 +7677,7 @@ check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > table=7 (lr_in_dnat ), priority=110 , match=(ct.est && !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_label.natted == 1), action=(next;) > table=7 (lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) > table=12(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) > @@ -7701,7 +7689,7 @@ check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > table=7 (lr_in_dnat ), priority=110 , match=(ct.est && !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > table=7 (lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 
66.66.66.66; ct_lb_mark;) > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > table=12(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > diff --git a/tests/ovn.at b/tests/ovn.at > index f3bd53242..7abded46c 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -23777,7 +23777,7 @@ OVS_WAIT_FOR_OUTPUT( > [ovn-sbctl dump-flows > sbflows > ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed 's/table=..//'], 0, > [dnl > - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends= 10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) > ]) > > @@ -23820,7 +23820,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 > AT_CHECK( > [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep priority=120 |\ > sed 's/table=../table=??/'], [0], [dnl > - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > table=??(ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) > ]) > > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > index b99578b9e..4bc9fb84f 100644 > --- a/tests/system-ovn.at > +++ b/tests/system-ovn.at > @@ -9511,3 +9511,299 @@ 
NS_CHECK_EXEC([vm3], [nc 6666::1 666 -z], [0], [ignore], [ignore]) > > AT_CLEANUP > ]) > + > +# for packets that match stateless ACL flows, make sure we bypass > +# connection tracking, even with a LB in the switch. Testing for > +# TCP should suffice. For v4 and v6. > +# > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([omit connection tracking for stateless flows v4]) > + > +CHECK_CONNTRACK() > +ovn_start > +OVS_TRAFFIC_VSWITCHD_START() > +ADD_BR([br-int]) > + > +# Set external-ids in br-int needed for ovn-controller > +ovs-vsctl \ > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ > + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true > + > +# Start ovn-controller > +start_daemon ovn-controller > + > +# Logical network: > +# One LR R1 with switches foo (192.168.1.0/24), bar (192.168.2.0/24), > +# > +# foo -- R1 -- bar > + > +ovn-nbctl lr-add R1 > + > +ovn-nbctl ls-add foo > +ovn-nbctl ls-add bar > + > +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24 > +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24 > + > +# Connect foo to R1 > +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ > + type=router options:router-port=foo \ > + -- lsp-set-addresses rp-foo router > + > +# Connect bar to R1 > +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ > + type=router options:router-port=bar \ > + -- lsp-set-addresses rp-bar router > + > +# Logical port 'foo1' in switch 'foo'. > +ADD_NAMESPACES(foo1) > +ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ > + "192.168.1.1") > +ovn-nbctl lsp-add foo foo1 \ > +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2" > + > +# Logical port 'bar1' in switch 'bar'. 
> +ADD_NAMESPACES(bar1) > +ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:04", \ > + "192.168.2.1") > +ovn-nbctl lsp-add bar bar1 \ > +-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2" > + > +# Config OVN load-balancer with a VIP. > +ovn-nbctl lb-add lb1 30.30.30.30:80 "192.168.2.2:80" tcp > +ovn-nbctl ls-lb-add foo lb1 > + > +# Wait for ovn-controller to catch up. > +ovn-nbctl --wait=hv sync > + > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > +grep 'nat(dst=192.168.2.2:80)']) > + > +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d ' ' -f2) > + > +OVS_START_L7([bar1], [http]) > + > +AT_CHECK([ip netns exec foo1 wget 192.168.2.2 -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(192.168.1.2) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +tcp,orig=(src=192.168.1.2,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# now check with VIP > +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(30.30.30.30) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +tcp,orig=(src=192.168.1.2,dst=30.30.30.30,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# remove lb > +ovn-nbctl ls-lb-del foo lb1 > + > +# add stateless acl > +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless > +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless > + > +AT_CHECK([ip netns exec foo1 wget 
192.168.2.2 -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has no tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(192.168.1.2) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# add lb back > +ovn-nbctl ls-lb-add foo lb1 > + > +# Wait for ovn-controller to catch up. > +ovn-nbctl --wait=hv sync > + > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > +grep 'nat(dst=192.168.2.2:80)']) > + > +# should not dnat so will not be able to connect > +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [4], [ignore], [ignore]) > + > +# check conntrack zone has no tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(30.30.30.30) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > + > +as ovn-sb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as ovn-nb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as northd > +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) > + > +as > +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d > +/connection dropped.*/d"]) > +AT_CLEANUP > +]) > + > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([omit connection tracking for stateless flows v6]) > + > +CHECK_CONNTRACK() > +ovn_start > +OVS_TRAFFIC_VSWITCHD_START() > +ADD_BR([br-int]) > + > +# Set external-ids in br-int needed for ovn-controller > +ovs-vsctl \ > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ > + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true > + > +# Start ovn-controller > +start_daemon ovn-controller > + > +# Logical network: > +# One LR - R1 with switches foo (fd11::/64) and > +# bar (fd12::/64) connected to it > +# > +# foo -- R1 -- bar > + > +ovn-nbctl lr-add R1 > + > +ovn-nbctl ls-add foo > +ovn-nbctl ls-add bar > + > +# Connect foo to R1 > +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 fd11::1/64 > +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ > + type=router options:router-port=foo addresses=\"00:00:01:01:02:03\" > + > +# Connect bar to R1 > +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 fd12::1/64 > +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ > + type=router options:router-port=bar addresses=\"00:00:01:01:02:04\" > + > +# Logical port 'foo1' in switch 'foo'. > +ADD_NAMESPACES(foo1) > +ADD_VETH(foo1, foo1, br-int, "fd11::2/64", "f0:00:00:01:02:03", \ > + "fd11::1") > +ovn-nbctl lsp-add foo foo1 \ > +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 fd11::2" > + > +# Logical port 'bar1' in switch 'bar'. > +ADD_NAMESPACES(bar1) > +ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:05", \ > +"fd12::1") > +ovn-nbctl lsp-add bar bar1 \ > +-- lsp-set-addresses bar1 "f0:00:00:01:02:05 fd12::2" > + > +# Config OVN load-balancer with a VIP. > +ovn-nbctl lb-add lb1 [[fd30::2]]:80 [[fd12::2]]:80 tcp > +ovn-nbctl ls-lb-add foo lb1 > + > +# Wait for ovn-controller to catch up. 
> +ovn-nbctl --wait=hv sync > + > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > +grep 'nat(dst=\[[fd12::2\]]:80)']) > + > +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d ' ' -f2) > + > +OVS_START_L7([bar1], [http6]) > + > +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(fd12::2) | grep -v fe80 | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +tcp,orig=(src=fd11::2,dst=fd12::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# now check with VIP > +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(fd30::2) | grep -v fe80 | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +tcp,orig=(src=fd11::2,dst=fd30::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# remove lb > +ovn-nbctl ls-lb-del foo lb1 > + > +# add stateless acl > +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless > +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless > + > +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], [ignore], [ignore]) > + > +# check conntrack zone has no tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(fd12::2) | grep -v fe80 | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +# add lb back > +ovn-nbctl ls-lb-add foo lb1 > + > +# Wait for 
ovn-controller to catch up. > +ovn-nbctl --wait=hv sync > + > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > +grep 'nat(dst=\[[fd12::2\]]:80)']) > + > +# should not dnat so will not be able to connect > +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [4], [ignore], [ignore]) > +# > +# check conntrack zone has no tcp entry > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > +FORMAT_CT(fd30::2) | grep -v fe80 | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > + > +as ovn-sb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as ovn-nb > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > + > +as northd > +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) > + > +as > +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d > +/connection dropped.*/d"]) > +AT_CLEANUP > +]) > -- > 2.17.1 > Thanks Venu for v2. Also thanks Numan for reviewing v1, and I saw that Numan's comment was addressed. So, applied to main branch. Han
On Thu, Dec 15, 2022 at 4:38 PM Han Zhou <hzhou@ovn.org> wrote: > > On Mon, Dec 12, 2022 at 1:28 AM venu iyer <venugopali@nvidia.com> wrote: > > > > Currently, even stateless flows are subject to connection tracking when > there are > > LB rules (for DNAT). However, if a flow needs to be subjected to LB, then > it shouldn't > > be configured as stateless. > > > > Stateless flow means we should not track it, and this change exempts > stateless > > flows from being tracked regardless of whether LB rules are present or > not. > > > > Signed-off-by: venu iyer <venugopali@nvidia.com> > > Acked-by: Han Zhou <hzhou@ovn.org> > > --- > > northd/northd.c | 25 +++- > > northd/ovn-northd.8.xml | 57 ++++---- > > ovn-nb.xml | 3 + > > tests/ovn-northd.at | 76 +++++------ > > tests/ovn.at | 4 +- > > tests/system-ovn.at | 296 ++++++++++++++++++++++++++++++++++++++++ > > 6 files changed, 383 insertions(+), 78 deletions(-) > > > > diff --git a/northd/northd.c b/northd/northd.c > > index 7c48bb3b4..5d8ef612f 100644 > > --- a/northd/northd.c > > +++ b/northd/northd.c > > @@ -140,8 +140,8 @@ enum ovn_stage { > > PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 26, "ls_in_l2_unknown") > \ > > > \ > > /* Logical switch egress stages. 
*/ > \ > > - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") > \ > > - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") > \ > > + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") > \ > > + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") > \ > > PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") > \ > > PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") > \ > > PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") > \ > > @@ -215,6 +215,7 @@ enum ovn_stage { > > #define REGBIT_ACL_LABEL "reg0[13]" > > #define REGBIT_FROM_RAMP "reg0[14]" > > #define REGBIT_PORT_SEC_DROP "reg0[15]" > > +#define REGBIT_ACL_STATELESS "reg0[16]" > > > > #define REG_ORIG_DIP_IPV4 "reg1" > > #define REG_ORIG_DIP_IPV6 "xxreg1" > > @@ -290,7 +291,7 @@ enum ovn_stage { > > * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | > | > > * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | > | > > * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | > | > > - * | | REGBIT_ACL_LABEL | X | > | > > + * | | REGBIT_ACL_{LABEL/STATELESS} | X | > | > > * +----+----------------------------------------------+ X | > | > > * | R5 | UNUSED | X | > LB_L2_AFF_BACKEND_IP6 | > > * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | > | > > @@ -5693,17 +5694,18 @@ build_stateless_filter(struct ovn_datapath *od, > > const struct nbrec_acl *acl, > > struct hmap *lflows) > > { > > + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; > > if (!strcmp(acl->direction, "from-lport")) { > > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, > > acl->priority + OVN_ACL_PRI_OFFSET, > > acl->match, > > - "next;", > > + action, > > &acl->header_); > > } else { > > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, > > acl->priority + OVN_ACL_PRI_OFFSET, > > acl->match, > > - "next;", > > + action, > > &acl->header_); > > } > > } > > @@ -5795,6 +5797,10 @@ build_pre_acls(struct ovn_datapath *od, const > struct hmap *port_groups, > > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > > 
ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", > > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > > + } else if (od->has_lb_vip) { > > + /* We'll build stateless filters if there are LB rules so that > > + * the stateless flows are not tracked in pre-lb. */ > > + build_stateless_filters(od, port_groups, lflows); > > } > > } > > > > @@ -5930,6 +5936,12 @@ build_pre_lb(struct ovn_datapath *od, const struct > shash *meter_groups, > > 110, lflows); > > } > > > > + /* Do not send stateless flows via conntrack */ > > + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, > > + REGBIT_ACL_STATELESS" == 1", "next;"); > > + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, > > + REGBIT_ACL_STATELESS" == 1", "next;"); > > + > > /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send > > * packet to conntrack for defragmentation and possibly for > unNATting. > > * > > @@ -6935,7 +6947,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, > struct ovn_northd_lb *lb, > > } > > ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" : > "ct_lb"); > > > > - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); > > + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", > > + ip_match, lb_vip->vip_str); > > if (lb_vip->port_str) { > > ds_put_format(match, " && %s.dst == %s", proto, > lb_vip->port_str); > > } > > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > > index dffbba96d..ce5603169 100644 > > --- a/northd/ovn-northd.8.xml > > +++ b/northd/ovn-northd.8.xml > > @@ -474,7 +474,9 @@ > > priority-110 flow is added to skip over stateful ACLs. Multicast, > IPv6 > > Neighbor Discovery and MLD traffic also skips stateful ACLs. For > > "allow-stateless" ACLs, a flow is added to bypass setting the hint > for > > - connection tracker processing. > > + connection tracker processing when there are stateful ACLs or LB > rules; > > + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching > stateless > > + ACL flows. 
> > </p> > > > > <p> > > @@ -494,8 +496,10 @@ > > in ingress table <code>LB</code> and <code>Stateful</code>. It > contains > > a priority-0 flow that simply moves traffic to the next table. > Moreover > > it contains two priority-110 flows to move multicast, IPv6 Neighbor > > - Discovery and MLD traffic to the next table. If load balancing > rules with > > - virtual IP addresses (and ports) are configured in > > + Discovery and MLD traffic to the next table. It also contains two > > + priority-110 flows to move stateless traffic, i.e traffic for which > > + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If > load > > + balancing rules with virtual IP addresses (and ports) are > configured in > > <code>OVN_Northbound</code> database for a logical switch > datapath, a > > priority-100 flow is added with the match <code>ip</code> to match > on IP > > packets and sets the action <code>reg0[2] = 1; next;</code> to act > as a > > @@ -1973,19 +1977,11 @@ output; > > </li> > > </ul> > > > > - <h3>Egress Table 0: Pre-LB</h3> > > + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> > > > > <p> > > - This table is similar to ingress table <code>Pre-LB</code>. It > > - contains a priority-0 flow that simply moves traffic to the next > table. > > - Moreover it contains two priority-110 flows to move multicast, IPv6 > > - Neighbor Discovery and MLD traffic to the next table. If any load > > - balancing rules exist for the datapath, a priority-100 flow is > added with > > - a match of <code>ip</code> and action of <code>reg0[2] = 1; > next;</code> > > - to act as a hint for table <code>Pre-stateful</code> to send IP > packets > > - to the connection tracker for packet de-fragmentation and possibly > DNAT > > - the destination VIP to one of the selected backend for already > committed > > - load balanced traffic. > > + This is similar to ingress table <code>Pre-ACLs</code> except for > > + <code>to-lport</code> traffic. 
> > </p> > > > > <p> > > @@ -1998,11 +1994,29 @@ output; > > db="OVN_Northbound"/> table. > > </p> > > > > - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> > > + <p> > > + This table also has a priority-110 flow with the match > > + <code>outport == <var>I</var></code> for all logical switch > > + datapaths to move traffic to the next table. Where <var>I</var> > > + is the peer of a logical router port. This flow is added to > > + skip the connection tracking of packets which will be entering > > + logical router datapath from logical switch datapath for routing. > > + </p> > > + > > + > > + <h3>Egress Table 1: Pre-LB</h3> > > > > <p> > > - This is similar to ingress table <code>Pre-ACLs</code> except for > > - <code>to-lport</code> traffic. > > + This table is similar to ingress table <code>Pre-LB</code>. It > > + contains a priority-0 flow that simply moves traffic to the next > table. > > + Moreover it contains two priority-110 flows to move multicast, IPv6 > > + Neighbor Discovery and MLD traffic to the next table. If any load > > + balancing rules exist for the datapath, a priority-100 flow is > added with > > + a match of <code>ip</code> and action of <code>reg0[2] = 1; > next;</code> > > + to act as a hint for table <code>Pre-stateful</code> to send IP > packets > > + to the connection tracker for packet de-fragmentation and possibly > DNAT > > + the destination VIP to one of the selected backend for already > committed > > + load balanced traffic. > > </p> > > > > <p> > > @@ -2015,15 +2029,6 @@ output; > > db="OVN_Northbound"/> table. > > </p> > > > > - <p> > > - This table also has a priority-110 flow with the match > > - <code>outport == <var>I</var></code> for all logical switch > > - datapaths to move traffic to the next table. Where <var>I</var> > > - is the peer of a logical router port. 
This flow is added to > > - skip the connection tracking of packets which will be entering > > - logical router datapath from logical switch datapath for routing. > > - </p> > > - > > <h3>Egress Table 2: Pre-stateful</h3> > > > > <p> > > diff --git a/ovn-nb.xml b/ovn-nb.xml > > index 0edc3da96..3ac7785e1 100644 > > --- a/ovn-nb.xml > > +++ b/ovn-nb.xml > > @@ -2159,6 +2159,9 @@ or > > outgoing TCP traffic directed to an IP address, then you > probably > > also want to define another rule to allow incoming TCP traffic > coming > > from this same IP address. > > + In addition, traffic that matches stateless ACLs will bypass > > + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be > > + used instead if the traffic is supposed to be load-balanced. > > </li> > > > > <li> > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > > index ca4263eac..a2f8e8a20 100644 > > --- a/tests/ovn-northd.at > > +++ b/tests/ovn-northd.at > > @@ -2024,7 +2024,7 @@ AT_CLEANUP > > > > # This test case tests that when a logical switch has load balancers > associated > > # (with VIPs configured), the below logical flow is added by ovn-northd. 
> > -# table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[0]] = 1; next;) > > +# table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[0]] = 1; next;) > > # This test case is added for the BZ - > > # https://bugzilla.redhat.com/show_bug.cgi?id=1849162 > > # > > @@ -2063,27 +2063,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 > > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl ls-lb-add sw0 lb2 > > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl clear load_balancer $lb1 vips > > check ovn-nbctl clear load_balancer $lb3 vips > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl clear load_balancer $lb2 vips > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > 
> ]) > > > > check ovn-nbctl clear load_balancer $lb4 vips > > @@ -2098,7 +2098,7 @@ check ovn-nbctl set load_balancer $lb4 > vips:"10.0.0.13"="10.0.0.6" > > > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > # Now reverse the order of clearing the vip. > > @@ -2106,13 +2106,13 @@ check ovn-nbctl clear load_balancer $lb2 vips > > check ovn-nbctl clear load_balancer $lb4 vips > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl clear load_balancer $lb1 vips > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl clear load_balancer $lb3 vips > > @@ -3057,18 +3057,10 @@ for direction in from to; do > > done > > ovn-nbctl --wait=sb sync > > > > -# TCP packets should go to conntrack for load balancing. > > +# TCP packets should not go to conntrack for load balancing. 
> > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], > [0], [dnl > > -ct_lb_mark { > > - ct_lb_mark { > > - reg0[[6]] = 0; > > - reg0[[12]] = 0; > > - ct_lb_mark /* default (use --ct to customize) */ { > > - output("lsp2"); > > - }; > > - }; > > -}; > > +output("lsp2"); > > ]) > > > > # UDP packets still go to conntrack. > > @@ -3201,18 +3193,10 @@ for direction in from to; do > > done > > ovn-nbctl --wait=sb sync > > > > -# TCP packets should go to conntrack for load balancing. > > +# TCP packets should not go to conntrack for load balancing. > > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], > [0], [dnl > > -ct_lb_mark { > > - ct_lb_mark { > > - reg0[[6]] = 0; > > - reg0[[12]] = 0; > > - ct_lb_mark /* default (use --ct to customize) */ { > > - output("lsp2"); > > - }; > > - }; > > -}; > > +output("lsp2"); > > ]) > > > > # UDP packets still go to conntrack. > > @@ -4026,14 +4010,15 @@ check_stateful_flows() { > > table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), > action=(next;) > > table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == > "sw0-lr0"), action=(next;) > > table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || > nd_ra || mldv1 || mldv2), action=(next;) > > + table=? (ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == > 1), action=(next;) > > ]) > > > > AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed > 's/table=./table=?/'], [0], [dnl > > table=? (ls_in_pre_stateful ), priority=0 , match=(1), > action=(next;) > > table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), > action=(ct_next;) > > table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb_mark;) > > - table=? 
(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; > ct_lb_mark;) > > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; > ct_lb_mark;) > > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > reg2[[0..15]] = 80; ct_lb_mark;) > > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; > reg2[[0..15]] = 80; ct_lb_mark;) > > ]) > > > > AT_CHECK([grep "ls_in_lb " sw0flows | sort | sed > 's/table=../table=??/'], [0], [dnl > > @@ -4049,12 +4034,13 @@ check_stateful_flows() { > > ]) > > > > AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=0 , match=(1), > action=(next;) > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), > action=(next;) > > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == > $svc_monitor_mac), action=(next;) > > - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == > "sw0-lr0"), action=(next;) > > - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || > nd_ra || mldv1 || mldv2), action=(next;) > > + table=1 (ls_out_pre_lb ), priority=0 , match=(1), > action=(next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), > action=(next;) > > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == > $svc_monitor_mac), action=(next;) > > + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == > "sw0-lr0"), action=(next;) > > + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || > nd_ra || mldv1 || 
mldv2), action=(next;) > > + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == > 1), action=(next;) > > ]) > > > > AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl > > @@ -4094,6 +4080,7 @@ AT_CHECK([grep "ls_in_pre_lb" sw0flows | sort | sed > 's/table=./table=?/'], [0], > > table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), > action=(next;) > > table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == > "sw0-lr0"), action=(next;) > > table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || > nd_ra || mldv1 || mldv2), action=(next;) > > + table=? (ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == > 1), action=(next;) > > ]) > > > > AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed > 's/table=./table=?/'], [0], [dnl > > @@ -4113,11 +4100,12 @@ AT_CHECK([grep "ls_in_stateful" sw0flows | sort | > sed 's/table=../table=??/'], [ > > ]) > > > > AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=0 , match=(1), > action=(next;) > > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), > action=(next;) > > - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == > $svc_monitor_mac), action=(next;) > > - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == > "sw0-lr0"), action=(next;) > > - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || > nd_ra || mldv1 || mldv2), action=(next;) > > + table=1 (ls_out_pre_lb ), priority=0 , match=(1), > action=(next;) > > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), > action=(next;) > > + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == > $svc_monitor_mac), action=(next;) > > + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == > "sw0-lr0"), action=(next;) > > + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || > nd_ra || mldv1 || mldv2), action=(next;) > > + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == > 1), action=(next;) > > ]) 
> > > > AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl > > @@ -7677,7 +7665,7 @@ check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > table=7 (lr_in_dnat ), priority=110 , match=(ct.est && > !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), > action=(next;) > > table=7 (lr_in_dnat ), priority=110 , match=(ct.new && > !ct.rel && ip4 && reg0 == 66.66.66.66), > action=(ct_lb_mark(backends=42.42.42.2);) > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb_mark;) > > table=12(ls_in_lb ), priority=110 , match=(ct.new && > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; > ct_lb_mark(backends=42.42.42.2);) > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb_mark;) > > @@ -7689,7 +7677,7 @@ check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > table=7 (lr_in_dnat ), priority=110 , match=(ct.est && > !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_label.natted == 1), > action=(next;) > > table=7 (lr_in_dnat ), priority=110 , match=(ct.new && > !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb;) > > table=12(ls_in_lb ), priority=110 , match=(ct.new && > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) > 
> table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb;) > > @@ -7701,7 +7689,7 @@ check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > table=7 (lr_in_dnat ), priority=110 , match=(ct.est && > !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), > action=(next;) > > table=7 (lr_in_dnat ), priority=110 , match=(ct.new && > !ct.rel && ip4 && reg0 == 66.66.66.66), > action=(ct_lb_mark(backends=42.42.42.2);) > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb_mark;) > > table=12(ls_in_lb ), priority=110 , match=(ct.new && > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; > ct_lb_mark(backends=42.42.42.2);) > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb_mark;) > > diff --git a/tests/ovn.at b/tests/ovn.at > > index f3bd53242..7abded46c 100644 > > --- a/tests/ovn.at > > +++ b/tests/ovn.at > > @@ -23777,7 +23777,7 @@ OVS_WAIT_FOR_OUTPUT( > > [ovn-sbctl dump-flows > sbflows > > ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed > 's/table=..//'], 0, > > [dnl > > - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && > tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > > + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > reg2[[0..15]] = 80; ct_lb_mark;) > > (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == > 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends= > 10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) > > ]) > > > > @@ -23820,7 
+23820,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 > > AT_CHECK( > > [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep > priority=120 |\ > > sed 's/table=../table=??/'], [0], [dnl > > - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; > ct_lb_mark;) > > + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > reg2[[0..15]] = 80; ct_lb_mark;) > > table=??(ls_in_lb ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) > > ]) > > > > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > > index b99578b9e..4bc9fb84f 100644 > > --- a/tests/system-ovn.at > > +++ b/tests/system-ovn.at > > @@ -9511,3 +9511,299 @@ NS_CHECK_EXEC([vm3], [nc 6666::1 666 -z], [0], > [ignore], [ignore]) > > > > AT_CLEANUP > > ]) > > + > > +# for packets that match stateless ACL flows, make sure we bypass > > +# connection tracking, even with a LB in the switch. Testing for > > +# TCP should suffice. For v4 and v6. > > +# > > +OVN_FOR_EACH_NORTHD([ > > +AT_SETUP([omit connection tracking for stateless flows v4]) > > + > > +CHECK_CONNTRACK() > > +ovn_start > > +OVS_TRAFFIC_VSWITCHD_START() > > +ADD_BR([br-int]) > > + > > +# Set external-ids in br-int needed for ovn-controller > > +ovs-vsctl \ > > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > > + -- set Open_vSwitch . > external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > > + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ > > + -- set bridge br-int fail-mode=secure > other-config:disable-in-band=true > > + > > +# Start ovn-controller > > +start_daemon ovn-controller > > + > > +# Logical network: > > +# One LR R1 with switches foo (192.168.1.0/24), bar (192.168.2.0/24), > > +# > > +# foo -- R1 -- bar > > + > > +ovn-nbctl lr-add R1 > > + > > +ovn-nbctl ls-add foo > > +ovn-nbctl ls-add bar > > + > > +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24 > > +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24 > > + > > +# Connect foo to R1 > > +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ > > + type=router options:router-port=foo \ > > + -- lsp-set-addresses rp-foo router > > + > > +# Connect bar to R1 > > +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ > > + type=router options:router-port=bar \ > > + -- lsp-set-addresses rp-bar router > > + > > +# Logical port 'foo1' in switch 'foo'. > > +ADD_NAMESPACES(foo1) > > +ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ > > + "192.168.1.1") > > +ovn-nbctl lsp-add foo foo1 \ > > +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2" > > + > > +# Logical port 'bar1' in switch 'bar'. > > +ADD_NAMESPACES(bar1) > > +ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:04", \ > > + "192.168.2.1") > > +ovn-nbctl lsp-add bar bar1 \ > > +-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2" > > + > > +# Config OVN load-balancer with a VIP. > > +ovn-nbctl lb-add lb1 30.30.30.30:80 "192.168.2.2:80" tcp > > +ovn-nbctl ls-lb-add foo lb1 > > + > > +# Wait for ovn-controller to catch up. 
> > +ovn-nbctl --wait=hv sync > > + > > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > > +grep 'nat(dst=192.168.2.2:80)']) > > + > > +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d > ' ' -f2) > > + > > +OVS_START_L7([bar1], [http]) > > + > > +AT_CHECK([ip netns exec foo1 wget 192.168.2.2 -t 3 -T 1], [0], > [ignore], [ignore]) > > + > > +# check conntrack zone has tcp entry > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > > +FORMAT_CT(192.168.1.2) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > > > +tcp,orig=(src=192.168.1.2,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +# now check with VIP > > +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [0], > [ignore], [ignore]) > > + > > +# check conntrack zone has tcp entry > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > > +FORMAT_CT(30.30.30.30) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > > > +tcp,orig=(src=192.168.1.2,dst=30.30.30.30,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +# remove lb > > +ovn-nbctl ls-lb-del foo lb1 > > + > > +# add stateless acl > > +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless > > +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless > > + > > +AT_CHECK([ip netns exec foo1 wget 192.168.2.2 -t 3 -T 1], [0], > [ignore], [ignore]) > > + > > +# check conntrack zone has no tcp entry > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > > +FORMAT_CT(192.168.1.2) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > > +]) > > + > > +AT_CHECK([ovs-appctl 
dpctl/flush-conntrack]) > > + > > +# add lb back > > +ovn-nbctl ls-lb-add foo lb1 > > + > > +# Wait for ovn-controller to catch up. > > +ovn-nbctl --wait=hv sync > > + > > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > > +grep 'nat(dst=192.168.2.2:80)']) > > + > > +# should not dnat so will not be able to connect > > +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [4], > [ignore], [ignore]) > > + > > +# check conntrack zone has no tcp entry > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > > +FORMAT_CT(30.30.30.30) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > > + > > +as ovn-sb > > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > > + > > +as ovn-nb > > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > > + > > +as northd > > +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) > > + > > +as > > +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d > > +/connection dropped.*/d"]) > > +AT_CLEANUP > > +]) > > + > > +OVN_FOR_EACH_NORTHD([ > > +AT_SETUP([omit connection tracking for stateless flows v6]) > > + > > +CHECK_CONNTRACK() > > +ovn_start > > +OVS_TRAFFIC_VSWITCHD_START() > > +ADD_BR([br-int]) > > + > > +# Set external-ids in br-int needed for ovn-controller > > +ovs-vsctl \ > > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > > + -- set Open_vSwitch . > external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > > + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ > > + -- set bridge br-int fail-mode=secure > other-config:disable-in-band=true > > + > > +# Start ovn-controller > > +start_daemon ovn-controller > > + > > +# Logical network: > > +# One LR - R1 with switches foo (fd11::/64) and > > +# bar (fd12::/64) connected to it > > +# > > +# foo -- R1 -- bar > > + > > +ovn-nbctl lr-add R1 > > + > > +ovn-nbctl ls-add foo > > +ovn-nbctl ls-add bar > > + > > +# Connect foo to R1 > > +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 fd11::1/64 > > +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ > > + type=router options:router-port=foo addresses=\"00:00:01:01:02:03\" > > + > > +# Connect bar to R1 > > +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 fd12::1/64 > > +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ > > + type=router options:router-port=bar addresses=\"00:00:01:01:02:04\" > > + > > +# Logical port 'foo1' in switch 'foo'. > > +ADD_NAMESPACES(foo1) > > +ADD_VETH(foo1, foo1, br-int, "fd11::2/64", "f0:00:00:01:02:03", \ > > + "fd11::1") > > +ovn-nbctl lsp-add foo foo1 \ > > +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 fd11::2" > > + > > +# Logical port 'bar1' in switch 'bar'. > > +ADD_NAMESPACES(bar1) > > +ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:05", \ > > +"fd12::1") > > +ovn-nbctl lsp-add bar bar1 \ > > +-- lsp-set-addresses bar1 "f0:00:00:01:02:05 fd12::2" > > + > > +# Config OVN load-balancer with a VIP. > > +ovn-nbctl lb-add lb1 [[fd30::2]]:80 [[fd12::2]]:80 tcp > > +ovn-nbctl ls-lb-add foo lb1 > > + > > +# Wait for ovn-controller to catch up. 
> > +ovn-nbctl --wait=hv sync > > + > > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > > +grep 'nat(dst=\[[fd12::2\]]:80)']) > > + > > +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d > ' ' -f2) > > + > > +OVS_START_L7([bar1], [http6]) > > + > > +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], > [ignore], [ignore]) > > + > > +# check conntrack zone has tcp entry > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > > +FORMAT_CT(fd12::2) | grep -v fe80 | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > > > +tcp,orig=(src=fd11::2,dst=fd12::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +# now check with VIP > > +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [0], > [ignore], [ignore]) > > + > > +# check conntrack zone has tcp entry > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > > +FORMAT_CT(fd30::2) | grep -v fe80 | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > > > +tcp,orig=(src=fd11::2,dst=fd30::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +# remove lb > > +ovn-nbctl ls-lb-del foo lb1 > > + > > +# add stateless acl > > +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless > > +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless > > + > > +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], > [ignore], [ignore]) > > + > > +# check conntrack zone has no tcp entry > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > > +FORMAT_CT(fd12::2) | grep -v fe80 | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > > +]) > > + > > 
+AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +# add lb back > > +ovn-nbctl ls-lb-add foo lb1 > > + > > +# Wait for ovn-controller to catch up. > > +ovn-nbctl --wait=hv sync > > + > > +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ > > +grep 'nat(dst=\[[fd12::2\]]:80)']) > > + > > +# should not dnat so will not be able to connect > > +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [4], > [ignore], [ignore]) > > +# > > +# check conntrack zone has no tcp entry > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ > > +FORMAT_CT(fd30::2) | grep -v fe80 | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > > + > > +as ovn-sb > > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > > + > > +as ovn-nb > > +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > > + > > +as northd > > +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) > > + > > +as > > +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d > > +/connection dropped.*/d"]) > > +AT_CLEANUP > > +]) > > -- > > 2.17.1 > > > > Thanks Venu for v2. Also thanks Numan for reviewing v1, and I saw that > Numan's comment was addressed. So, applied to main branch. Sorry I didn't get the chance to review it. Thanks for applying. Numan > > Han > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
Thanks very much, Han, for applying the patch! Numan, I'd appreciate it if you could still have a look at the changes, so that I can follow up in case I missed anything. Thanks, -venu
Hi all, We recently got an internal request to backport this patch to branch-22.12. It's a bug fix so in theory it should be OK to backport it all the way down to 22.03. The patches apply with minor conflicts that I can easily resolve. If nobody opposes this, I plan to do the backport early next week. Regards, Dumitru On 12/19/22 05:04, Venugopal Iyer via dev wrote: > Thanks, much, Han for applying the patch! Numan, i'd appreciate it if you could still > have a look at the changes, so that if i can follow up, in case i missed anything. > > thanks, > > -venu > > ________________________________________ > From: Numan Siddique <numans@ovn.org> > Sent: Friday, December 16, 2022 1:07 PM > To: Han Zhou > Cc: Venugopal Iyer; dev@openvswitch.org > Subject: Re: [ovs-dev] [PATCH ovn v2] northd: bypass connection tracking for stateless flows when there are LB flows present > > External email: Use caution opening links or attachments > > > On Thu, Dec 15, 2022 at 4:38 PM Han Zhou <hzhou@ovn.org> wrote: >> >> On Mon, Dec 12, 2022 at 1:28 AM venu iyer <venugopali@nvidia.com> wrote: >>> >>> Currently, even stateless flows are subject to connection tracking when >> there are >>> LB rules (for DNAT). However, if a flow needs to be subjected to LB, then >> it shouldn't >>> be configured as stateless. >>> >>> Stateless flow means we should not track it, and this change exempts >> stateless >>> flows from being tracked regardless of whether LB rules are present or >> not. 
>>> >>> Signed-off-by: venu iyer <venugopali@nvidia.com> >>> Acked-by: Han Zhou <hzhou@ovn.org> >>> --- >>> northd/northd.c | 25 +++- >>> northd/ovn-northd.8.xml | 57 ++++---- >>> ovn-nb.xml | 3 + >>> tests/ovn-northd.at | 76 +++++------ >>> tests/ovn.at | 4 +- >>> tests/system-ovn.at | 296 ++++++++++++++++++++++++++++++++++++++++ >>> 6 files changed, 383 insertions(+), 78 deletions(-) >>> >>> diff --git a/northd/northd.c b/northd/northd.c >>> index 7c48bb3b4..5d8ef612f 100644 >>> --- a/northd/northd.c >>> +++ b/northd/northd.c >>> @@ -140,8 +140,8 @@ enum ovn_stage { >>> PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 26, "ls_in_l2_unknown") >> \ >>> >> \ >>> /* Logical switch egress stages. */ >> \ >>> - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") >> \ >>> - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") >> \ >>> + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") >> \ >>> + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") >> \ >>> PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") >> \ >>> PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") >> \ >>> PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") >> \ >>> @@ -215,6 +215,7 @@ enum ovn_stage { >>> #define REGBIT_ACL_LABEL "reg0[13]" >>> #define REGBIT_FROM_RAMP "reg0[14]" >>> #define REGBIT_PORT_SEC_DROP "reg0[15]" >>> +#define REGBIT_ACL_STATELESS "reg0[16]" >>> >>> #define REG_ORIG_DIP_IPV4 "reg1" >>> #define REG_ORIG_DIP_IPV6 "xxreg1" >>> @@ -290,7 +291,7 @@ enum ovn_stage { >>> * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | >> | >>> * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | >> | >>> * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | >> | >>> - * | | REGBIT_ACL_LABEL | X | >> | >>> + * | | REGBIT_ACL_{LABEL/STATELESS} | X | >> | >>> * +----+----------------------------------------------+ X | >> | >>> * | R5 | UNUSED | X | >> LB_L2_AFF_BACKEND_IP6 | >>> * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | >> | >>> @@ -5693,17 +5694,18 @@ 
build_stateless_filter(struct ovn_datapath *od, >>> const struct nbrec_acl *acl, >>> struct hmap *lflows) >>> { >>> + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; >>> if (!strcmp(acl->direction, "from-lport")) { >>> ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, >>> acl->priority + OVN_ACL_PRI_OFFSET, >>> acl->match, >>> - "next;", >>> + action, >>> &acl->header_); >>> } else { >>> ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, >>> acl->priority + OVN_ACL_PRI_OFFSET, >>> acl->match, >>> - "next;", >>> + action, >>> &acl->header_); >>> } >>> } >>> @@ -5795,6 +5797,10 @@ build_pre_acls(struct ovn_datapath *od, const >> struct hmap *port_groups, >>> REGBIT_CONNTRACK_DEFRAG" = 1; next;"); >>> ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", >>> REGBIT_CONNTRACK_DEFRAG" = 1; next;"); >>> + } else if (od->has_lb_vip) { >>> + /* We'll build stateless filters if there are LB rules so that >>> + * the stateless flows are not tracked in pre-lb. */ >>> + build_stateless_filters(od, port_groups, lflows); >>> } >>> } >>> >>> @@ -5930,6 +5936,12 @@ build_pre_lb(struct ovn_datapath *od, const struct >> shash *meter_groups, >>> 110, lflows); >>> } >>> >>> + /* Do not sent statless flows via conntrack */ >>> + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, >>> + REGBIT_ACL_STATELESS" == 1", "next;"); >>> + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, >>> + REGBIT_ACL_STATELESS" == 1", "next;"); >>> + >>> /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send >>> * packet to conntrack for defragmentation and possibly for >> unNATting. >>> * >>> @@ -6935,7 +6947,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, >> struct ovn_northd_lb *lb, >>> } >>> ds_put_format(action, "%s;", ct_lb_mark ? 
"ct_lb_mark" : >> "ct_lb"); >>> >>> - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); >>> + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", >>> + ip_match, lb_vip->vip_str); >>> if (lb_vip->port_str) { >>> ds_put_format(match, " && %s.dst == %s", proto, >> lb_vip->port_str); >>> } >>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml >>> index dffbba96d..ce5603169 100644 >>> --- a/northd/ovn-northd.8.xml >>> +++ b/northd/ovn-northd.8.xml >>> @@ -474,7 +474,9 @@ >>> priority-110 flow is added to skip over stateful ACLs. Multicast, >> IPv6 >>> Neighbor Discovery and MLD traffic also skips stateful ACLs. For >>> "allow-stateless" ACLs, a flow is added to bypass setting the hint >> for >>> - connection tracker processing. >>> + connection tracker processing when there are stateful ACLs or LB >> rules; >>> + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching >> stateless >>> + ACL flows. >>> </p> >>> >>> <p> >>> @@ -494,8 +496,10 @@ >>> in ingress table <code>LB</code> and <code>Stateful</code>. It >> contains >>> a priority-0 flow that simply moves traffic to the next table. >> Moreover >>> it contains two priority-110 flows to move multicast, IPv6 Neighbor >>> - Discovery and MLD traffic to the next table. If load balancing >> rules with >>> - virtual IP addresses (and ports) are configured in >>> + Discovery and MLD traffic to the next table. It also contains two >>> + priority-110 flows to move stateless traffic, i.e traffic for which >>> + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. 
If >> load >>> + balancing rules with virtual IP addresses (and ports) are >> configured in >>> <code>OVN_Northbound</code> database for a logical switch >> datapath, a >>> priority-100 flow is added with the match <code>ip</code> to match >> on IP >>> packets and sets the action <code>reg0[2] = 1; next;</code> to act >> as a >>> @@ -1973,19 +1977,11 @@ output; >>> </li> >>> </ul> >>> >>> - <h3>Egress Table 0: Pre-LB</h3> >>> + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> >>> >>> <p> >>> - This table is similar to ingress table <code>Pre-LB</code>. It >>> - contains a priority-0 flow that simply moves traffic to the next >> table. >>> - Moreover it contains two priority-110 flows to move multicast, IPv6 >>> - Neighbor Discovery and MLD traffic to the next table. If any load >>> - balancing rules exist for the datapath, a priority-100 flow is >> added with >>> - a match of <code>ip</code> and action of <code>reg0[2] = 1; >> next;</code> >>> - to act as a hint for table <code>Pre-stateful</code> to send IP >> packets >>> - to the connection tracker for packet de-fragmentation and possibly >> DNAT >>> - the destination VIP to one of the selected backend for already >> committed >>> - load balanced traffic. >>> + This is similar to ingress table <code>Pre-ACLs</code> except for >>> + <code>to-lport</code> traffic. >>> </p> >>> >>> <p> >>> @@ -1998,11 +1994,29 @@ output; >>> db="OVN_Northbound"/> table. >>> </p> >>> >>> - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> >>> + <p> >>> + This table also has a priority-110 flow with the match >>> + <code>outport == <var>I</var></code> for all logical switch >>> + datapaths to move traffic to the next table. Where <var>I</var> >>> + is the peer of a logical router port. This flow is added to >>> + skip the connection tracking of packets which will be entering >>> + logical router datapath from logical switch datapath for routing. 
>>> + </p> >>> + >>> + >>> + <h3>Egress Table 1: Pre-LB</h3> >>> >>> <p> >>> - This is similar to ingress table <code>Pre-ACLs</code> except for >>> - <code>to-lport</code> traffic. >>> + This table is similar to ingress table <code>Pre-LB</code>. It >>> + contains a priority-0 flow that simply moves traffic to the next >> table. >>> + Moreover it contains two priority-110 flows to move multicast, IPv6 >>> + Neighbor Discovery and MLD traffic to the next table. If any load >>> + balancing rules exist for the datapath, a priority-100 flow is >> added with >>> + a match of <code>ip</code> and action of <code>reg0[2] = 1; >> next;</code> >>> + to act as a hint for table <code>Pre-stateful</code> to send IP >> packets >>> + to the connection tracker for packet de-fragmentation and possibly >> DNAT >>> + the destination VIP to one of the selected backend for already >> committed >>> + load balanced traffic. >>> </p> >>> >>> <p> >>> @@ -2015,15 +2029,6 @@ output; >>> db="OVN_Northbound"/> table. >>> </p> >>> >>> - <p> >>> - This table also has a priority-110 flow with the match >>> - <code>outport == <var>I</var></code> for all logical switch >>> - datapaths to move traffic to the next table. Where <var>I</var> >>> - is the peer of a logical router port. This flow is added to >>> - skip the connection tracking of packets which will be entering >>> - logical router datapath from logical switch datapath for routing. >>> - </p> >>> - >>> <h3>Egress Table 2: Pre-stateful</h3> >>> >>> <p> >>> diff --git a/ovn-nb.xml b/ovn-nb.xml >>> index 0edc3da96..3ac7785e1 100644 >>> --- a/ovn-nb.xml >>> +++ b/ovn-nb.xml >>> @@ -2159,6 +2159,9 @@ or >>> outgoing TCP traffic directed to an IP address, then you >> probably >>> also want to define another rule to allow incoming TCP traffic >> coming >>> from this same IP address. >>> + In addition, traffic that matches stateless ACLs will bypass >>> + load-balancer DNAT/un-DNAT processing. 
Stateful ACLs should be >>> + used instead if the traffic is supposed to be load-balanced. >>> </li> >>> >>> <li> >>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at >>> index ca4263eac..a2f8e8a20 100644 >>> --- a/tests/ovn-northd.at >>> +++ b/tests/ovn-northd.at >>> @@ -2024,7 +2024,7 @@ AT_CLEANUP >>> >>> # This test case tests that when a logical switch has load balancers >> associated >>> # (with VIPs configured), the below logical flow is added by ovn-northd. >>> -# table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[0]] = 1; next;) >>> +# table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[0]] = 1; next;) >>> # This test case is added for the BZ - >>> # https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbugzilla.redhat.com%2Fshow_bug.cgi%3Fid%3D1849162&data=05%7C01%7Cvenugopali%40nvidia.com%7C4803d2da0a014ad9807308dadfa9a430%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638068216964747698%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=%2B8ytjZQ7t4MrfczKVEJjbgFwsLqgIVIpKjD6%2BTkYKjo%3D&reserved=0 >>> # >>> @@ -2063,27 +2063,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 >>> check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 >>> check ovn-nbctl --wait=sb sync >>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >> grep reg0 | sort], [0], [dnl >>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> ]) >>> >>> check ovn-nbctl ls-lb-add sw0 lb2 >>> check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 >>> check ovn-nbctl --wait=sb sync >>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >> grep reg0 | sort], [0], [dnl >>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> + table=1 (ls_out_pre_lb ), priority=100 , 
match=(ip), >> action=(reg0[[2]] = 1; next;) >>> ]) >>> >>> check ovn-nbctl clear load_balancer $lb1 vips >>> check ovn-nbctl clear load_balancer $lb3 vips >>> check ovn-nbctl --wait=sb sync >>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >> grep reg0 | sort], [0], [dnl >>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> ]) >>> >>> check ovn-nbctl clear load_balancer $lb2 vips >>> check ovn-nbctl --wait=sb sync >>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >> grep reg0 | sort], [0], [dnl >>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> ]) >>> >>> check ovn-nbctl clear load_balancer $lb4 vips >>> @@ -2098,7 +2098,7 @@ check ovn-nbctl set load_balancer $lb4 >> vips:"10.0.0.13"="10.0.0.6" >>> >>> check ovn-nbctl --wait=sb sync >>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >> grep reg0 | sort], [0], [dnl >>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> ]) >>> >>> # Now reverse the order of clearing the vip. 
>>> @@ -2106,13 +2106,13 @@ check ovn-nbctl clear load_balancer $lb2 vips >>> check ovn-nbctl clear load_balancer $lb4 vips >>> check ovn-nbctl --wait=sb sync >>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >> grep reg0 | sort], [0], [dnl >>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> ]) >>> >>> check ovn-nbctl clear load_balancer $lb1 vips >>> check ovn-nbctl --wait=sb sync >>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >> grep reg0 | sort], [0], [dnl >>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> ]) >>> >>> check ovn-nbctl clear load_balancer $lb3 vips >>> @@ -3057,18 +3057,10 @@ for direction in from to; do >>> done >>> ovn-nbctl --wait=sb sync >>> >>> -# TCP packets should go to conntrack for load balancing. >>> +# TCP packets should not go to conntrack for load balancing. >>> flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" >>> AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], >> [0], [dnl >>> -ct_lb_mark { >>> - ct_lb_mark { >>> - reg0[[6]] = 0; >>> - reg0[[12]] = 0; >>> - ct_lb_mark /* default (use --ct to customize) */ { >>> - output("lsp2"); >>> - }; >>> - }; >>> -}; >>> +output("lsp2"); >>> ]) >>> >>> # UDP packets still go to conntrack. >>> @@ -3201,18 +3193,10 @@ for direction in from to; do >>> done >>> ovn-nbctl --wait=sb sync >>> >>> -# TCP packets should go to conntrack for load balancing. >>> +# TCP packets should not go to conntrack for load balancing. 
>>> flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" >>> AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], >> [0], [dnl >>> -ct_lb_mark { >>> - ct_lb_mark { >>> - reg0[[6]] = 0; >>> - reg0[[12]] = 0; >>> - ct_lb_mark /* default (use --ct to customize) */ { >>> - output("lsp2"); >>> - }; >>> - }; >>> -}; >>> +output("lsp2"); >>> ]) >>> >>> # UDP packets still go to conntrack. >>> @@ -4026,14 +4010,15 @@ check_stateful_flows() { >>> table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), >> action=(next;) >>> table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == >> "sw0-lr0"), action=(next;) >>> table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || >> nd_ra || mldv1 || mldv2), action=(next;) >>> + table=? (ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == >> 1), action=(next;) >>> ]) >>> >>> AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed >> 's/table=./table=?/'], [0], [dnl >>> table=? (ls_in_pre_stateful ), priority=0 , match=(1), >> action=(next;) >>> table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), >> action=(ct_next;) >>> table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), >> action=(ct_lb_mark;) >>> - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >> 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; >> ct_lb_mark;) >>> - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >> 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; >> ct_lb_mark;) >>> + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >> && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; >> reg2[[0..15]] = 80; ct_lb_mark;) >>> + table=? 
(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >> && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; >> reg2[[0..15]] = 80; ct_lb_mark;) >>> ]) >>> >>> AT_CHECK([grep "ls_in_lb " sw0flows | sort | sed >> 's/table=../table=??/'], [0], [dnl >>> @@ -4049,12 +4034,13 @@ check_stateful_flows() { >>> ]) >>> >>> AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl >>> - table=0 (ls_out_pre_lb ), priority=0 , match=(1), >> action=(next;) >>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), >> action=(next;) >>> - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == >> $svc_monitor_mac), action=(next;) >>> - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == >> "sw0-lr0"), action=(next;) >>> - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || >> nd_ra || mldv1 || mldv2), action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=0 , match=(1), >> action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >> action=(reg0[[2]] = 1; next;) >>> + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), >> action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == >> $svc_monitor_mac), action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == >> "sw0-lr0"), action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || >> nd_ra || mldv1 || mldv2), action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == >> 1), action=(next;) >>> ]) >>> >>> AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl >>> @@ -4094,6 +4080,7 @@ AT_CHECK([grep "ls_in_pre_lb" sw0flows | sort | sed >> 's/table=./table=?/'], [0], >>> table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), >> action=(next;) >>> table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == >> "sw0-lr0"), action=(next;) >>> table=? 
(ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || >> nd_ra || mldv1 || mldv2), action=(next;) >>> + table=? (ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == >> 1), action=(next;) >>> ]) >>> >>> AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed >> 's/table=./table=?/'], [0], [dnl >>> @@ -4113,11 +4100,12 @@ AT_CHECK([grep "ls_in_stateful" sw0flows | sort | >> sed 's/table=../table=??/'], [ >>> ]) >>> >>> AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl >>> - table=0 (ls_out_pre_lb ), priority=0 , match=(1), >> action=(next;) >>> - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), >> action=(next;) >>> - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == >> $svc_monitor_mac), action=(next;) >>> - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == >> "sw0-lr0"), action=(next;) >>> - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || >> nd_ra || mldv1 || mldv2), action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=0 , match=(1), >> action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), >> action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == >> $svc_monitor_mac), action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == >> "sw0-lr0"), action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || >> nd_ra || mldv1 || mldv2), action=(next;) >>> + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == >> 1), action=(next;) >>> ]) >>> >>> AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl >>> @@ -7677,7 +7665,7 @@ check ovn-nbctl --wait=sb sync >>> AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl >>> table=7 (lr_in_dnat ), priority=110 , match=(ct.est && >> !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), >> action=(next;) >>> table=7 (lr_in_dnat ), priority=110 , match=(ct.new && >> !ct.rel && ip4 && reg0 == 66.66.66.66), >> 
action=(ct_lb_mark(backends=42.42.42.2);) >>> - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >> 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) >>> + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >> && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) >>> table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), >> action=(ct_lb_mark;) >>> table=12(ls_in_lb ), priority=110 , match=(ct.new && >> ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; >> ct_lb_mark(backends=42.42.42.2);) >>> table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), >> action=(ct_lb_mark;) >>> @@ -7689,7 +7677,7 @@ check ovn-nbctl --wait=sb sync >>> AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl >>> table=7 (lr_in_dnat ), priority=110 , match=(ct.est && >> !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_label.natted == 1), >> action=(next;) >>> table=7 (lr_in_dnat ), priority=110 , match=(ct.new && >> !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) >>> - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >> 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) >>> + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >> && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) >>> table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), >> action=(ct_lb;) >>> table=12(ls_in_lb ), priority=110 , match=(ct.new && >> ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) >>> table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), >> action=(ct_lb;) >>> @@ -7701,7 +7689,7 @@ check ovn-nbctl --wait=sb sync >>> AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl >>> table=7 (lr_in_dnat ), priority=110 , match=(ct.est && >> !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), >> action=(next;) >>> table=7 (lr_in_dnat ), priority=110 , match=(ct.new && >> !ct.rel 
&& ip4 && reg0 == 66.66.66.66), >> action=(ct_lb_mark(backends=42.42.42.2);) >>> - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >> 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) >>> + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >> && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) >>> table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), >> action=(ct_lb_mark;) >>> table=12(ls_in_lb ), priority=110 , match=(ct.new && >> ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; >> ct_lb_mark(backends=42.42.42.2);) >>> table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), >> action=(ct_lb_mark;) >>> diff --git a/tests/ovn.at b/tests/ovn.at >>> index f3bd53242..7abded46c 100644 >>> --- a/tests/ovn.at >>> +++ b/tests/ovn.at >>> @@ -23777,7 +23777,7 @@ OVS_WAIT_FOR_OUTPUT( >>> [ovn-sbctl dump-flows > sbflows >>> ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed >> 's/table=..//'], 0, >>> [dnl >>> - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && >> tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) >>> + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && >> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; >> reg2[[0..15]] = 80; ct_lb_mark;) >>> (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == >> 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends= >> 10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) >>> ]) >>> >>> @@ -23820,7 +23820,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 >>> AT_CHECK( >>> [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep >> priority=120 |\ >>> sed 's/table=../table=??/'], [0], [dnl >>> - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >> 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; >> ct_lb_mark;) >>> + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] 
== 1 >> && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; >> reg2[[0..15]] = 80; ct_lb_mark;) >>> table=??(ls_in_lb ), priority=120 , match=(ct.new && >> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) >>> ]) >>> >>> diff --git a/tests/system-ovn.at b/tests/system-ovn.at >>> index b99578b9e..4bc9fb84f 100644 >>> --- a/tests/system-ovn.at >>> +++ b/tests/system-ovn.at >>> @@ -9511,3 +9511,299 @@ NS_CHECK_EXEC([vm3], [nc 6666::1 666 -z], [0], >> [ignore], [ignore]) >>> >>> AT_CLEANUP >>> ]) >>> + >>> +# for packets that match stateless ACL flows, make sure we bypass >>> +# connection tracking, even with a LB in the switch. Testing for >>> +# TCP should suffice. For v4 and v6. >>> +# >>> +OVN_FOR_EACH_NORTHD([ >>> +AT_SETUP([omit connection tracking for stateless flows v4]) >>> + >>> +CHECK_CONNTRACK() >>> +ovn_start >>> +OVS_TRAFFIC_VSWITCHD_START() >>> +ADD_BR([br-int]) >>> + >>> +# Set external-ids in br-int needed for ovn-controller >>> +ovs-vsctl \ >>> + -- set Open_vSwitch . external-ids:system-id=hv1 \ >>> + -- set Open_vSwitch . >> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ >>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ >>> + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ >>> + -- set bridge br-int fail-mode=secure >> other-config:disable-in-band=true >>> + >>> +# Start ovn-controller >>> +start_daemon ovn-controller >>> + >>> +# Logical network: >>> +# One LR R1 with switches foo (192.168.1.0/24), bar (192.168.2.0/24), >>> +# >>> +# foo -- R1 -- bar >>> + >>> +ovn-nbctl lr-add R1 >>> + >>> +ovn-nbctl ls-add foo >>> +ovn-nbctl ls-add bar >>> + >>> +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24 >>> +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24 >>> + >>> +# Connect foo to R1 >>> +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ >>> + type=router options:router-port=foo \ >>> + -- lsp-set-addresses rp-foo router >>> + >>> +# Connect bar to R1 >>> +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ >>> + type=router options:router-port=bar \ >>> + -- lsp-set-addresses rp-bar router >>> + >>> +# Logical port 'foo1' in switch 'foo'. >>> +ADD_NAMESPACES(foo1) >>> +ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ >>> + "192.168.1.1") >>> +ovn-nbctl lsp-add foo foo1 \ >>> +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2" >>> + >>> +# Logical port 'bar1' in switch 'bar'. >>> +ADD_NAMESPACES(bar1) >>> +ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:04", \ >>> + "192.168.2.1") >>> +ovn-nbctl lsp-add bar bar1 \ >>> +-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2" >>> + >>> +# Config OVN load-balancer with a VIP. >>> +ovn-nbctl lb-add lb1 30.30.30.30:80 "192.168.2.2:80" tcp >>> +ovn-nbctl ls-lb-add foo lb1 >>> + >>> +# Wait for ovn-controller to catch up. 
>>> +ovn-nbctl --wait=hv sync >>> + >>> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ >>> +grep 'nat(dst=192.168.2.2:80)']) >>> + >>> +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d >> ' ' -f2) >>> + >>> +OVS_START_L7([bar1], [http]) >>> + >>> +AT_CHECK([ip netns exec foo1 wget 192.168.2.2 -t 3 -T 1], [0], >> [ignore], [ignore]) >>> + >>> +# check conntrack zone has tcp entry >>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>> +FORMAT_CT(192.168.1.2) | \ >>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>> >> +tcp,orig=(src=192.168.1.2,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) >>> +]) >>> + >>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>> + >>> +# now check wirh VIP >>> +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [0], >> [ignore], [ignore]) >>> + >>> +# check conntrack zone has tcp entry >>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>> +FORMAT_CT(30.30.30.30) | \ >>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>> >> +tcp,orig=(src=192.168.1.2,dst=30.30.30.30,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) >>> +]) >>> + >>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>> + >>> +# remove lb >>> +ovn-nbctl ls-lb-del foo lb1 >>> + >>> +# add stateless acl >>> +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless >>> +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless >>> + >>> +AT_CHECK([ip netns exec foo1 wget 192.168.2.2 -t 3 -T 1], [0], >> [ignore], [ignore]) >>> + >>> +# check conntrack zone has no tcp entry >>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>> +FORMAT_CT(192.168.1.2) | \ >>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>> +]) >>> + >>> +AT_CHECK([ovs-appctl 
dpctl/flush-conntrack]) >>> + >>> +# add lb back >>> +ovn-nbctl ls-lb-add foo lb1 >>> + >>> +# Wait for ovn-controller to catch up. >>> +ovn-nbctl --wait=hv sync >>> + >>> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ >>> +grep 'nat(dst=192.168.2.2:80)']) >>> + >>> +# should not dnat so will not be able to connect >>> +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [4], >> [ignore], [ignore]) >>> + >>> +# check conntrack zone has no tcp entry >>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>> +FORMAT_CT(30.30.30.30) | \ >>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>> +]) >>> + >>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>> + >>> +OVS_APP_EXIT_AND_WAIT([ovn-controller]) >>> + >>> +as ovn-sb >>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) >>> + >>> +as ovn-nb >>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) >>> + >>> +as northd >>> +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) >>> + >>> +as >>> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d >>> +/connection dropped.*/d"]) >>> +AT_CLEANUP >>> +]) >>> + >>> +OVN_FOR_EACH_NORTHD([ >>> +AT_SETUP([omit connection tracking for stateless flows v6]) >>> + >>> +CHECK_CONNTRACK() >>> +ovn_start >>> +OVS_TRAFFIC_VSWITCHD_START() >>> +ADD_BR([br-int]) >>> + >>> +# Set external-ids in br-int needed for ovn-controller >>> +ovs-vsctl \ >>> + -- set Open_vSwitch . external-ids:system-id=hv1 \ >>> + -- set Open_vSwitch . >> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ >>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ >>> + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ >>> + -- set bridge br-int fail-mode=secure >> other-config:disable-in-band=true >>> + >>> +# Start ovn-controller >>> +start_daemon ovn-controller >>> + >>> +# Logical network: >>> +# One LR - R1 with switchess foo (fd11::/64) and >>> +# bar (fd12::/64) connected to it >>> +# >>> +# foo -- R1 -- bar >>> + >>> +ovn-nbctl lr-add R1 >>> + >>> +ovn-nbctl ls-add foo >>> +ovn-nbctl ls-add bar >>> + >>> +# Connect foo to R1 >>> +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 fd11::1/64 >>> +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ >>> + type=router options:router-port=foo addresses=\"00:00:01:01:02:03\" >>> + >>> +# Connect bar to R1 >>> +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 fd12::1/64 >>> +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ >>> + type=router options:router-port=bar addresses=\"00:00:01:01:02:04\" >>> + >>> +# Logical port 'foo1' in switch 'foo'. >>> +ADD_NAMESPACES(foo1) >>> +ADD_VETH(foo1, foo1, br-int, "fd11::2/64", "f0:00:00:01:02:03", \ >>> + "fd11::1") >>> +ovn-nbctl lsp-add foo foo1 \ >>> +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 fd11::2" >>> + >>> +# Logical port 'bar1' in switch 'bar'. >>> +ADD_NAMESPACES(bar1) >>> +ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:05", \ >>> +"fd12::1") >>> +ovn-nbctl lsp-add bar bar1 \ >>> +-- lsp-set-addresses bar1 "f0:00:00:01:02:05 fd12::2" >>> + >>> +# Config OVN load-balancer with a VIP. >>> +ovn-nbctl lb-add lb1 [[fd30::2]]:80 [[fd12::2]]:80 tcp >>> +ovn-nbctl ls-lb-add foo lb1 >>> + >>> +# Wait for ovn-controller to catch up. 
>>> +ovn-nbctl --wait=hv sync >>> + >>> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ >>> +grep 'nat(dst=\[[fd12::2\]]:80)']) >>> + >>> +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d >> ' ' -f2) >>> + >>> +OVS_START_L7([bar1], [http6]) >>> + >>> +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], >> [ignore], [ignore]) >>> + >>> +# check conntrack zone has tcp entry >>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>> +FORMAT_CT(fd12::2) | grep -v fe80 | \ >>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>> >> +tcp,orig=(src=fd11::2,dst=fd12::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) >>> +]) >>> + >>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>> + >>> +# now check wirh VIP >>> +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [0], >> [ignore], [ignore]) >>> + >>> +# check conntrack zone has tcp entry >>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>> +FORMAT_CT(fd30::2) | grep -v fe80 | \ >>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>> >> +tcp,orig=(src=fd11::2,dst=fd30::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) >>> +]) >>> + >>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>> + >>> +# remove lb >>> +ovn-nbctl ls-lb-del foo lb1 >>> + >>> +# add stateless acl >>> +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless >>> +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless >>> + >>> +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], >> [ignore], [ignore]) >>> + >>> +# check conntrack zone has no tcp entry >>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>> +FORMAT_CT(fd12::2) | grep -v fe80 | \ >>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>> +]) >>> + >>> 
+AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>> + >>> +# add lb back >>> +ovn-nbctl ls-lb-add foo lb1 >>> + >>> +# Wait for ovn-controller to catch up. >>> +ovn-nbctl --wait=hv sync >>> + >>> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ >>> +grep 'nat(dst=\[[fd12::2\]]:80)']) >>> + >>> +# should not dnat so will not be able to connect >>> +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [4], >> [ignore], [ignore]) >>> +# >>> +# check conntrack zone has no tcp entry >>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>> +FORMAT_CT(fd30::2) | grep -v fe80 | \ >>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>> +]) >>> + >>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>> + >>> +OVS_APP_EXIT_AND_WAIT([ovn-controller]) >>> + >>> +as ovn-sb >>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) >>> + >>> +as ovn-nb >>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) >>> + >>> +as northd >>> +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) >>> + >>> +as >>> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d >>> +/connection dropped.*/d"]) >>> +AT_CLEANUP >>> +]) >>> -- >>> 2.17.1 >>> >> >> Thanks Venu for v2. Also thanks Numan for reviewing v1, and I saw that >> Numan's comment was addressed. So, applied to main branch. > > Sorry I didn't get the chance to review it. Thanks for applying. 
> > Numan > >> >> Han >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=05%7C01%7Cvenugopali%40nvidia.com%7C4803d2da0a014ad9807308dadfa9a430%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638068216964747698%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=UpsUNrqrFFU0deEwfes%2FewdJWVjYiTb1mvGJ2jSflZc%3D&reserved=0 >> > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
On 6/15/23 12:29, Dumitru Ceara wrote: > Hi all, > > We recently got an internal request to backport this patch to > branch-22.12. It's a bug fix so in theory it should be OK to backport > it all the way down to 22.03. The patches apply with minor conflicts > that I can easily resolve. > > If nobody opposes this, I plan to do the backport early next week. > I actually went ahead and did this today; there were no objections here or during the community meeting (IRC) yesterday. I backported this (and its follow-up fix eacd7dcd83f6 ("tests: Fix Flaky system-tests "omit connection tracking ...")) to branches 22.12, 22.09, 22.06 and 22.03 LTS. Regards, Dumitru > Regards, > Dumitru > > On 12/19/22 05:04, Venugopal Iyer via dev wrote: >> Thanks, much, Han for applying the patch! Numan, i'd appreciate it if you could still >> have a look at the changes, so that if i can follow up, in case i missed anything. >> >> thanks, >> >> -venu >> >> ________________________________________ >> From: Numan Siddique <numans@ovn.org> >> Sent: Friday, December 16, 2022 1:07 PM >> To: Han Zhou >> Cc: Venugopal Iyer; dev@openvswitch.org >> Subject: Re: [ovs-dev] [PATCH ovn v2] northd: bypass connection tracking for stateless flows when there are LB flows present >> >> External email: Use caution opening links or attachments >> >> >> On Thu, Dec 15, 2022 at 4:38 PM Han Zhou <hzhou@ovn.org> wrote: >>> >>> On Mon, Dec 12, 2022 at 1:28 AM venu iyer <venugopali@nvidia.com> wrote: >>>> >>>> Currently, even stateless flows are subject to connection tracking when >>> there are >>>> LB rules (for DNAT). However, if a flow needs to be subjected to LB, then >>> it shouldn't >>>> be configured as stateless. >>>> >>>> Stateless flow means we should not track it, and this change exempts >>> stateless >>>> flows from being tracked regardless of whether LB rules are present or >>> not.
>>>> >>>> Signed-off-by: venu iyer <venugopali@nvidia.com> >>>> Acked-by: Han Zhou <hzhou@ovn.org> >>>> --- >>>> northd/northd.c | 25 +++- >>>> northd/ovn-northd.8.xml | 57 ++++---- >>>> ovn-nb.xml | 3 + >>>> tests/ovn-northd.at | 76 +++++------ >>>> tests/ovn.at | 4 +- >>>> tests/system-ovn.at | 296 ++++++++++++++++++++++++++++++++++++++++ >>>> 6 files changed, 383 insertions(+), 78 deletions(-) >>>> >>>> diff --git a/northd/northd.c b/northd/northd.c >>>> index 7c48bb3b4..5d8ef612f 100644 >>>> --- a/northd/northd.c >>>> +++ b/northd/northd.c >>>> @@ -140,8 +140,8 @@ enum ovn_stage { >>>> PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 26, "ls_in_l2_unknown") >>> \ >>>> >>> \ >>>> /* Logical switch egress stages. */ >>> \ >>>> - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") >>> \ >>>> - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") >>> \ >>>> + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") >>> \ >>>> + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") >>> \ >>>> PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") >>> \ >>>> PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") >>> \ >>>> PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") >>> \ >>>> @@ -215,6 +215,7 @@ enum ovn_stage { >>>> #define REGBIT_ACL_LABEL "reg0[13]" >>>> #define REGBIT_FROM_RAMP "reg0[14]" >>>> #define REGBIT_PORT_SEC_DROP "reg0[15]" >>>> +#define REGBIT_ACL_STATELESS "reg0[16]" >>>> >>>> #define REG_ORIG_DIP_IPV4 "reg1" >>>> #define REG_ORIG_DIP_IPV6 "xxreg1" >>>> @@ -290,7 +291,7 @@ enum ovn_stage { >>>> * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | >>> | >>>> * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | >>> | >>>> * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | >>> | >>>> - * | | REGBIT_ACL_LABEL | X | >>> | >>>> + * | | REGBIT_ACL_{LABEL/STATELESS} | X | >>> | >>>> * +----+----------------------------------------------+ X | >>> | >>>> * | R5 | UNUSED | X | >>> LB_L2_AFF_BACKEND_IP6 | >>>> * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R 
| >>> | >>>> @@ -5693,17 +5694,18 @@ build_stateless_filter(struct ovn_datapath *od, >>>> const struct nbrec_acl *acl, >>>> struct hmap *lflows) >>>> { >>>> + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; >>>> if (!strcmp(acl->direction, "from-lport")) { >>>> ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, >>>> acl->priority + OVN_ACL_PRI_OFFSET, >>>> acl->match, >>>> - "next;", >>>> + action, >>>> &acl->header_); >>>> } else { >>>> ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, >>>> acl->priority + OVN_ACL_PRI_OFFSET, >>>> acl->match, >>>> - "next;", >>>> + action, >>>> &acl->header_); >>>> } >>>> } >>>> @@ -5795,6 +5797,10 @@ build_pre_acls(struct ovn_datapath *od, const >>> struct hmap *port_groups, >>>> REGBIT_CONNTRACK_DEFRAG" = 1; next;"); >>>> ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", >>>> REGBIT_CONNTRACK_DEFRAG" = 1; next;"); >>>> + } else if (od->has_lb_vip) { >>>> + /* We'll build stateless filters if there are LB rules so that >>>> + * the stateless flows are not tracked in pre-lb. */ >>>> + build_stateless_filters(od, port_groups, lflows); >>>> } >>>> } >>>> >>>> @@ -5930,6 +5936,12 @@ build_pre_lb(struct ovn_datapath *od, const struct >>> shash *meter_groups, >>>> 110, lflows); >>>> } >>>> >>>> + /* Do not sent statless flows via conntrack */ >>>> + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, >>>> + REGBIT_ACL_STATELESS" == 1", "next;"); >>>> + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, >>>> + REGBIT_ACL_STATELESS" == 1", "next;"); >>>> + >>>> /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send >>>> * packet to conntrack for defragmentation and possibly for >>> unNATting. >>>> * >>>> @@ -6935,7 +6947,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, >>> struct ovn_northd_lb *lb, >>>> } >>>> ds_put_format(action, "%s;", ct_lb_mark ? 
"ct_lb_mark" : >>> "ct_lb"); >>>> >>>> - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); >>>> + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", >>>> + ip_match, lb_vip->vip_str); >>>> if (lb_vip->port_str) { >>>> ds_put_format(match, " && %s.dst == %s", proto, >>> lb_vip->port_str); >>>> } >>>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml >>>> index dffbba96d..ce5603169 100644 >>>> --- a/northd/ovn-northd.8.xml >>>> +++ b/northd/ovn-northd.8.xml >>>> @@ -474,7 +474,9 @@ >>>> priority-110 flow is added to skip over stateful ACLs. Multicast, >>> IPv6 >>>> Neighbor Discovery and MLD traffic also skips stateful ACLs. For >>>> "allow-stateless" ACLs, a flow is added to bypass setting the hint >>> for >>>> - connection tracker processing. >>>> + connection tracker processing when there are stateful ACLs or LB >>> rules; >>>> + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching >>> stateless >>>> + ACL flows. >>>> </p> >>>> >>>> <p> >>>> @@ -494,8 +496,10 @@ >>>> in ingress table <code>LB</code> and <code>Stateful</code>. It >>> contains >>>> a priority-0 flow that simply moves traffic to the next table. >>> Moreover >>>> it contains two priority-110 flows to move multicast, IPv6 Neighbor >>>> - Discovery and MLD traffic to the next table. If load balancing >>> rules with >>>> - virtual IP addresses (and ports) are configured in >>>> + Discovery and MLD traffic to the next table. It also contains two >>>> + priority-110 flows to move stateless traffic, i.e traffic for which >>>> + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. 
If >>> load >>>> + balancing rules with virtual IP addresses (and ports) are >>> configured in >>>> <code>OVN_Northbound</code> database for a logical switch >>> datapath, a >>>> priority-100 flow is added with the match <code>ip</code> to match >>> on IP >>>> packets and sets the action <code>reg0[2] = 1; next;</code> to act >>> as a >>>> @@ -1973,19 +1977,11 @@ output; >>>> </li> >>>> </ul> >>>> >>>> - <h3>Egress Table 0: Pre-LB</h3> >>>> + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> >>>> >>>> <p> >>>> - This table is similar to ingress table <code>Pre-LB</code>. It >>>> - contains a priority-0 flow that simply moves traffic to the next >>> table. >>>> - Moreover it contains two priority-110 flows to move multicast, IPv6 >>>> - Neighbor Discovery and MLD traffic to the next table. If any load >>>> - balancing rules exist for the datapath, a priority-100 flow is >>> added with >>>> - a match of <code>ip</code> and action of <code>reg0[2] = 1; >>> next;</code> >>>> - to act as a hint for table <code>Pre-stateful</code> to send IP >>> packets >>>> - to the connection tracker for packet de-fragmentation and possibly >>> DNAT >>>> - the destination VIP to one of the selected backend for already >>> committed >>>> - load balanced traffic. >>>> + This is similar to ingress table <code>Pre-ACLs</code> except for >>>> + <code>to-lport</code> traffic. >>>> </p> >>>> >>>> <p> >>>> @@ -1998,11 +1994,29 @@ output; >>>> db="OVN_Northbound"/> table. >>>> </p> >>>> >>>> - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> >>>> + <p> >>>> + This table also has a priority-110 flow with the match >>>> + <code>outport == <var>I</var></code> for all logical switch >>>> + datapaths to move traffic to the next table. Where <var>I</var> >>>> + is the peer of a logical router port. This flow is added to >>>> + skip the connection tracking of packets which will be entering >>>> + logical router datapath from logical switch datapath for routing. 
>>>> + </p> >>>> + >>>> + >>>> + <h3>Egress Table 1: Pre-LB</h3> >>>> >>>> <p> >>>> - This is similar to ingress table <code>Pre-ACLs</code> except for >>>> - <code>to-lport</code> traffic. >>>> + This table is similar to ingress table <code>Pre-LB</code>. It >>>> + contains a priority-0 flow that simply moves traffic to the next >>> table. >>>> + Moreover it contains two priority-110 flows to move multicast, IPv6 >>>> + Neighbor Discovery and MLD traffic to the next table. If any load >>>> + balancing rules exist for the datapath, a priority-100 flow is >>> added with >>>> + a match of <code>ip</code> and action of <code>reg0[2] = 1; >>> next;</code> >>>> + to act as a hint for table <code>Pre-stateful</code> to send IP >>> packets >>>> + to the connection tracker for packet de-fragmentation and possibly >>> DNAT >>>> + the destination VIP to one of the selected backend for already >>> committed >>>> + load balanced traffic. >>>> </p> >>>> >>>> <p> >>>> @@ -2015,15 +2029,6 @@ output; >>>> db="OVN_Northbound"/> table. >>>> </p> >>>> >>>> - <p> >>>> - This table also has a priority-110 flow with the match >>>> - <code>outport == <var>I</var></code> for all logical switch >>>> - datapaths to move traffic to the next table. Where <var>I</var> >>>> - is the peer of a logical router port. This flow is added to >>>> - skip the connection tracking of packets which will be entering >>>> - logical router datapath from logical switch datapath for routing. >>>> - </p> >>>> - >>>> <h3>Egress Table 2: Pre-stateful</h3> >>>> >>>> <p> >>>> diff --git a/ovn-nb.xml b/ovn-nb.xml >>>> index 0edc3da96..3ac7785e1 100644 >>>> --- a/ovn-nb.xml >>>> +++ b/ovn-nb.xml >>>> @@ -2159,6 +2159,9 @@ or >>>> outgoing TCP traffic directed to an IP address, then you >>> probably >>>> also want to define another rule to allow incoming TCP traffic >>> coming >>>> from this same IP address. 
>>>> + In addition, traffic that matches stateless ACLs will bypass >>>> + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be >>>> + used instead if the traffic is supposed to be load-balanced. >>>> </li> >>>> >>>> <li> >>>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at >>>> index ca4263eac..a2f8e8a20 100644 >>>> --- a/tests/ovn-northd.at >>>> +++ b/tests/ovn-northd.at >>>> @@ -2024,7 +2024,7 @@ AT_CLEANUP >>>> >>>> # This test case tests that when a logical switch has load balancers >>> associated >>>> # (with VIPs configured), the below logical flow is added by ovn-northd. >>>> -# table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[0]] = 1; next;) >>>> +# table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[0]] = 1; next;) >>>> # This test case is added for the BZ - >>>> # https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbugzilla.redhat.com%2Fshow_bug.cgi%3Fid%3D1849162&data=05%7C01%7Cvenugopali%40nvidia.com%7C4803d2da0a014ad9807308dadfa9a430%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638068216964747698%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=%2B8ytjZQ7t4MrfczKVEJjbgFwsLqgIVIpKjD6%2BTkYKjo%3D&reserved=0 >>>> # >>>> @@ -2063,27 +2063,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 >>>> check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 >>>> check ovn-nbctl --wait=sb sync >>>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >>> grep reg0 | sort], [0], [dnl >>>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> ]) >>>> >>>> check ovn-nbctl ls-lb-add sw0 lb2 >>>> check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 >>>> check ovn-nbctl --wait=sb sync >>>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >>> grep reg0 | sort], 
[0], [dnl >>>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> ]) >>>> >>>> check ovn-nbctl clear load_balancer $lb1 vips >>>> check ovn-nbctl clear load_balancer $lb3 vips >>>> check ovn-nbctl --wait=sb sync >>>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >>> grep reg0 | sort], [0], [dnl >>>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> ]) >>>> >>>> check ovn-nbctl clear load_balancer $lb2 vips >>>> check ovn-nbctl --wait=sb sync >>>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >>> grep reg0 | sort], [0], [dnl >>>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> ]) >>>> >>>> check ovn-nbctl clear load_balancer $lb4 vips >>>> @@ -2098,7 +2098,7 @@ check ovn-nbctl set load_balancer $lb4 >>> vips:"10.0.0.13"="10.0.0.6" >>>> >>>> check ovn-nbctl --wait=sb sync >>>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >>> grep reg0 | sort], [0], [dnl >>>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> ]) >>>> >>>> # Now reverse the order of clearing the vip. 
>>>> @@ -2106,13 +2106,13 @@ check ovn-nbctl clear load_balancer $lb2 vips >>>> check ovn-nbctl clear load_balancer $lb4 vips >>>> check ovn-nbctl --wait=sb sync >>>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >>> grep reg0 | sort], [0], [dnl >>>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> ]) >>>> >>>> check ovn-nbctl clear load_balancer $lb1 vips >>>> check ovn-nbctl --wait=sb sync >>>> AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | >>> grep reg0 | sort], [0], [dnl >>>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> ]) >>>> >>>> check ovn-nbctl clear load_balancer $lb3 vips >>>> @@ -3057,18 +3057,10 @@ for direction in from to; do >>>> done >>>> ovn-nbctl --wait=sb sync >>>> >>>> -# TCP packets should go to conntrack for load balancing. >>>> +# TCP packets should not go to conntrack for load balancing. >>>> flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" >>>> AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], >>> [0], [dnl >>>> -ct_lb_mark { >>>> - ct_lb_mark { >>>> - reg0[[6]] = 0; >>>> - reg0[[12]] = 0; >>>> - ct_lb_mark /* default (use --ct to customize) */ { >>>> - output("lsp2"); >>>> - }; >>>> - }; >>>> -}; >>>> +output("lsp2"); >>>> ]) >>>> >>>> # UDP packets still go to conntrack. >>>> @@ -3201,18 +3193,10 @@ for direction in from to; do >>>> done >>>> ovn-nbctl --wait=sb sync >>>> >>>> -# TCP packets should go to conntrack for load balancing. >>>> +# TCP packets should not go to conntrack for load balancing. 
>>>> flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" >>>> AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], >>> [0], [dnl >>>> -ct_lb_mark { >>>> - ct_lb_mark { >>>> - reg0[[6]] = 0; >>>> - reg0[[12]] = 0; >>>> - ct_lb_mark /* default (use --ct to customize) */ { >>>> - output("lsp2"); >>>> - }; >>>> - }; >>>> -}; >>>> +output("lsp2"); >>>> ]) >>>> >>>> # UDP packets still go to conntrack. >>>> @@ -4026,14 +4010,15 @@ check_stateful_flows() { >>>> table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), >>> action=(next;) >>>> table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == >>> "sw0-lr0"), action=(next;) >>>> table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || >>> nd_ra || mldv1 || mldv2), action=(next;) >>>> + table=? (ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == >>> 1), action=(next;) >>>> ]) >>>> >>>> AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed >>> 's/table=./table=?/'], [0], [dnl >>>> table=? (ls_in_pre_stateful ), priority=0 , match=(1), >>> action=(next;) >>>> table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), >>> action=(ct_next;) >>>> table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), >>> action=(ct_lb_mark;) >>>> - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >>> 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; >>> ct_lb_mark;) >>>> - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >>> 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; >>> ct_lb_mark;) >>>> + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >>> && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; >>> reg2[[0..15]] = 80; ct_lb_mark;) >>>> + table=? 
(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >>> && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; >>> reg2[[0..15]] = 80; ct_lb_mark;) >>>> ]) >>>> >>>> AT_CHECK([grep "ls_in_lb " sw0flows | sort | sed >>> 's/table=../table=??/'], [0], [dnl >>>> @@ -4049,12 +4034,13 @@ check_stateful_flows() { >>>> ]) >>>> >>>> AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl >>>> - table=0 (ls_out_pre_lb ), priority=0 , match=(1), >>> action=(next;) >>>> - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), >>> action=(next;) >>>> - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == >>> $svc_monitor_mac), action=(next;) >>>> - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == >>> "sw0-lr0"), action=(next;) >>>> - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || >>> nd_ra || mldv1 || mldv2), action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=0 , match=(1), >>> action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), >>> action=(reg0[[2]] = 1; next;) >>>> + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), >>> action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == >>> $svc_monitor_mac), action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == >>> "sw0-lr0"), action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || >>> nd_ra || mldv1 || mldv2), action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == >>> 1), action=(next;) >>>> ]) >>>> >>>> AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl >>>> @@ -4094,6 +4080,7 @@ AT_CHECK([grep "ls_in_pre_lb" sw0flows | sort | sed >>> 's/table=./table=?/'], [0], >>>> table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), >>> action=(next;) >>>> table=? 
(ls_in_pre_lb ), priority=110 , match=(ip && inport == >>> "sw0-lr0"), action=(next;) >>>> table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || >>> nd_ra || mldv1 || mldv2), action=(next;) >>>> + table=? (ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == >>> 1), action=(next;) >>>> ]) >>>> >>>> AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed >>> 's/table=./table=?/'], [0], [dnl >>>> @@ -4113,11 +4100,12 @@ AT_CHECK([grep "ls_in_stateful" sw0flows | sort | >>> sed 's/table=../table=??/'], [ >>>> ]) >>>> >>>> AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl >>>> - table=0 (ls_out_pre_lb ), priority=0 , match=(1), >>> action=(next;) >>>> - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), >>> action=(next;) >>>> - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == >>> $svc_monitor_mac), action=(next;) >>>> - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == >>> "sw0-lr0"), action=(next;) >>>> - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || >>> nd_ra || mldv1 || mldv2), action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=0 , match=(1), >>> action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), >>> action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == >>> $svc_monitor_mac), action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == >>> "sw0-lr0"), action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || >>> nd_ra || mldv1 || mldv2), action=(next;) >>>> + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == >>> 1), action=(next;) >>>> ]) >>>> >>>> AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl >>>> @@ -7677,7 +7665,7 @@ check ovn-nbctl --wait=sb sync >>>> AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl >>>> table=7 (lr_in_dnat ), priority=110 , match=(ct.est && >>> !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), >>> 
action=(next;) >>>> table=7 (lr_in_dnat ), priority=110 , match=(ct.new && >>> !ct.rel && ip4 && reg0 == 66.66.66.66), >>> action=(ct_lb_mark(backends=42.42.42.2);) >>>> - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >>> 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) >>>> + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >>> && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) >>>> table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), >>> action=(ct_lb_mark;) >>>> table=12(ls_in_lb ), priority=110 , match=(ct.new && >>> ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; >>> ct_lb_mark(backends=42.42.42.2);) >>>> table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), >>> action=(ct_lb_mark;) >>>> @@ -7689,7 +7677,7 @@ check ovn-nbctl --wait=sb sync >>>> AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl >>>> table=7 (lr_in_dnat ), priority=110 , match=(ct.est && >>> !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_label.natted == 1), >>> action=(next;) >>>> table=7 (lr_in_dnat ), priority=110 , match=(ct.new && >>> !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) >>>> - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >>> 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) >>>> + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >>> && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) >>>> table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), >>> action=(ct_lb;) >>>> table=12(ls_in_lb ), priority=110 , match=(ct.new && >>> ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) >>>> table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), >>> action=(ct_lb;) >>>> @@ -7701,7 +7689,7 @@ check ovn-nbctl --wait=sb sync >>>> AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl >>>> table=7 (lr_in_dnat ), priority=110 , match=(ct.est && 
>>> !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), >>> action=(next;) >>>> table=7 (lr_in_dnat ), priority=110 , match=(ct.new && >>> !ct.rel && ip4 && reg0 == 66.66.66.66), >>> action=(ct_lb_mark(backends=42.42.42.2);) >>>> - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == >>> 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) >>>> + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >>> && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) >>>> table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), >>> action=(ct_lb_mark;) >>>> table=12(ls_in_lb ), priority=110 , match=(ct.new && >>> ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; >>> ct_lb_mark(backends=42.42.42.2);) >>>> table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), >>> action=(ct_lb_mark;) >>>> diff --git a/tests/ovn.at b/tests/ovn.at >>>> index f3bd53242..7abded46c 100644 >>>> --- a/tests/ovn.at >>>> +++ b/tests/ovn.at >>>> @@ -23777,7 +23777,7 @@ OVS_WAIT_FOR_OUTPUT( >>>> [ovn-sbctl dump-flows > sbflows >>>> ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed >>> 's/table=..//'], 0, >>>> [dnl >>>> - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && >>> tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) >>>> + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && >>> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; >>> reg2[[0..15]] = 80; ct_lb_mark;) >>>> (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == >>> 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends= >>> 10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) >>>> ]) >>>> >>>> @@ -23820,7 +23820,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 >>>> AT_CHECK( >>>> [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep >>> priority=120 |\ >>>> sed 's/table=../table=??/'], [0], [dnl >>>> - table=??(ls_in_pre_stateful ), 
priority=120 , match=(ip4.dst == >>> 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; >>> ct_lb_mark;) >>>> + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 >>> && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; >>> reg2[[0..15]] = 80; ct_lb_mark;) >>>> table=??(ls_in_lb ), priority=120 , match=(ct.new && >>> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) >>>> ]) >>>> >>>> diff --git a/tests/system-ovn.at b/tests/system-ovn.at >>>> index b99578b9e..4bc9fb84f 100644 >>>> --- a/tests/system-ovn.at >>>> +++ b/tests/system-ovn.at >>>> @@ -9511,3 +9511,299 @@ NS_CHECK_EXEC([vm3], [nc 6666::1 666 -z], [0], >>> [ignore], [ignore]) >>>> >>>> AT_CLEANUP >>>> ]) >>>> + >>>> +# for packets that match stateless ACL flows, make sure we bypass >>>> +# connection tracking, even with a LB in the switch. Testing for >>>> +# TCP should suffice. For v4 and v6. >>>> +# >>>> +OVN_FOR_EACH_NORTHD([ >>>> +AT_SETUP([omit connection tracking for stateless flows v4]) >>>> + >>>> +CHECK_CONNTRACK() >>>> +ovn_start >>>> +OVS_TRAFFIC_VSWITCHD_START() >>>> +ADD_BR([br-int]) >>>> + >>>> +# Set external-ids in br-int needed for ovn-controller >>>> +ovs-vsctl \ >>>> + -- set Open_vSwitch . external-ids:system-id=hv1 \ >>>> + -- set Open_vSwitch . >>> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ >>>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ >>>> + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ >>>> + -- set bridge br-int fail-mode=secure >>> other-config:disable-in-band=true >>>> + >>>> +# Start ovn-controller >>>> +start_daemon ovn-controller >>>> + >>>> +# Logical network: >>>> +# One LR R1 with switches foo (192.168.1.0/24), bar (192.168.2.0/24), >>>> +# >>>> +# foo -- R1 -- bar >>>> + >>>> +ovn-nbctl lr-add R1 >>>> + >>>> +ovn-nbctl ls-add foo >>>> +ovn-nbctl ls-add bar >>>> + >>>> +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24 >>>> +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24 >>>> + >>>> +# Connect foo to R1 >>>> +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ >>>> + type=router options:router-port=foo \ >>>> + -- lsp-set-addresses rp-foo router >>>> + >>>> +# Connect bar to R1 >>>> +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ >>>> + type=router options:router-port=bar \ >>>> + -- lsp-set-addresses rp-bar router >>>> + >>>> +# Logical port 'foo1' in switch 'foo'. >>>> +ADD_NAMESPACES(foo1) >>>> +ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ >>>> + "192.168.1.1") >>>> +ovn-nbctl lsp-add foo foo1 \ >>>> +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2" >>>> + >>>> +# Logical port 'bar1' in switch 'bar'. >>>> +ADD_NAMESPACES(bar1) >>>> +ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:04", \ >>>> + "192.168.2.1") >>>> +ovn-nbctl lsp-add bar bar1 \ >>>> +-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2" >>>> + >>>> +# Config OVN load-balancer with a VIP. >>>> +ovn-nbctl lb-add lb1 30.30.30.30:80 "192.168.2.2:80" tcp >>>> +ovn-nbctl ls-lb-add foo lb1 >>>> + >>>> +# Wait for ovn-controller to catch up. 
>>>> +ovn-nbctl --wait=hv sync >>>> + >>>> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ >>>> +grep 'nat(dst=192.168.2.2:80)']) >>>> + >>>> +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d >>> ' ' -f2) >>>> + >>>> +OVS_START_L7([bar1], [http]) >>>> + >>>> +AT_CHECK([ip netns exec foo1 wget 192.168.2.2 -t 3 -T 1], [0], >>> [ignore], [ignore]) >>>> + >>>> +# check conntrack zone has tcp entry >>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>>> +FORMAT_CT(192.168.1.2) | \ >>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>>> >>> +tcp,orig=(src=192.168.1.2,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) >>>> +]) >>>> + >>>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>>> + >>>> +# now check wirh VIP >>>> +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [0], >>> [ignore], [ignore]) >>>> + >>>> +# check conntrack zone has tcp entry >>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>>> +FORMAT_CT(30.30.30.30) | \ >>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>>> >>> +tcp,orig=(src=192.168.1.2,dst=30.30.30.30,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) >>>> +]) >>>> + >>>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>>> + >>>> +# remove lb >>>> +ovn-nbctl ls-lb-del foo lb1 >>>> + >>>> +# add stateless acl >>>> +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless >>>> +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless >>>> + >>>> +AT_CHECK([ip netns exec foo1 wget 192.168.2.2 -t 3 -T 1], [0], >>> [ignore], [ignore]) >>>> + >>>> +# check conntrack zone has no tcp entry >>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>>> +FORMAT_CT(192.168.1.2) | \ >>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], 
[dnl >>>> +]) >>>> + >>>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>>> + >>>> +# add lb back >>>> +ovn-nbctl ls-lb-add foo lb1 >>>> + >>>> +# Wait for ovn-controller to catch up. >>>> +ovn-nbctl --wait=hv sync >>>> + >>>> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ >>>> +grep 'nat(dst=192.168.2.2:80)']) >>>> + >>>> +# should not dnat so will not be able to connect >>>> +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [4], >>> [ignore], [ignore]) >>>> + >>>> +# check conntrack zone has no tcp entry >>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>>> +FORMAT_CT(30.30.30.30) | \ >>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>>> +]) >>>> + >>>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>>> + >>>> +OVS_APP_EXIT_AND_WAIT([ovn-controller]) >>>> + >>>> +as ovn-sb >>>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) >>>> + >>>> +as ovn-nb >>>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) >>>> + >>>> +as northd >>>> +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) >>>> + >>>> +as >>>> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d >>>> +/connection dropped.*/d"]) >>>> +AT_CLEANUP >>>> +]) >>>> + >>>> +OVN_FOR_EACH_NORTHD([ >>>> +AT_SETUP([omit connection tracking for stateless flows v6]) >>>> + >>>> +CHECK_CONNTRACK() >>>> +ovn_start >>>> +OVS_TRAFFIC_VSWITCHD_START() >>>> +ADD_BR([br-int]) >>>> + >>>> +# Set external-ids in br-int needed for ovn-controller >>>> +ovs-vsctl \ >>>> + -- set Open_vSwitch . external-ids:system-id=hv1 \ >>>> + -- set Open_vSwitch . >>> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ >>>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ >>>> + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ >>>> + -- set bridge br-int fail-mode=secure >>> other-config:disable-in-band=true >>>> + >>>> +# Start ovn-controller >>>> +start_daemon ovn-controller >>>> + >>>> +# Logical network: >>>> +# One LR - R1 with switchess foo (fd11::/64) and >>>> +# bar (fd12::/64) connected to it >>>> +# >>>> +# foo -- R1 -- bar >>>> + >>>> +ovn-nbctl lr-add R1 >>>> + >>>> +ovn-nbctl ls-add foo >>>> +ovn-nbctl ls-add bar >>>> + >>>> +# Connect foo to R1 >>>> +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 fd11::1/64 >>>> +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ >>>> + type=router options:router-port=foo addresses=\"00:00:01:01:02:03\" >>>> + >>>> +# Connect bar to R1 >>>> +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 fd12::1/64 >>>> +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ >>>> + type=router options:router-port=bar addresses=\"00:00:01:01:02:04\" >>>> + >>>> +# Logical port 'foo1' in switch 'foo'. >>>> +ADD_NAMESPACES(foo1) >>>> +ADD_VETH(foo1, foo1, br-int, "fd11::2/64", "f0:00:00:01:02:03", \ >>>> + "fd11::1") >>>> +ovn-nbctl lsp-add foo foo1 \ >>>> +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 fd11::2" >>>> + >>>> +# Logical port 'bar1' in switch 'bar'. >>>> +ADD_NAMESPACES(bar1) >>>> +ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:05", \ >>>> +"fd12::1") >>>> +ovn-nbctl lsp-add bar bar1 \ >>>> +-- lsp-set-addresses bar1 "f0:00:00:01:02:05 fd12::2" >>>> + >>>> +# Config OVN load-balancer with a VIP. >>>> +ovn-nbctl lb-add lb1 [[fd30::2]]:80 [[fd12::2]]:80 tcp >>>> +ovn-nbctl ls-lb-add foo lb1 >>>> + >>>> +# Wait for ovn-controller to catch up. 
>>>> +ovn-nbctl --wait=hv sync >>>> + >>>> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ >>>> +grep 'nat(dst=\[[fd12::2\]]:80)']) >>>> + >>>> +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d >>> ' ' -f2) >>>> + >>>> +OVS_START_L7([bar1], [http6]) >>>> + >>>> +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], >>> [ignore], [ignore]) >>>> + >>>> +# check conntrack zone has tcp entry >>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>>> +FORMAT_CT(fd12::2) | grep -v fe80 | \ >>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>>> >>> +tcp,orig=(src=fd11::2,dst=fd12::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) >>>> +]) >>>> + >>>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>>> + >>>> +# now check wirh VIP >>>> +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [0], >>> [ignore], [ignore]) >>>> + >>>> +# check conntrack zone has tcp entry >>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>>> +FORMAT_CT(fd30::2) | grep -v fe80 | \ >>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>>> >>> +tcp,orig=(src=fd11::2,dst=fd30::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) >>>> +]) >>>> + >>>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>>> + >>>> +# remove lb >>>> +ovn-nbctl ls-lb-del foo lb1 >>>> + >>>> +# add stateless acl >>>> +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless >>>> +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless >>>> + >>>> +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], >>> [ignore], [ignore]) >>>> + >>>> +# check conntrack zone has no tcp entry >>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>>> +FORMAT_CT(fd12::2) | grep -v fe80 | \ >>>> +sed -e 
's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>>> +]) >>>> + >>>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>>> + >>>> +# add lb back >>>> +ovn-nbctl ls-lb-add foo lb1 >>>> + >>>> +# Wait for ovn-controller to catch up. >>>> +ovn-nbctl --wait=hv sync >>>> + >>>> +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ >>>> +grep 'nat(dst=\[[fd12::2\]]:80)']) >>>> + >>>> +# should not dnat so will not be able to connect >>>> +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [4], >>> [ignore], [ignore]) >>>> +# >>>> +# check conntrack zone has no tcp entry >>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ >>>> +FORMAT_CT(fd30::2) | grep -v fe80 | \ >>>> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl >>>> +]) >>>> + >>>> +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) >>>> + >>>> +OVS_APP_EXIT_AND_WAIT([ovn-controller]) >>>> + >>>> +as ovn-sb >>>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) >>>> + >>>> +as ovn-nb >>>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) >>>> + >>>> +as northd >>>> +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) >>>> + >>>> +as >>>> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d >>>> +/connection dropped.*/d"]) >>>> +AT_CLEANUP >>>> +]) >>>> -- >>>> 2.17.1 >>>> >>> >>> Thanks Venu for v2. Also thanks Numan for reviewing v1, and I saw that >>> Numan's comment was addressed. So, applied to main branch. >> >> Sorry I didn't get the chance to review it. Thanks for applying. 
>> >> Numan >> >>> >>> Han >>> _______________________________________________ >>> dev mailing list >>> dev@openvswitch.org >>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=05%7C01%7Cvenugopali%40nvidia.com%7C4803d2da0a014ad9807308dadfa9a430%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638068216964747698%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=UpsUNrqrFFU0deEwfes%2FewdJWVjYiTb1mvGJ2jSflZc%3D&reserved=0 >>> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev >>
diff --git a/northd/northd.c b/northd/northd.c index 7c48bb3b4..5d8ef612f 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -140,8 +140,8 @@ enum ovn_stage { PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 26, "ls_in_l2_unknown") \ \ /* Logical switch egress stages. */ \ - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \ + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") \ PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") \ PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ @@ -215,6 +215,7 @@ enum ovn_stage { #define REGBIT_ACL_LABEL "reg0[13]" #define REGBIT_FROM_RAMP "reg0[14]" #define REGBIT_PORT_SEC_DROP "reg0[15]" +#define REGBIT_ACL_STATELESS "reg0[16]" #define REG_ORIG_DIP_IPV4 "reg1" #define REG_ORIG_DIP_IPV6 "xxreg1" @@ -290,7 +291,7 @@ enum ovn_stage { * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | | * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | | * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | | - * | | REGBIT_ACL_LABEL | X | | + * | | REGBIT_ACL_{LABEL/STATELESS} | X | | * +----+----------------------------------------------+ X | | * | R5 | UNUSED | X | LB_L2_AFF_BACKEND_IP6 | * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | | @@ -5693,17 +5694,18 @@ build_stateless_filter(struct ovn_datapath *od, const struct nbrec_acl *acl, struct hmap *lflows) { + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; if (!strcmp(acl->direction, "from-lport")) { ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, acl->priority + OVN_ACL_PRI_OFFSET, acl->match, - "next;", + action, &acl->header_); } else { ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, acl->priority + OVN_ACL_PRI_OFFSET, acl->match, - "next;", + action, &acl->header_); } } @@ -5795,6 +5797,10 @@ build_pre_acls(struct ovn_datapath *od, const struct hmap *port_groups, 
REGBIT_CONNTRACK_DEFRAG" = 1; next;"); ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); + } else if (od->has_lb_vip) { + /* We'll build stateless filters if there are LB rules so that + * the stateless flows are not tracked in pre-lb. */ + build_stateless_filters(od, port_groups, lflows); } } @@ -5930,6 +5936,12 @@ build_pre_lb(struct ovn_datapath *od, const struct shash *meter_groups, 110, lflows); } + /* Do not send stateless flows via conntrack */ + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, + REGBIT_ACL_STATELESS" == 1", "next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, + REGBIT_ACL_STATELESS" == 1", "next;"); + /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send * packet to conntrack for defragmentation and possibly for unNATting. * @@ -6935,7 +6947,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, struct ovn_northd_lb *lb, } ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" : "ct_lb"); - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", + ip_match, lb_vip->vip_str); if (lb_vip->port_str) { ds_put_format(match, " && %s.dst == %s", proto, lb_vip->port_str); } diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index dffbba96d..ce5603169 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -474,7 +474,9 @@ priority-110 flow is added to skip over stateful ACLs. Multicast, IPv6 Neighbor Discovery and MLD traffic also skips stateful ACLs. For "allow-stateless" ACLs, a flow is added to bypass setting the hint for - connection tracker processing. + connection tracker processing when there are stateful ACLs or LB rules; + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching stateless + ACL flows. </p> <p> @@ -494,8 +496,10 @@ in ingress table <code>LB</code> and <code>Stateful</code>. 
It contains a priority-0 flow that simply moves traffic to the next table. Moreover it contains two priority-110 flows to move multicast, IPv6 Neighbor - Discovery and MLD traffic to the next table. If load balancing rules with - virtual IP addresses (and ports) are configured in + Discovery and MLD traffic to the next table. It also contains two + priority-110 flows to move stateless traffic, i.e., traffic for which + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If load + balancing rules with virtual IP addresses (and ports) are configured in <code>OVN_Northbound</code> database for a logical switch datapath, a priority-100 flow is added with the match <code>ip</code> to match on IP packets and sets the action <code>reg0[2] = 1; next;</code> to act as a @@ -1973,19 +1977,11 @@ output; </li> </ul> - <h3>Egress Table 0: Pre-LB</h3> + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> <p> - This table is similar to ingress table <code>Pre-LB</code>. It - contains a priority-0 flow that simply moves traffic to the next table. - Moreover it contains two priority-110 flows to move multicast, IPv6 - Neighbor Discovery and MLD traffic to the next table. If any load - balancing rules exist for the datapath, a priority-100 flow is added with - a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> - to act as a hint for table <code>Pre-stateful</code> to send IP packets - to the connection tracker for packet de-fragmentation and possibly DNAT - the destination VIP to one of the selected backend for already committed - load balanced traffic. + This is similar to ingress table <code>Pre-ACLs</code> except for + <code>to-lport</code> traffic. </p> <p> @@ -1998,11 +1994,29 @@ output; db="OVN_Northbound"/> table. 
</p> - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> + <p> + This table also has a priority-110 flow with the match + <code>outport == <var>I</var></code> for all logical switch + datapaths to move traffic to the next table. Where <var>I</var> + is the peer of a logical router port. This flow is added to + skip the connection tracking of packets which will be entering + logical router datapath from logical switch datapath for routing. + </p> + + + <h3>Egress Table 1: Pre-LB</h3> <p> - This is similar to ingress table <code>Pre-ACLs</code> except for - <code>to-lport</code> traffic. + This table is similar to ingress table <code>Pre-LB</code>. It + contains a priority-0 flow that simply moves traffic to the next table. + Moreover it contains two priority-110 flows to move multicast, IPv6 + Neighbor Discovery and MLD traffic to the next table. If any load + balancing rules exist for the datapath, a priority-100 flow is added with + a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> + to act as a hint for table <code>Pre-stateful</code> to send IP packets + to the connection tracker for packet de-fragmentation and possibly DNAT + the destination VIP to one of the selected backend for already committed + load balanced traffic. </p> <p> @@ -2015,15 +2029,6 @@ output; db="OVN_Northbound"/> table. </p> - <p> - This table also has a priority-110 flow with the match - <code>outport == <var>I</var></code> for all logical switch - datapaths to move traffic to the next table. Where <var>I</var> - is the peer of a logical router port. This flow is added to - skip the connection tracking of packets which will be entering - logical router datapath from logical switch datapath for routing. 
- </p> - <h3>Egress Table 2: Pre-stateful</h3> <p> diff --git a/ovn-nb.xml b/ovn-nb.xml index 0edc3da96..3ac7785e1 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -2159,6 +2159,9 @@ or outgoing TCP traffic directed to an IP address, then you probably also want to define another rule to allow incoming TCP traffic coming from this same IP address. + In addition, traffic that matches stateless ACLs will bypass + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be + used instead if the traffic is supposed to be load-balanced. </li> <li> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index ca4263eac..a2f8e8a20 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -2024,7 +2024,7 @@ AT_CLEANUP # This test case tests that when a logical switch has load balancers associated # (with VIPs configured), the below logical flow is added by ovn-northd. -# table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) +# table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[0]] = 1; next;) # This test case is added for the BZ - # https://bugzilla.redhat.com/show_bug.cgi?id=1849162 # @@ -2063,27 +2063,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl ls-lb-add sw0 lb2 check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear 
load_balancer $lb1 vips check ovn-nbctl clear load_balancer $lb3 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb2 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb4 vips @@ -2098,7 +2098,7 @@ check ovn-nbctl set load_balancer $lb4 vips:"10.0.0.13"="10.0.0.6" check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) # Now reverse the order of clearing the vip. 
@@ -2106,13 +2106,13 @@ check ovn-nbctl clear load_balancer $lb2 vips check ovn-nbctl clear load_balancer $lb4 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb1 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb3 vips @@ -3057,18 +3057,10 @@ for direction in from to; do done ovn-nbctl --wait=sb sync -# TCP packets should go to conntrack for load balancing. +# TCP packets should not go to conntrack for load balancing. flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl -ct_lb_mark { - ct_lb_mark { - reg0[[6]] = 0; - reg0[[12]] = 0; - ct_lb_mark /* default (use --ct to customize) */ { - output("lsp2"); - }; - }; -}; +output("lsp2"); ]) # UDP packets still go to conntrack. @@ -3201,18 +3193,10 @@ for direction in from to; do done ovn-nbctl --wait=sb sync -# TCP packets should go to conntrack for load balancing. +# TCP packets should not go to conntrack for load balancing. flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl -ct_lb_mark { - ct_lb_mark { - reg0[[6]] = 0; - reg0[[12]] = 0; - ct_lb_mark /* default (use --ct to customize) */ { - output("lsp2"); - }; - }; -}; +output("lsp2"); ]) # UDP packets still go to conntrack. @@ -4026,14 +4010,15 @@ check_stateful_flows() { table=? 
(ls_in_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == "sw0-lr0"), action=(next;) table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) + table=? (ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) ]) AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed 's/table=./table=?/'], [0], [dnl table=? (ls_in_pre_stateful ), priority=0 , match=(1), action=(next;) table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), action=(ct_next;) table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + table=? 
(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) ]) AT_CHECK([grep "ls_in_lb " sw0flows | sort | sed 's/table=../table=??/'], [0], [dnl @@ -4049,12 +4034,13 @@ check_stateful_flows() { ]) AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) + table=1 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) ]) AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl @@ -4094,6 +4080,7 @@ AT_CHECK([grep "ls_in_pre_lb" sw0flows | sort | sed 's/table=./table=?/'], [0], table=? (ls_in_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) table=? (ls_in_pre_lb ), priority=110 , match=(ip && inport == "sw0-lr0"), action=(next;) table=? (ls_in_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) + table=? 
(ls_in_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) ]) AT_CHECK([grep "ls_in_pre_stateful" sw0flows | sort | sed 's/table=./table=?/'], [0], [dnl @@ -4113,11 +4100,12 @@ AT_CHECK([grep "ls_in_stateful" sw0flows | sort | sed 's/table=../table=??/'], [ ]) AT_CHECK([grep "ls_out_pre_lb" sw0flows | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) - table=0 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) - table=0 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) - table=0 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) + table=1 (ls_out_pre_lb ), priority=0 , match=(1), action=(next;) + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.mcast), action=(next;) + table=1 (ls_out_pre_lb ), priority=110 , match=(eth.src == $svc_monitor_mac), action=(next;) + table=1 (ls_out_pre_lb ), priority=110 , match=(ip && outport == "sw0-lr0"), action=(next;) + table=1 (ls_out_pre_lb ), priority=110 , match=(nd || nd_rs || nd_ra || mldv1 || mldv2), action=(next;) + table=1 (ls_out_pre_lb ), priority=110 , match=(reg0[[16]] == 1), action=(next;) ]) AT_CHECK([grep "ls_out_pre_stateful" sw0flows | sort], [0], [dnl @@ -7677,7 +7665,7 @@ check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl table=7 (lr_in_dnat ), priority=110 , match=(ct.est && !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) table=7 (lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 
66.66.66.66; ct_lb_mark;) table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) table=12(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) @@ -7689,7 +7677,7 @@ check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl table=7 (lr_in_dnat ), priority=110 , match=(ct.est && !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_label.natted == 1), action=(next;) table=7 (lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) table=12(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) @@ -7701,7 +7689,7 @@ check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl table=7 (lr_in_dnat ), priority=110 , match=(ct.est && !ct.rel && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) table=7 (lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] 
== 1), action=(ct_lb_mark;) table=12(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) diff --git a/tests/ovn.at b/tests/ovn.at index f3bd53242..7abded46c 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -23777,7 +23777,7 @@ OVS_WAIT_FOR_OUTPUT( [ovn-sbctl dump-flows > sbflows ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed 's/table=..//'], 0, [dnl - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends=10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) ]) @@ -23820,7 +23820,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 AT_CHECK( [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep priority=120 |\ sed 's/table=../table=??/'], [0], [dnl - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) table=??(ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) ]) diff --git a/tests/system-ovn.at b/tests/system-ovn.at index b99578b9e..4bc9fb84f 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -9511,3 +9511,299 @@ NS_CHECK_EXEC([vm3], [nc 6666::1 666 -z], [0], [ignore], [ignore]) AT_CLEANUP ]) + +# for packets that match stateless ACL flows, make sure we bypass +# 
connection tracking, even with a LB in the switch. Testing for +# TCP should suffice. For v4 and v6. +# +OVN_FOR_EACH_NORTHD([ +AT_SETUP([omit connection tracking for stateless flows v4]) + +CHECK_CONNTRACK() +ovn_start +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-int]) + +# Set external-ids in br-int needed for ovn-controller +ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller +start_daemon ovn-controller + +# Logical network: +# One LR R1 with switches foo (192.168.1.0/24), bar (192.168.2.0/24), +# +# foo -- R1 -- bar + +ovn-nbctl lr-add R1 + +ovn-nbctl ls-add foo +ovn-nbctl ls-add bar + +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24 +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 192.168.2.1/24 + +# Connect foo to R1 +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ + type=router options:router-port=foo \ + -- lsp-set-addresses rp-foo router + +# Connect bar to R1 +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ + type=router options:router-port=bar \ + -- lsp-set-addresses rp-bar router + +# Logical port 'foo1' in switch 'foo'. +ADD_NAMESPACES(foo1) +ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ + "192.168.1.1") +ovn-nbctl lsp-add foo foo1 \ +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2" + +# Logical port 'bar1' in switch 'bar'. +ADD_NAMESPACES(bar1) +ADD_VETH(bar1, bar1, br-int, "192.168.2.2/24", "f0:00:00:01:02:04", \ + "192.168.2.1") +ovn-nbctl lsp-add bar bar1 \ +-- lsp-set-addresses bar1 "f0:00:00:01:02:04 192.168.2.2" + +# Config OVN load-balancer with a VIP. 
+ovn-nbctl lb-add lb1 30.30.30.30:80 "192.168.2.2:80" tcp +ovn-nbctl ls-lb-add foo lb1 + +# Wait for ovn-controller to catch up. +ovn-nbctl --wait=hv sync + +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ +grep 'nat(dst=192.168.2.2:80)']) + +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d ' ' -f2) + +OVS_START_L7([bar1], [http]) + +AT_CHECK([ip netns exec foo1 wget 192.168.2.2 -t 3 -T 1], [0], [ignore], [ignore]) + +# check conntrack zone has tcp entry +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ +FORMAT_CT(192.168.1.2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +tcp,orig=(src=192.168.1.2,dst=192.168.2.2,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +# now check with VIP +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [0], [ignore], [ignore]) + +# check conntrack zone has tcp entry +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ +FORMAT_CT(30.30.30.30) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +tcp,orig=(src=192.168.1.2,dst=30.30.30.30,sport=<cleared>,dport=<cleared>),reply=(src=192.168.2.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +# remove lb +ovn-nbctl ls-lb-del foo lb1 + +# add stateless acl +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless + +AT_CHECK([ip netns exec foo1 wget 192.168.2.2 -t 3 -T 1], [0], [ignore], [ignore]) + +# check conntrack zone has no tcp entry +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ +FORMAT_CT(192.168.1.2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +# add lb back +ovn-nbctl ls-lb-add foo lb1 + +# Wait for
ovn-controller to catch up. +ovn-nbctl --wait=hv sync + +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ +grep 'nat(dst=192.168.2.2:80)']) + +# should not dnat so will not be able to connect +AT_CHECK([ip netns exec foo1 wget 30.30.30.30 -t 3 -T 1], [4], [ignore], [ignore]) + +# check conntrack zone has no tcp entry +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ +FORMAT_CT(30.30.30.30) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +OVS_APP_EXIT_AND_WAIT([ovn-controller]) + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) + +as +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d +/connection dropped.*/d"]) +AT_CLEANUP +]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([omit connection tracking for stateless flows v6]) + +CHECK_CONNTRACK() +ovn_start +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-int]) + +# Set external-ids in br-int needed for ovn-controller +ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . 
external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller +start_daemon ovn-controller + +# Logical network: +# One LR - R1 with switches foo (fd11::/64) and +# bar (fd12::/64) connected to it +# +# foo -- R1 -- bar + +ovn-nbctl lr-add R1 + +ovn-nbctl ls-add foo +ovn-nbctl ls-add bar + +# Connect foo to R1 +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 fd11::1/64 +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ + type=router options:router-port=foo addresses=\"00:00:01:01:02:03\" + +# Connect bar to R1 +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 fd12::1/64 +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ + type=router options:router-port=bar addresses=\"00:00:01:01:02:04\" + +# Logical port 'foo1' in switch 'foo'. +ADD_NAMESPACES(foo1) +ADD_VETH(foo1, foo1, br-int, "fd11::2/64", "f0:00:00:01:02:03", \ + "fd11::1") +ovn-nbctl lsp-add foo foo1 \ +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 fd11::2" + +# Logical port 'bar1' in switch 'bar'. +ADD_NAMESPACES(bar1) +ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:05", \ +"fd12::1") +ovn-nbctl lsp-add bar bar1 \ +-- lsp-set-addresses bar1 "f0:00:00:01:02:05 fd12::2" + +# Config OVN load-balancer with a VIP. +ovn-nbctl lb-add lb1 [[fd30::2]]:80 [[fd12::2]]:80 tcp +ovn-nbctl ls-lb-add foo lb1 + +# Wait for ovn-controller to catch up.
+ovn-nbctl --wait=hv sync + +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ +grep 'nat(dst=\[[fd12::2\]]:80)']) + +zone_id=$(ovn-appctl -t ovn-controller ct-zone-list | grep foo1 | cut -d ' ' -f2) + +OVS_START_L7([bar1], [http6]) + +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], [ignore], [ignore]) + +# check conntrack zone has tcp entry +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ +FORMAT_CT(fd12::2) | grep -v fe80 | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +tcp,orig=(src=fd11::2,dst=fd12::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +# now check with VIP +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [0], [ignore], [ignore]) + +# check conntrack zone has tcp entry +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ +FORMAT_CT(fd30::2) | grep -v fe80 | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +tcp,orig=(src=fd11::2,dst=fd30::2,sport=<cleared>,dport=<cleared>),reply=(src=fd12::2,dst=fd11::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=2,protoinfo=(state=<cleared>) +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +# remove lb +ovn-nbctl ls-lb-del foo lb1 + +# add stateless acl +check ovn-nbctl acl-add foo from-lport 1 1 allow-stateless +check ovn-nbctl acl-add foo to-lport 1 1 allow-stateless + +AT_CHECK([ip netns exec foo1 wget http://[[fd12::2]] -t 3 -T 1], [0], [ignore], [ignore]) + +# check conntrack zone has no tcp entry +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ +FORMAT_CT(fd12::2) | grep -v fe80 | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +# add lb back +ovn-nbctl ls-lb-add foo lb1 + +# Wait for ovn-controller to catch up.
+ovn-nbctl --wait=hv sync + +OVS_WAIT_UNTIL([ovs-ofctl -O OpenFlow13 dump-groups br-int | \ +grep 'nat(dst=\[[fd12::2\]]:80)']) + +# should not dnat so will not be able to connect +AT_CHECK([ip netns exec foo1 wget http://[[fd30::2]] -t 3 -T 1], [4], [ignore], [ignore]) +# +# check conntrack zone has no tcp entry +AT_CHECK([ovs-appctl dpctl/dump-conntrack zone=$zone_id | \ +FORMAT_CT(fd30::2) | grep -v fe80 | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +OVS_APP_EXIT_AND_WAIT([ovn-controller]) + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) + +as +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d +/connection dropped.*/d"]) +AT_CLEANUP +])