Message ID | 20221108155044.23112-1-venugopali@nvidia.com |
---|---|
State | Changes Requested |
Delegated to: | Numan Siddique |
Headers | show |
Series | [ovs-dev] northd: bypass connection tracking for stateless flows when there are LB flows present | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_ovn-kubernetes | success | github build: passed |
ovsrobot/github-robot-_Build_and_Test | fail | github build: failed |
On Tue, Nov 8, 2022 at 7:51 AM venu.iyer <venugopali@nvidia.com> wrote: > > Currently, even stateless flows are subject to connection tracking when there are > LB rules (for DNAT). However, if a flow needs to be subjected to LB, then it shouldn't > be configured as stateless. > > A stateless flow means we should not track it, and this change exempts stateless > flows from being tracked regardless of whether LB rules are present or not. > > Signed-off-by: venu.iyer <venugopali@nvidia.com> > Acked-by: Han Zhou <hzhou@ovn.org> > --- > northd/northd.c | 24 +++++++++++++----- > northd/ovn-northd.8.xml | 56 ++++++++++++++++++++++------------------- > ovn-nb.xml | 3 +++ > tests/ovn-northd.at | 48 ++++++++++++----------------------- > tests/ovn.at | 4 +-- > 5 files changed, 69 insertions(+), 66 deletions(-) > > diff --git a/northd/northd.c b/northd/northd.c > index b7388afc5..da4beede6 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -137,8 +137,8 @@ enum ovn_stage { > PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 24, "ls_in_l2_unknown") \ > \ > /* Logical switch egress stages. */ \ > - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ > - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ > + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \ > + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") \ > PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ > PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") \ > PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ > @@ -210,6 +210,7 @@ enum ovn_stage { > #define REGBIT_ACL_LABEL "reg0[13]" > #define REGBIT_FROM_RAMP "reg0[14]" > #define REGBIT_PORT_SEC_DROP "reg0[15]" > +#define REGBIT_ACL_STATELESS "reg0[16]" > > #define REG_ORIG_DIP_IPV4 "reg1" > #define REG_ORIG_DIP_IPV6 "xxreg1" > @@ -271,7 +272,7 @@ enum ovn_stage { > * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | | > * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | | > * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | | > - * | | REGBIT_ACL_LABEL | X | | > + * | | REGBIT_ACL_{LABEL/STATELESS} | X | | > * +----+----------------------------------------------+ X | | > * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | | > * +----+----------------------------------------------+ E | | > @@ -5677,17 +5678,18 @@ build_stateless_filter(struct ovn_datapath *od, > const struct nbrec_acl *acl, > struct hmap *lflows) > { > + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; > if (!strcmp(acl->direction, "from-lport")) { > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, > acl->priority + OVN_ACL_PRI_OFFSET, > acl->match, > - "next;", > + action, > &acl->header_); > } else { > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, > acl->priority + OVN_ACL_PRI_OFFSET, > acl->match, > - "next;", > + action, > &acl->header_); > } > } > @@ -5779,6 +5781,10 @@ build_pre_acls(struct ovn_datapath *od, const struct hmap *port_groups, > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > + } else if (od->has_lb_vip) { > + /* We'll build stateless filters if there are LB rules so that > + * the stateless flows are not tracked in pre-lb. */ > + build_stateless_filters(od, port_groups, lflows); > } > } > > @@ -5913,6 +5919,11 @@ build_pre_lb(struct ovn_datapath *od, const struct shash *meter_groups, > S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB, > 110, lflows); > } > + /* Do not sent statless flows via conntrack */ > + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, > + REGBIT_ACL_STATELESS" == 1", "next;"); > + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, > + REGBIT_ACL_STATELESS" == 1", "next;"); > > /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send > * packet to conntrack for defragmentation and possibly for unNATting. > @@ -6918,7 +6929,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, struct ovn_northd_lb *lb, > } > ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" : "ct_lb"); > > - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); > + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", > + ip_match, lb_vip->vip_str); > if (lb_vip->vip_port) { > ds_put_format(match, " && %s.dst == %d", proto, lb_vip->vip_port); > } > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > index a70f2e678..162ec2b3b 100644 > --- a/northd/ovn-northd.8.xml > +++ b/northd/ovn-northd.8.xml > @@ -440,7 +440,9 @@ > priority-110 flow is added to skip over stateful ACLs. Multicast, IPv6 > Neighbor Discovery and MLD traffic also skips stateful ACLs. For > "allow-stateless" ACLs, a flow is added to bypass setting the hint for > - connection tracker processing. > + connection tracker processing when there are statelful ACLs or LB rules; > + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching such > + flows for this purpose. > </p> > > <p> > @@ -460,8 +462,10 @@ > in ingress table <code>LB</code> and <code>Stateful</code>. It contains > a priority-0 flow that simply moves traffic to the next table. Moreover > it contains two priority-110 flows to move multicast, IPv6 Neighbor > - Discovery and MLD traffic to the next table. If load balancing rules with > - virtual IP addresses (and ports) are configured in > + Discovery and MLD traffic to the next table. It also contains two > + priority-110 flows to move stateless traffic, i.e traffic for which > + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If load > + balancing rules with virtual IP addresses (and ports) are configured in > <code>OVN_Northbound</code> database for a logical switch datapath, a > priority-100 flow is added with the match <code>ip</code> to match on IP > packets and sets the action <code>reg0[2] = 1; next;</code> to act as a > @@ -1859,19 +1863,11 @@ output; > </li> > </ul> > > - <h3>Egress Table 0: Pre-LB</h3> > + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> > > <p> > - This table is similar to ingress table <code>Pre-LB</code>. It > - contains a priority-0 flow that simply moves traffic to the next table. > - Moreover it contains two priority-110 flows to move multicast, IPv6 > - Neighbor Discovery and MLD traffic to the next table. If any load > - balancing rules exist for the datapath, a priority-100 flow is added with > - a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> > - to act as a hint for table <code>Pre-stateful</code> to send IP packets > - to the connection tracker for packet de-fragmentation and possibly DNAT > - the destination VIP to one of the selected backend for already committed > - load balanced traffic. > + This is similar to ingress table <code>Pre-ACLs</code> except for > + <code>to-lport</code> traffic. > </p> > > <p> > @@ -1884,11 +1880,28 @@ output; > db="OVN_Northbound"/> table. > </p> > > - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> > + <p> > + This table also has a priority-110 flow with the match > + <code>outport == <var>I</var></code> for all logical switch > + datapaths to move traffic to the next table. Where <var>I</var> > + is the peer of a logical router port. This flow is added to > + skip the connection tracking of packets which will be entering > + logical router datapath from logical switch datapath for routing. > + </p> > + > + <h3>Egress Table 1: Pre-LB</h3> > > <p> > - This is similar to ingress table <code>Pre-ACLs</code> except for > - <code>to-lport</code> traffic. > + This table is similar to ingress table <code>Pre-LB</code>. It > + contains a priority-0 flow that simply moves traffic to the next table. > + Moreover it contains two priority-110 flows to move multicast, IPv6 > + Neighbor Discovery and MLD traffic to the next table. If any load > + balancing rules exist for the datapath, a priority-100 flow is added with > + a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> > + to act as a hint for table <code>Pre-stateful</code> to send IP packets > + to the connection tracker for packet de-fragmentation and possibly DNAT > + the destination VIP to one of the selected backend for already committed > + load balanced traffic. > </p> > > <p> > @@ -1901,15 +1914,6 @@ output; > db="OVN_Northbound"/> table. > </p> > > - <p> > - This table also has a priority-110 flow with the match > - <code>outport == <var>I</var></code> for all logical switch > - datapaths to move traffic to the next table. Where <var>I</var> > - is the peer of a logical router port. This flow is added to > - skip the connection tracking of packets which will be entering > - logical router datapath from logical switch datapath for routing. > - </p> > - > <h3>Egress Table 2: Pre-stateful</h3> > > <p> > diff --git a/ovn-nb.xml b/ovn-nb.xml > index f41e9d7c0..140dd9a4f 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -2063,6 +2063,9 @@ > outgoing TCP traffic directed to an IP address, then you probably > also want to define another rule to allow incoming TCP traffic coming > from this same IP address. > + In addition, traffic that matches stateless ACLs will bypass > + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be > + used instead if the traffic is supposed to be load-balanced. > </li> > > <li> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 4f399eccb..85d5acfed 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -2056,27 +2056,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl ls-lb-add sw0 lb2 > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb1 vips > check ovn-nbctl clear load_balancer $lb3 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb2 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb4 vips > @@ -2091,7 +2091,7 @@ check ovn-nbctl set load_balancer $lb4 vips:"10.0.0.13"="10.0.0.6" > > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > # Now reverse the order of clearing the vip. > @@ -2099,13 +2099,13 @@ check ovn-nbctl clear load_balancer $lb2 vips > check ovn-nbctl clear load_balancer $lb4 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb1 vips > check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) > ]) > > check ovn-nbctl clear load_balancer $lb3 vips > @@ -3044,18 +3044,10 @@ for direction in from to; do > done > ovn-nbctl --wait=sb sync > > -# TCP packets should go to conntrack for load balancing. > +# TCP packets should not go to conntrack for load balancing. > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl > -ct_lb_mark { > - ct_lb_mark { > - reg0[[6]] = 0; > - reg0[[12]] = 0; > - ct_lb_mark /* default (use --ct to customize) */ { > - output("lsp2"); > - }; > - }; > -}; > +output("lsp2"); > ]) > > # UDP packets still go to conntrack. > @@ -3188,18 +3180,10 @@ for direction in from to; do > done > ovn-nbctl --wait=sb sync > > -# TCP packets should go to conntrack for load balancing. > +# TCP packets should not go to conntrack for load balancing. > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl > -ct_lb_mark { > - ct_lb_mark { > - reg0[[6]] = 0; > - reg0[[12]] = 0; > - ct_lb_mark /* default (use --ct to customize) */ { > - output("lsp2"); > - }; > - }; > -}; > +output("lsp2"); > ]) > > # UDP packets still go to conntrack. > @@ -4015,8 +3999,8 @@ check_stateful_flows() { > table=? (ls_in_pre_stateful ), priority=0 , match=(1), action=(next;) > table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), action=(ct_next;) > table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) > ]) > > AT_CHECK([grep "ls_in_lb" sw0flows | sort | sed 's/table=../table=??/'], [0], [dnl > @@ -7650,7 +7634,7 @@ check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > table=11(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > @@ -7662,7 +7646,7 @@ check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && reg0 == 66.66.66.66 && ct_label.natted == 1), action=(next;) > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) > table=11(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) > @@ -7674,7 +7658,7 @@ check ovn-nbctl --wait=sb sync > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > table=11(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) > diff --git a/tests/ovn.at b/tests/ovn.at > index f8b8db4df..f43455f60 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -23656,7 +23656,7 @@ OVS_WAIT_FOR_OUTPUT( > [ovn-sbctl dump-flows > sbflows > ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed 's/table=..//'], 0, > [dnl > - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends= 10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) > ]) > > @@ -23699,7 +23699,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 > AT_CHECK( > [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep priority=120 |\ > sed 's/table=../table=??/'], [0], [dnl > - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > table=??(ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) > ]) > > -- > 2.17.1 > Thanks Venu. I have reviewed this with Venu internally and gave my Ack, but it would be good if folks can take another look before merging. Numan, since you were involved in an earlier discussion, probably you want to take a look? I think there are two things to pay attention to: 1. Switching the order of PRE_ACL and PRE_LB in the LS egress pipeline. I don't see any problem here but not sure if there was any specific reason behind the original order. 2. If there are VIP traffic matching stateless ACL, they won't work any more. We think that is ok, because we can't think of a use case that would add stateless ACLs for VIP traffic because this combination doesn't make any sense. If such traffic was allowed with CT, it could actually hide a mistake made by the user - e.g. the intention was to apply stateless ACLs to bypass CT for certain traffic that was not related to VIP, but by mistake the match condition added to the ACL touched some VIP traffic. So, we think the right thing to do is to let it fail when such *misconfiguration* happens. Let me know if you have any concerns about the above points. Thanks, Han
On Wed, Nov 9, 2022 at 4:11 PM Han Zhou <hzhou@ovn.org> wrote: > > On Tue, Nov 8, 2022 at 7:51 AM venu.iyer <venugopali@nvidia.com> wrote: > > > > Currently, even stateless flows are subject to connection tracking when > there are > > LB rules (for DNAT). However, if a flow needs to be subjected to LB, then > it shouldn't > > be configured as stateless. > > > > A stateless flow means we should not track it, and this change exempts > stateless > > flows from being tracked regardless of whether LB rules are present or > not. > > > > Signed-off-by: venu.iyer <venugopali@nvidia.com> > > Acked-by: Han Zhou <hzhou@ovn.org> > > --- > > northd/northd.c | 24 +++++++++++++----- > > northd/ovn-northd.8.xml | 56 ++++++++++++++++++++++------------------- > > ovn-nb.xml | 3 +++ > > tests/ovn-northd.at | 48 ++++++++++++----------------------- > > tests/ovn.at | 4 +-- > > 5 files changed, 69 insertions(+), 66 deletions(-) > > > > diff --git a/northd/northd.c b/northd/northd.c > > index b7388afc5..da4beede6 100644 > > --- a/northd/northd.c > > +++ b/northd/northd.c > > @@ -137,8 +137,8 @@ enum ovn_stage { > > PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 24, "ls_in_l2_unknown") > \ > > > \ > > /* Logical switch egress stages. */ > \ > > - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") > \ > > - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") > \ > > + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") > \ > > + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") > \ > > PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") > \ > > PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") > \ > > PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") > \ > > @@ -210,6 +210,7 @@ enum ovn_stage { > > #define REGBIT_ACL_LABEL "reg0[13]" > > #define REGBIT_FROM_RAMP "reg0[14]" > > #define REGBIT_PORT_SEC_DROP "reg0[15]" > > +#define REGBIT_ACL_STATELESS "reg0[16]" > > > > #define REG_ORIG_DIP_IPV4 "reg1" > > #define REG_ORIG_DIP_IPV6 "xxreg1" > > @@ -271,7 +272,7 @@ enum ovn_stage { > > * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | > | > > * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | > | > > * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | > | > > - * | | REGBIT_ACL_LABEL | X | > | > > + * | | REGBIT_ACL_{LABEL/STATELESS} | X | > | > > * +----+----------------------------------------------+ X | > | > > * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | > | > > * +----+----------------------------------------------+ E | > | > > @@ -5677,17 +5678,18 @@ build_stateless_filter(struct ovn_datapath *od, > > const struct nbrec_acl *acl, > > struct hmap *lflows) > > { > > + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; > > if (!strcmp(acl->direction, "from-lport")) { > > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, > > acl->priority + OVN_ACL_PRI_OFFSET, > > acl->match, > > - "next;", > > + action, > > &acl->header_); > > } else { > > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, > > acl->priority + OVN_ACL_PRI_OFFSET, > > acl->match, > > - "next;", > > + action, > > &acl->header_); > > } > > } > > @@ -5779,6 +5781,10 @@ build_pre_acls(struct ovn_datapath *od, const > struct hmap *port_groups, > > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > > ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", > > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > > + } else if (od->has_lb_vip) { > > + /* We'll build stateless filters if there are LB rules so that > > + * the stateless flows are not tracked in pre-lb. */ > > + build_stateless_filters(od, port_groups, lflows); > > } > > } > > > > @@ -5913,6 +5919,11 @@ build_pre_lb(struct ovn_datapath *od, const struct > shash *meter_groups, > > S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB, > > 110, lflows); > > } > > + /* Do not sent statless flows via conntrack */ > > + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, > > + REGBIT_ACL_STATELESS" == 1", "next;"); > > + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, > > + REGBIT_ACL_STATELESS" == 1", "next;"); > > > > /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send > > * packet to conntrack for defragmentation and possibly for > unNATting. > > @@ -6918,7 +6929,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, > struct ovn_northd_lb *lb, > > } > > ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" : > "ct_lb"); > > > > - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); > > + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", > > + ip_match, lb_vip->vip_str); > > if (lb_vip->vip_port) { > > ds_put_format(match, " && %s.dst == %d", proto, > lb_vip->vip_port); > > } > > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > > index a70f2e678..162ec2b3b 100644 > > --- a/northd/ovn-northd.8.xml > > +++ b/northd/ovn-northd.8.xml > > @@ -440,7 +440,9 @@ > > priority-110 flow is added to skip over stateful ACLs. Multicast, > IPv6 > > Neighbor Discovery and MLD traffic also skips stateful ACLs. For > > "allow-stateless" ACLs, a flow is added to bypass setting the hint > for > > - connection tracker processing. > > + connection tracker processing when there are statelful ACLs or LB > rules; > > + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching such > > + flows for this purpose. > > </p> > > > > <p> > > @@ -460,8 +462,10 @@ > > in ingress table <code>LB</code> and <code>Stateful</code>. It > contains > > a priority-0 flow that simply moves traffic to the next table. > Moreover > > it contains two priority-110 flows to move multicast, IPv6 Neighbor > > - Discovery and MLD traffic to the next table. If load balancing > rules with > > - virtual IP addresses (and ports) are configured in > > + Discovery and MLD traffic to the next table. It also contains two > > + priority-110 flows to move stateless traffic, i.e traffic for which > > + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If > load > > + balancing rules with virtual IP addresses (and ports) are > configured in > > <code>OVN_Northbound</code> database for a logical switch > datapath, a > > priority-100 flow is added with the match <code>ip</code> to match > on IP > > packets and sets the action <code>reg0[2] = 1; next;</code> to act > as a > > @@ -1859,19 +1863,11 @@ output; > > </li> > > </ul> > > > > - <h3>Egress Table 0: Pre-LB</h3> > > + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> > > > > <p> > > - This table is similar to ingress table <code>Pre-LB</code>. It > > - contains a priority-0 flow that simply moves traffic to the next > table. > > - Moreover it contains two priority-110 flows to move multicast, IPv6 > > - Neighbor Discovery and MLD traffic to the next table. If any load > > - balancing rules exist for the datapath, a priority-100 flow is > added with > > - a match of <code>ip</code> and action of <code>reg0[2] = 1; > next;</code> > > - to act as a hint for table <code>Pre-stateful</code> to send IP > packets > > - to the connection tracker for packet de-fragmentation and possibly > DNAT > > - the destination VIP to one of the selected backend for already > committed > > - load balanced traffic. > > + This is similar to ingress table <code>Pre-ACLs</code> except for > > + <code>to-lport</code> traffic. > > </p> > > > > <p> > > @@ -1884,11 +1880,28 @@ output; > > db="OVN_Northbound"/> table. > > </p> > > > > - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> > > + <p> > > + This table also has a priority-110 flow with the match > > + <code>outport == <var>I</var></code> for all logical switch > > + datapaths to move traffic to the next table. Where <var>I</var> > > + is the peer of a logical router port. This flow is added to > > + skip the connection tracking of packets which will be entering > > + logical router datapath from logical switch datapath for routing. > > + </p> > > + > > + <h3>Egress Table 1: Pre-LB</h3> > > > > <p> > > - This is similar to ingress table <code>Pre-ACLs</code> except for > > - <code>to-lport</code> traffic. > > + This table is similar to ingress table <code>Pre-LB</code>. It > > + contains a priority-0 flow that simply moves traffic to the next > table. > > + Moreover it contains two priority-110 flows to move multicast, IPv6 > > + Neighbor Discovery and MLD traffic to the next table. If any load > > + balancing rules exist for the datapath, a priority-100 flow is > added with > > + a match of <code>ip</code> and action of <code>reg0[2] = 1; > next;</code> > > + to act as a hint for table <code>Pre-stateful</code> to send IP > packets > > + to the connection tracker for packet de-fragmentation and possibly > DNAT > > + the destination VIP to one of the selected backend for already > committed > > + load balanced traffic. > > </p> > > > > <p> > > @@ -1901,15 +1914,6 @@ output; > > db="OVN_Northbound"/> table. > > </p> > > > > - <p> > > - This table also has a priority-110 flow with the match > > - <code>outport == <var>I</var></code> for all logical switch > > - datapaths to move traffic to the next table. Where <var>I</var> > > - is the peer of a logical router port. This flow is added to > > - skip the connection tracking of packets which will be entering > > - logical router datapath from logical switch datapath for routing. > > - </p> > > - > > <h3>Egress Table 2: Pre-stateful</h3> > > > > <p> > > diff --git a/ovn-nb.xml b/ovn-nb.xml > > index f41e9d7c0..140dd9a4f 100644 > > --- a/ovn-nb.xml > > +++ b/ovn-nb.xml > > @@ -2063,6 +2063,9 @@ > > outgoing TCP traffic directed to an IP address, then you > probably > > also want to define another rule to allow incoming TCP traffic > coming > > from this same IP address. > > + In addition, traffic that matches stateless ACLs will bypass > > + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be > > + used instead if the traffic is supposed to be load-balanced. > > </li> > > > > <li> > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > > index 4f399eccb..85d5acfed 100644 > > --- a/tests/ovn-northd.at > > +++ b/tests/ovn-northd.at > > @@ -2056,27 +2056,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 > > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl ls-lb-add sw0 lb2 > > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl clear load_balancer $lb1 vips > > check ovn-nbctl clear load_balancer $lb3 vips > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl clear load_balancer $lb2 vips > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl clear load_balancer $lb4 vips > > @@ -2091,7 +2091,7 @@ check ovn-nbctl set load_balancer $lb4 > vips:"10.0.0.13"="10.0.0.6" > > > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > # Now reverse the order of clearing the vip. > > @@ -2099,13 +2099,13 @@ check ovn-nbctl clear load_balancer $lb2 vips > > check ovn-nbctl clear load_balancer $lb4 vips > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl clear load_balancer $lb1 vips > > check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > grep reg0 | sort], [0], [dnl > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > action=(reg0[[2]] = 1; next;) > > ]) > > > > check ovn-nbctl clear load_balancer $lb3 vips > > @@ -3044,18 +3044,10 @@ for direction in from to; do > > done > > ovn-nbctl --wait=sb sync > > > > -# TCP packets should go to conntrack for load balancing. > > +# TCP packets should not go to conntrack for load balancing. > > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], > [0], [dnl > > -ct_lb_mark { > > - ct_lb_mark { > > - reg0[[6]] = 0; > > - reg0[[12]] = 0; > > - ct_lb_mark /* default (use --ct to customize) */ { > > - output("lsp2"); > > - }; > > - }; > > -}; > > +output("lsp2"); > > ]) > > > > # UDP packets still go to conntrack. > > @@ -3188,18 +3180,10 @@ for direction in from to; do > > done > > ovn-nbctl --wait=sb sync > > > > -# TCP packets should go to conntrack for load balancing. > > +# TCP packets should not go to conntrack for load balancing. > > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], > [0], [dnl > > -ct_lb_mark { > > - ct_lb_mark { > > - reg0[[6]] = 0; > > - reg0[[12]] = 0; > > - ct_lb_mark /* default (use --ct to customize) */ { > > - output("lsp2"); > > - }; > > - }; > > -}; > > +output("lsp2"); > > ]) > > > > # UDP packets still go to conntrack. > > @@ -4015,8 +3999,8 @@ check_stateful_flows() { > > table=? (ls_in_pre_stateful ), priority=0 , match=(1), > action=(next;) > > table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), > action=(ct_next;) > > table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb_mark;) > > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; > ct_lb_mark;) > > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; > ct_lb_mark;) > > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > reg2[[0..15]] = 80; ct_lb_mark;) > > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; > reg2[[0..15]] = 80; ct_lb_mark;) > > ]) > > > > AT_CHECK([grep "ls_in_lb" sw0flows | sort | sed > 's/table=../table=??/'], [0], [dnl > > @@ -7650,7 +7634,7 @@ check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && > reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && > reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb_mark;) > > table=11(ls_in_lb ), priority=110 , match=(ct.new && > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; > ct_lb_mark(backends=42.42.42.2);) > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb_mark;) > > @@ -7662,7 +7646,7 @@ check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && > reg0 == 66.66.66.66 && ct_label.natted == 1), action=(next;) > > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && > reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb;) > > table=11(ls_in_lb ), priority=110 , match=(ct.new && > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb;) > > @@ -7674,7 +7658,7 @@ check ovn-nbctl --wait=sb sync > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && > reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && > reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb_mark;) > > table=11(ls_in_lb ), priority=110 , match=(ct.new && > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; > ct_lb_mark(backends=42.42.42.2);) > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > action=(ct_lb_mark;) > > diff --git a/tests/ovn.at b/tests/ovn.at > > index f8b8db4df..f43455f60 100644 > > --- a/tests/ovn.at > > +++ b/tests/ovn.at > > @@ -23656,7 +23656,7 @@ OVS_WAIT_FOR_OUTPUT( > > [ovn-sbctl dump-flows > sbflows > > ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed > 's/table=..//'], 0, > > [dnl > > - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && > tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > > + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > reg2[[0..15]] = 80; ct_lb_mark;) > > (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == > 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends= > 10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) > > ]) > > > > @@ -23699,7 +23699,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 > > AT_CHECK( > > [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep > priority=120 |\ > > sed 's/table=../table=??/'], [0], [dnl > > - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; > ct_lb_mark;) > > + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > reg2[[0..15]] = 80; ct_lb_mark;) > > table=??(ls_in_lb ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) > > ]) > > > > -- > > 2.17.1 > > > > Thanks Venu. I have reviewed this with Venu internally and gave my Ack, but > it would be good if folks can take another look before merging. > Numan, since you were involved in an earlier discussion, probably you want > to take a look? Sure. I'll take a look and thanks for the comments below. Numan > I think there are two things to pay attention to: > 1. Switching the order of PRE_ACL and PRE_LB in the LS egress pipeline. I > don't see any problem here but not sure if there was any specific reason > behind the original order. > 2. If there are VIP traffic matching stateless ACL, they won't work any > more. We think that is ok, because we can't think of a use case that would > add stateless ACLs for VIP traffic because this combination doesn't make > any sense. If such traffic was allowed with CT, it could actually hide a > mistake made by the user - e.g. the intention was to apply stateless ACLs > to bypass CT for certain traffic that was not related to VIP, but by > mistake the match condition added to the ACL touched some VIP traffic. So, > we think the right thing to do is to let it fail when such > *misconfiguration* happens. > > Let me know if you have any concerns about the above points. > > Thanks, > Han > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
On Wed, Nov 9, 2022 at 4:15 PM Numan Siddique <numans@ovn.org> wrote: > > On Wed, Nov 9, 2022 at 4:11 PM Han Zhou <hzhou@ovn.org> wrote: > > > > On Tue, Nov 8, 2022 at 7:51 AM venu.iyer <venugopali@nvidia.com> wrote: > > > > > > Currently, even stateless flows are subject to connection tracking when > > there are > > > LB rules (for DNAT). However, if a flow needs to be subjected to LB, then > > it shouldn't > > > be configured as stateless. > > > > > > A stateless flow means we should not track it, and this change exempts > > stateless > > > flows from being tracked regardless of whether LB rules are present or > > not. > > > > > > Signed-off-by: venu.iyer <venugopali@nvidia.com> > > > Acked-by: Han Zhou <hzhou@ovn.org> I had a quick look and it looks ok to me. Can you please add system tests to cover some scenarios for this use case to avoid future regressions ? Thanks Numan > > > --- > > > northd/northd.c | 24 +++++++++++++----- > > > northd/ovn-northd.8.xml | 56 ++++++++++++++++++++++------------------- > > > ovn-nb.xml | 3 +++ > > > tests/ovn-northd.at | 48 ++++++++++++----------------------- > > > tests/ovn.at | 4 +-- > > > 5 files changed, 69 insertions(+), 66 deletions(-) > > > > > > diff --git a/northd/northd.c b/northd/northd.c > > > index b7388afc5..da4beede6 100644 > > > --- a/northd/northd.c > > > +++ b/northd/northd.c > > > @@ -137,8 +137,8 @@ enum ovn_stage { > > > PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 24, "ls_in_l2_unknown") > > \ > > > > > \ > > > /* Logical switch egress stages. */ > > \ > > > - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") > > \ > > > - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") > > \ > > > + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") > > \ > > > + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") > > \ > > > PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") > > \ > > > PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") > > \ > > > PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") > > \ > > > @@ -210,6 +210,7 @@ enum ovn_stage { > > > #define REGBIT_ACL_LABEL "reg0[13]" > > > #define REGBIT_FROM_RAMP "reg0[14]" > > > #define REGBIT_PORT_SEC_DROP "reg0[15]" > > > +#define REGBIT_ACL_STATELESS "reg0[16]" > > > > > > #define REG_ORIG_DIP_IPV4 "reg1" > > > #define REG_ORIG_DIP_IPV6 "xxreg1" > > > @@ -271,7 +272,7 @@ enum ovn_stage { > > > * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | > > | > > > * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | > > | > > > * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | > > | > > > - * | | REGBIT_ACL_LABEL | X | > > | > > > + * | | REGBIT_ACL_{LABEL/STATELESS} | X | > > | > > > * +----+----------------------------------------------+ X | > > | > > > * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | > > | > > > * +----+----------------------------------------------+ E | > > | > > > @@ -5677,17 +5678,18 @@ build_stateless_filter(struct ovn_datapath *od, > > > const struct nbrec_acl *acl, > > > struct hmap *lflows) > > > { > > > + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; > > > if (!strcmp(acl->direction, "from-lport")) { > > > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, > > > acl->priority + OVN_ACL_PRI_OFFSET, > > > acl->match, > > > - "next;", > > > + action, > > > &acl->header_); > > > } else { > > > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, > > > acl->priority + OVN_ACL_PRI_OFFSET, > > > acl->match, > > > - "next;", > > > + action, > > > &acl->header_); > > > } > > > } > > > @@ -5779,6 +5781,10 @@ build_pre_acls(struct ovn_datapath *od, const > > struct hmap *port_groups, > > > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > > > ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", > > > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > > > + } else if (od->has_lb_vip) { > > > + /* We'll build stateless filters if there are LB rules so that > > > + * the stateless flows are not tracked in pre-lb. */ > > > + build_stateless_filters(od, port_groups, lflows); > > > } > > > } > > > > > > @@ -5913,6 +5919,11 @@ build_pre_lb(struct ovn_datapath *od, const struct > > shash *meter_groups, > > > S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB, > > > 110, lflows); > > > } > > > + /* Do not sent statless flows via conntrack */ > > > + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, > > > + REGBIT_ACL_STATELESS" == 1", "next;"); > > > + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, > > > + REGBIT_ACL_STATELESS" == 1", "next;"); > > > > > > /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send > > > * packet to conntrack for defragmentation and possibly for > > unNATting. > > > @@ -6918,7 +6929,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, > > struct ovn_northd_lb *lb, > > > } > > > ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" : > > "ct_lb"); > > > > > > - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); > > > + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", > > > + ip_match, lb_vip->vip_str); > > > if (lb_vip->vip_port) { > > > ds_put_format(match, " && %s.dst == %d", proto, > > lb_vip->vip_port); > > > } > > > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > > > index a70f2e678..162ec2b3b 100644 > > > --- a/northd/ovn-northd.8.xml > > > +++ b/northd/ovn-northd.8.xml > > > @@ -440,7 +440,9 @@ > > > priority-110 flow is added to skip over stateful ACLs. Multicast, > > IPv6 > > > Neighbor Discovery and MLD traffic also skips stateful ACLs. For > > > "allow-stateless" ACLs, a flow is added to bypass setting the hint > > for > > > - connection tracker processing. > > > + connection tracker processing when there are statelful ACLs or LB > > rules; > > > + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching such > > > + flows for this purpose. > > > </p> > > > > > > <p> > > > @@ -460,8 +462,10 @@ > > > in ingress table <code>LB</code> and <code>Stateful</code>. It > > contains > > > a priority-0 flow that simply moves traffic to the next table. > > Moreover > > > it contains two priority-110 flows to move multicast, IPv6 Neighbor > > > - Discovery and MLD traffic to the next table. If load balancing > > rules with > > > - virtual IP addresses (and ports) are configured in > > > + Discovery and MLD traffic to the next table. It also contains two > > > + priority-110 flows to move stateless traffic, i.e traffic for which > > > + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If > > load > > > + balancing rules with virtual IP addresses (and ports) are > > configured in > > > <code>OVN_Northbound</code> database for a logical switch > > datapath, a > > > priority-100 flow is added with the match <code>ip</code> to match > > on IP > > > packets and sets the action <code>reg0[2] = 1; next;</code> to act > > as a > > > @@ -1859,19 +1863,11 @@ output; > > > </li> > > > </ul> > > > > > > - <h3>Egress Table 0: Pre-LB</h3> > > > + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> > > > > > > <p> > > > - This table is similar to ingress table <code>Pre-LB</code>. It > > > - contains a priority-0 flow that simply moves traffic to the next > > table. > > > - Moreover it contains two priority-110 flows to move multicast, IPv6 > > > - Neighbor Discovery and MLD traffic to the next table. If any load > > > - balancing rules exist for the datapath, a priority-100 flow is > > added with > > > - a match of <code>ip</code> and action of <code>reg0[2] = 1; > > next;</code> > > > - to act as a hint for table <code>Pre-stateful</code> to send IP > > packets > > > - to the connection tracker for packet de-fragmentation and possibly > > DNAT > > > - the destination VIP to one of the selected backend for already > > committed > > > - load balanced traffic. > > > + This is similar to ingress table <code>Pre-ACLs</code> except for > > > + <code>to-lport</code> traffic. > > > </p> > > > > > > <p> > > > @@ -1884,11 +1880,28 @@ output; > > > db="OVN_Northbound"/> table. > > > </p> > > > > > > - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> > > > + <p> > > > + This table also has a priority-110 flow with the match > > > + <code>outport == <var>I</var></code> for all logical switch > > > + datapaths to move traffic to the next table. Where <var>I</var> > > > + is the peer of a logical router port. This flow is added to > > > + skip the connection tracking of packets which will be entering > > > + logical router datapath from logical switch datapath for routing. > > > + </p> > > > + > > > + <h3>Egress Table 1: Pre-LB</h3> > > > > > > <p> > > > - This is similar to ingress table <code>Pre-ACLs</code> except for > > > - <code>to-lport</code> traffic. > > > + This table is similar to ingress table <code>Pre-LB</code>. It > > > + contains a priority-0 flow that simply moves traffic to the next > > table. > > > + Moreover it contains two priority-110 flows to move multicast, IPv6 > > > + Neighbor Discovery and MLD traffic to the next table. If any load > > > + balancing rules exist for the datapath, a priority-100 flow is > > added with > > > + a match of <code>ip</code> and action of <code>reg0[2] = 1; > > next;</code> > > > + to act as a hint for table <code>Pre-stateful</code> to send IP > > packets > > > + to the connection tracker for packet de-fragmentation and possibly > > DNAT > > > + the destination VIP to one of the selected backend for already > > committed > > > + load balanced traffic. > > > </p> > > > > > > <p> > > > @@ -1901,15 +1914,6 @@ output; > > > db="OVN_Northbound"/> table. > > > </p> > > > > > > - <p> > > > - This table also has a priority-110 flow with the match > > > - <code>outport == <var>I</var></code> for all logical switch > > > - datapaths to move traffic to the next table. Where <var>I</var> > > > - is the peer of a logical router port. This flow is added to > > > - skip the connection tracking of packets which will be entering > > > - logical router datapath from logical switch datapath for routing. > > > - </p> > > > - > > > <h3>Egress Table 2: Pre-stateful</h3> > > > > > > <p> > > > diff --git a/ovn-nb.xml b/ovn-nb.xml > > > index f41e9d7c0..140dd9a4f 100644 > > > --- a/ovn-nb.xml > > > +++ b/ovn-nb.xml > > > @@ -2063,6 +2063,9 @@ > > > outgoing TCP traffic directed to an IP address, then you > > probably > > > also want to define another rule to allow incoming TCP traffic > > coming > > > from this same IP address. > > > + In addition, traffic that matches stateless ACLs will bypass > > > + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be > > > + used instead if the traffic is supposed to be load-balanced. > > > </li> > > > > > > <li> > > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > > > index 4f399eccb..85d5acfed 100644 > > > --- a/tests/ovn-northd.at > > > +++ b/tests/ovn-northd.at > > > @@ -2056,27 +2056,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 > > > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 > > > check ovn-nbctl --wait=sb sync > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > grep reg0 | sort], [0], [dnl > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > ]) > > > > > > check ovn-nbctl ls-lb-add sw0 lb2 > > > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 > > > check ovn-nbctl --wait=sb sync > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > grep reg0 | sort], [0], [dnl > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > ]) > > > > > > check ovn-nbctl clear load_balancer $lb1 vips > > > check ovn-nbctl clear load_balancer $lb3 vips > > > check ovn-nbctl --wait=sb sync > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > grep reg0 | sort], [0], [dnl > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > ]) > > > > > > check ovn-nbctl clear load_balancer $lb2 vips > > > check ovn-nbctl --wait=sb sync > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > grep reg0 | sort], [0], [dnl > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > ]) > > > > > > check ovn-nbctl clear load_balancer $lb4 vips > > > @@ -2091,7 +2091,7 @@ check ovn-nbctl set load_balancer $lb4 > > vips:"10.0.0.13"="10.0.0.6" > > > > > > check ovn-nbctl --wait=sb sync > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > grep reg0 | sort], [0], [dnl > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > ]) > > > > > > # Now reverse the order of clearing the vip. > > > @@ -2099,13 +2099,13 @@ check ovn-nbctl clear load_balancer $lb2 vips > > > check ovn-nbctl clear load_balancer $lb4 vips > > > check ovn-nbctl --wait=sb sync > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > grep reg0 | sort], [0], [dnl > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > ]) > > > > > > check ovn-nbctl clear load_balancer $lb1 vips > > > check ovn-nbctl --wait=sb sync > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > grep reg0 | sort], [0], [dnl > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > action=(reg0[[2]] = 1; next;) > > > ]) > > > > > > check ovn-nbctl clear load_balancer $lb3 vips > > > @@ -3044,18 +3044,10 @@ for direction in from to; do > > > done > > > ovn-nbctl --wait=sb sync > > > > > > -# TCP packets should go to conntrack for load balancing. > > > +# TCP packets should not go to conntrack for load balancing. > > > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > > > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], > > [0], [dnl > > > -ct_lb_mark { > > > - ct_lb_mark { > > > - reg0[[6]] = 0; > > > - reg0[[12]] = 0; > > > - ct_lb_mark /* default (use --ct to customize) */ { > > > - output("lsp2"); > > > - }; > > > - }; > > > -}; > > > +output("lsp2"); > > > ]) > > > > > > # UDP packets still go to conntrack. > > > @@ -3188,18 +3180,10 @@ for direction in from to; do > > > done > > > ovn-nbctl --wait=sb sync > > > > > > -# TCP packets should go to conntrack for load balancing. > > > +# TCP packets should not go to conntrack for load balancing. > > > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > > > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], > > [0], [dnl > > > -ct_lb_mark { > > > - ct_lb_mark { > > > - reg0[[6]] = 0; > > > - reg0[[12]] = 0; > > > - ct_lb_mark /* default (use --ct to customize) */ { > > > - output("lsp2"); > > > - }; > > > - }; > > > -}; > > > +output("lsp2"); > > > ]) > > > > > > # UDP packets still go to conntrack. > > > @@ -4015,8 +3999,8 @@ check_stateful_flows() { > > > table=? (ls_in_pre_stateful ), priority=0 , match=(1), > > action=(next;) > > > table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), > > action=(ct_next;) > > > table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > > action=(ct_lb_mark;) > > > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; > > ct_lb_mark;) > > > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; > > ct_lb_mark;) > > > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > > reg2[[0..15]] = 80; ct_lb_mark;) > > > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; > > reg2[[0..15]] = 80; ct_lb_mark;) > > > ]) > > > > > > AT_CHECK([grep "ls_in_lb" sw0flows | sort | sed > > 's/table=../table=??/'], [0], [dnl > > > @@ -7650,7 +7634,7 @@ check ovn-nbctl --wait=sb sync > > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && > > reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && > > reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > > action=(ct_lb_mark;) > > > table=11(ls_in_lb ), priority=110 , match=(ct.new && > > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; > > ct_lb_mark(backends=42.42.42.2);) > > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > > action=(ct_lb_mark;) > > > @@ -7662,7 +7646,7 @@ check ovn-nbctl --wait=sb sync > > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && > > reg0 == 66.66.66.66 && ct_label.natted == 1), action=(next;) > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && > > reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) > > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > > action=(ct_lb;) > > > table=11(ls_in_lb ), priority=110 , match=(ct.new && > > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) > > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > > action=(ct_lb;) > > > @@ -7674,7 +7658,7 @@ check ovn-nbctl --wait=sb sync > > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && > > reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && > > reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > > action=(ct_lb_mark;) > > > table=11(ls_in_lb ), priority=110 , match=(ct.new && > > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; > > ct_lb_mark(backends=42.42.42.2);) > > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > > action=(ct_lb_mark;) > > > diff --git a/tests/ovn.at b/tests/ovn.at > > > index f8b8db4df..f43455f60 100644 > > > --- a/tests/ovn.at > > > +++ b/tests/ovn.at > > > @@ -23656,7 +23656,7 @@ OVS_WAIT_FOR_OUTPUT( > > > [ovn-sbctl dump-flows > sbflows > > > ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed > > 's/table=..//'], 0, > > > [dnl > > > - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && > > tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > > > + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && > > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > > reg2[[0..15]] = 80; ct_lb_mark;) > > > (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == > > 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends= > > 10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) > > > ]) > > > > > > @@ -23699,7 +23699,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 > > > AT_CHECK( > > > [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep > > priority=120 |\ > > > sed 's/table=../table=??/'], [0], [dnl > > > - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; > > ct_lb_mark;) > > > + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > > reg2[[0..15]] = 80; ct_lb_mark;) > > > table=??(ls_in_lb ), priority=120 , match=(ct.new && > > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) > > > ]) > > > > > > -- > > > 2.17.1 > > > > > > > Thanks Venu. I have reviewed this with Venu internally and gave my Ack, but > > it would be good if folks can take another look before merging. > > Numan, since you were involved in an earlier discussion, probably you want > > to take a look? > > Sure. I'll take a look and thanks for the comments below. > > Numan > > > I think there are two things to pay attention to: > > 1. Switching the order of PRE_ACL and PRE_LB in the LS egress pipeline. I > > don't see any problem here but not sure if there was any specific reason > > behind the original order. > > 2. If there are VIP traffic matching stateless ACL, they won't work any > > more. We think that is ok, because we can't think of a use case that would > > add stateless ACLs for VIP traffic because this combination doesn't make > > any sense. If such traffic was allowed with CT, it could actually hide a > > mistake made by the user - e.g. the intention was to apply stateless ACLs > > to bypass CT for certain traffic that was not related to VIP, but by > > mistake the match condition added to the ACL touched some VIP traffic. So, > > we think the right thing to do is to let it fail when such > > *misconfiguration* happens. > > > > Let me know if you have any concerns about the above points. > > > > Thanks, > > Han > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > >
Thanks for looking at this, Numan. I haven't worked with the system tests before, and am having some issues getting it to run. So, let me get back to you on this. regards, -venu
On Wed, Nov 16, 2022 at 3:05 PM Venugopal Iyer via dev <ovs-dev@openvswitch.org> wrote: > > Thanks for looking at this, Numan. I haven't worked with the system tests before, and am having some > issues getting it to run. So, let me get back to you on this. > Sure. Please make sure that the test case checks that there is no conntrack entry for the stateless traffic. Existing system tests should give you a good reference. Let me know if you have any questions. Numan > regards, > > -venu > > ________________________________________ > From: Numan Siddique <numans@ovn.org> > Sent: Monday, November 14, 2022 9:17 AM > To: Han Zhou > Cc: Venugopal Iyer; dev@openvswitch.org > Subject: Re: [ovs-dev] [PATCH ovn] northd: bypass connection tracking for stateless flows when there are LB flows present > > External email: Use caution opening links or attachments > > > On Wed, Nov 9, 2022 at 4:15 PM Numan Siddique <numans@ovn.org> wrote: > > > > On Wed, Nov 9, 2022 at 4:11 PM Han Zhou <hzhou@ovn.org> wrote: > > > > > > On Tue, Nov 8, 2022 at 7:51 AM venu.iyer <venugopali@nvidia.com> wrote: > > > > > > > > Currently, even stateless flows are subject to connection tracking when > > > there are > > > > LB rules (for DNAT). However, if a flow needs to be subjected to LB, then > > > it shouldn't > > > > be configured as stateless. > > > > > > > > A stateless flow means we should not track it, and this change exempts > > > stateless > > > > flows from being tracked regardless of whether LB rules are present or > > > not. > > > > > > > > Signed-off-by: venu.iyer <venugopali@nvidia.com> > > > > Acked-by: Han Zhou <hzhou@ovn.org> > > I had a quick look and it looks ok to me. > Can you please add system tests to cover some scenarios for this use > case to avoid future regressions ? > > Thanks > Numan > > > > > > > > --- > > > > northd/northd.c | 24 +++++++++++++----- > > > > northd/ovn-northd.8.xml | 56 ++++++++++++++++++++++------------------- > > > > ovn-nb.xml | 3 +++ > > > > tests/ovn-northd.at | 48 ++++++++++++----------------------- > > > > tests/ovn.at | 4 +-- > > > > 5 files changed, 69 insertions(+), 66 deletions(-) > > > > > > > > diff --git a/northd/northd.c b/northd/northd.c > > > > index b7388afc5..da4beede6 100644 > > > > --- a/northd/northd.c > > > > +++ b/northd/northd.c > > > > @@ -137,8 +137,8 @@ enum ovn_stage { > > > > PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 24, "ls_in_l2_unknown") > > > \ > > > > > > > \ > > > > /* Logical switch egress stages. */ > > > \ > > > > - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") > > > \ > > > > - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") > > > \ > > > > + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") > > > \ > > > > + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") > > > \ > > > > PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") > > > \ > > > > PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") > > > \ > > > > PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") > > > \ > > > > @@ -210,6 +210,7 @@ enum ovn_stage { > > > > #define REGBIT_ACL_LABEL "reg0[13]" > > > > #define REGBIT_FROM_RAMP "reg0[14]" > > > > #define REGBIT_PORT_SEC_DROP "reg0[15]" > > > > +#define REGBIT_ACL_STATELESS "reg0[16]" > > > > > > > > #define REG_ORIG_DIP_IPV4 "reg1" > > > > #define REG_ORIG_DIP_IPV6 "xxreg1" > > > > @@ -271,7 +272,7 @@ enum ovn_stage { > > > > * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | > > > | > > > > * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | > > > | > > > > * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | > > > | > > > > - * | | REGBIT_ACL_LABEL | X | > > > | > > > > + * | | REGBIT_ACL_{LABEL/STATELESS} | X | > > > | > > > > * +----+----------------------------------------------+ X | > > > | > > > > * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | > > > | > > > > * +----+----------------------------------------------+ E | > > > | > > > > @@ -5677,17 +5678,18 @@ build_stateless_filter(struct ovn_datapath *od, > > > > const struct nbrec_acl *acl, > > > > struct hmap *lflows) > > > > { > > > > + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; > > > > if (!strcmp(acl->direction, "from-lport")) { > > > > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, > > > > acl->priority + OVN_ACL_PRI_OFFSET, > > > > acl->match, > > > > - "next;", > > > > + action, > > > > &acl->header_); > > > > } else { > > > > ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, > > > > acl->priority + OVN_ACL_PRI_OFFSET, > > > > acl->match, > > > > - "next;", > > > > + action, > > > > &acl->header_); > > > > } > > > > } > > > > @@ -5779,6 +5781,10 @@ build_pre_acls(struct ovn_datapath *od, const > > > struct hmap *port_groups, > > > > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > > > > ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", > > > > REGBIT_CONNTRACK_DEFRAG" = 1; next;"); > > > > + } else if (od->has_lb_vip) { > > > > + /* We'll build stateless filters if there are LB rules so that > > > > + * the stateless flows are not tracked in pre-lb. */ > > > > + build_stateless_filters(od, port_groups, lflows); > > > > } > > > > } > > > > > > > > @@ -5913,6 +5919,11 @@ build_pre_lb(struct ovn_datapath *od, const struct > > > shash *meter_groups, > > > > S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB, > > > > 110, lflows); > > > > } > > > > + /* Do not sent statless flows via conntrack */ > > > > + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, > > > > + REGBIT_ACL_STATELESS" == 1", "next;"); > > > > + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, > > > > + REGBIT_ACL_STATELESS" == 1", "next;"); > > > > > > > > /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send > > > > * packet to conntrack for defragmentation and possibly for > > > unNATting. > > > > @@ -6918,7 +6929,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, > > > struct ovn_northd_lb *lb, > > > > } > > > > ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" : > > > "ct_lb"); > > > > > > > > - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); > > > > + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", > > > > + ip_match, lb_vip->vip_str); > > > > if (lb_vip->vip_port) { > > > > ds_put_format(match, " && %s.dst == %d", proto, > > > lb_vip->vip_port); > > > > } > > > > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > > > > index a70f2e678..162ec2b3b 100644 > > > > --- a/northd/ovn-northd.8.xml > > > > +++ b/northd/ovn-northd.8.xml > > > > @@ -440,7 +440,9 @@ > > > > priority-110 flow is added to skip over stateful ACLs. Multicast, > > > IPv6 > > > > Neighbor Discovery and MLD traffic also skips stateful ACLs. For > > > > "allow-stateless" ACLs, a flow is added to bypass setting the hint > > > for > > > > - connection tracker processing. > > > > + connection tracker processing when there are statelful ACLs or LB > > > rules; > > > > + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching such > > > > + flows for this purpose. > > > > </p> > > > > > > > > <p> > > > > @@ -460,8 +462,10 @@ > > > > in ingress table <code>LB</code> and <code>Stateful</code>. It > > > contains > > > > a priority-0 flow that simply moves traffic to the next table. > > > Moreover > > > > it contains two priority-110 flows to move multicast, IPv6 Neighbor > > > > - Discovery and MLD traffic to the next table. If load balancing > > > rules with > > > > - virtual IP addresses (and ports) are configured in > > > > + Discovery and MLD traffic to the next table. It also contains two > > > > + priority-110 flows to move stateless traffic, i.e traffic for which > > > > + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If > > > load > > > > + balancing rules with virtual IP addresses (and ports) are > > > configured in > > > > <code>OVN_Northbound</code> database for a logical switch > > > datapath, a > > > > priority-100 flow is added with the match <code>ip</code> to match > > > on IP > > > > packets and sets the action <code>reg0[2] = 1; next;</code> to act > > > as a > > > > @@ -1859,19 +1863,11 @@ output; > > > > </li> > > > > </ul> > > > > > > > > - <h3>Egress Table 0: Pre-LB</h3> > > > > + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> > > > > > > > > <p> > > > > - This table is similar to ingress table <code>Pre-LB</code>. It > > > > - contains a priority-0 flow that simply moves traffic to the next > > > table. > > > > - Moreover it contains two priority-110 flows to move multicast, IPv6 > > > > - Neighbor Discovery and MLD traffic to the next table. If any load > > > > - balancing rules exist for the datapath, a priority-100 flow is > > > added with > > > > - a match of <code>ip</code> and action of <code>reg0[2] = 1; > > > next;</code> > > > > - to act as a hint for table <code>Pre-stateful</code> to send IP > > > packets > > > > - to the connection tracker for packet de-fragmentation and possibly > > > DNAT > > > > - the destination VIP to one of the selected backend for already > > > committed > > > > - load balanced traffic. > > > > + This is similar to ingress table <code>Pre-ACLs</code> except for > > > > + <code>to-lport</code> traffic. > > > > </p> > > > > > > > > <p> > > > > @@ -1884,11 +1880,28 @@ output; > > > > db="OVN_Northbound"/> table. > > > > </p> > > > > > > > > - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> > > > > + <p> > > > > + This table also has a priority-110 flow with the match > > > > + <code>outport == <var>I</var></code> for all logical switch > > > > + datapaths to move traffic to the next table. Where <var>I</var> > > > > + is the peer of a logical router port. This flow is added to > > > > + skip the connection tracking of packets which will be entering > > > > + logical router datapath from logical switch datapath for routing. > > > > + </p> > > > > + > > > > + <h3>Egress Table 1: Pre-LB</h3> > > > > > > > > <p> > > > > - This is similar to ingress table <code>Pre-ACLs</code> except for > > > > - <code>to-lport</code> traffic. > > > > + This table is similar to ingress table <code>Pre-LB</code>. It > > > > + contains a priority-0 flow that simply moves traffic to the next > > > table. > > > > + Moreover it contains two priority-110 flows to move multicast, IPv6 > > > > + Neighbor Discovery and MLD traffic to the next table. If any load > > > > + balancing rules exist for the datapath, a priority-100 flow is > > > added with > > > > + a match of <code>ip</code> and action of <code>reg0[2] = 1; > > > next;</code> > > > > + to act as a hint for table <code>Pre-stateful</code> to send IP > > > packets > > > > + to the connection tracker for packet de-fragmentation and possibly > > > DNAT > > > > + the destination VIP to one of the selected backend for already > > > committed > > > > + load balanced traffic. > > > > </p> > > > > > > > > <p> > > > > @@ -1901,15 +1914,6 @@ output; > > > > db="OVN_Northbound"/> table. > > > > </p> > > > > > > > > - <p> > > > > - This table also has a priority-110 flow with the match > > > > - <code>outport == <var>I</var></code> for all logical switch > > > > - datapaths to move traffic to the next table. Where <var>I</var> > > > > - is the peer of a logical router port. This flow is added to > > > > - skip the connection tracking of packets which will be entering > > > > - logical router datapath from logical switch datapath for routing. > > > > - </p> > > > > - > > > > <h3>Egress Table 2: Pre-stateful</h3> > > > > > > > > <p> > > > > diff --git a/ovn-nb.xml b/ovn-nb.xml > > > > index f41e9d7c0..140dd9a4f 100644 > > > > --- a/ovn-nb.xml > > > > +++ b/ovn-nb.xml > > > > @@ -2063,6 +2063,9 @@ > > > > outgoing TCP traffic directed to an IP address, then you > > > probably > > > > also want to define another rule to allow incoming TCP traffic > > > coming > > > > from this same IP address. > > > > + In addition, traffic that matches stateless ACLs will bypass > > > > + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be > > > > + used instead if the traffic is supposed to be load-balanced. > > > > </li> > > > > > > > > <li> > > > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > > > > index 4f399eccb..85d5acfed 100644 > > > > --- a/tests/ovn-northd.at > > > > +++ b/tests/ovn-northd.at > > > > @@ -2056,27 +2056,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 > > > > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 > > > > check ovn-nbctl --wait=sb sync > > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > > grep reg0 | sort], [0], [dnl > > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > ]) > > > > > > > > check ovn-nbctl ls-lb-add sw0 lb2 > > > > check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 > > > > check ovn-nbctl --wait=sb sync > > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > > grep reg0 | sort], [0], [dnl > > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > ]) > > > > > > > > check ovn-nbctl clear load_balancer $lb1 vips > > > > check ovn-nbctl clear load_balancer $lb3 vips > > > > check ovn-nbctl --wait=sb sync > > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > > grep reg0 | sort], [0], [dnl > > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > ]) > > > > > > > > check ovn-nbctl clear load_balancer $lb2 vips > > > > check ovn-nbctl --wait=sb sync > > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > > grep reg0 | sort], [0], [dnl > > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > ]) > > > > > > > > check ovn-nbctl clear load_balancer $lb4 vips > > > > @@ -2091,7 +2091,7 @@ check ovn-nbctl set load_balancer $lb4 > > > vips:"10.0.0.13"="10.0.0.6" > > > > > > > > check ovn-nbctl --wait=sb sync > > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > > grep reg0 | sort], [0], [dnl > > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > ]) > > > > > > > > # Now reverse the order of clearing the vip. > > > > @@ -2099,13 +2099,13 @@ check ovn-nbctl clear load_balancer $lb2 vips > > > > check ovn-nbctl clear load_balancer $lb4 vips > > > > check ovn-nbctl --wait=sb sync > > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > > grep reg0 | sort], [0], [dnl > > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > ]) > > > > > > > > check ovn-nbctl clear load_balancer $lb1 vips > > > > check ovn-nbctl --wait=sb sync > > > > AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | > > > grep reg0 | sort], [0], [dnl > > > > - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), > > > action=(reg0[[2]] = 1; next;) > > > > ]) > > > > > > > > check ovn-nbctl clear load_balancer $lb3 vips > > > > @@ -3044,18 +3044,10 @@ for direction in from to; do > > > > done > > > > ovn-nbctl --wait=sb sync > > > > > > > > -# TCP packets should go to conntrack for load balancing. > > > > +# TCP packets should not go to conntrack for load balancing. > > > > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > > > > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], > > > [0], [dnl > > > > -ct_lb_mark { > > > > - ct_lb_mark { > > > > - reg0[[6]] = 0; > > > > - reg0[[12]] = 0; > > > > - ct_lb_mark /* default (use --ct to customize) */ { > > > > - output("lsp2"); > > > > - }; > > > > - }; > > > > -}; > > > > +output("lsp2"); > > > > ]) > > > > > > > > # UDP packets still go to conntrack. > > > > @@ -3188,18 +3180,10 @@ for direction in from to; do > > > > done > > > > ovn-nbctl --wait=sb sync > > > > > > > > -# TCP packets should go to conntrack for load balancing. > > > > +# TCP packets should not go to conntrack for load balancing. > > > > flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" > > > > AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], > > > [0], [dnl > > > > -ct_lb_mark { > > > > - ct_lb_mark { > > > > - reg0[[6]] = 0; > > > > - reg0[[12]] = 0; > > > > - ct_lb_mark /* default (use --ct to customize) */ { > > > > - output("lsp2"); > > > > - }; > > > > - }; > > > > -}; > > > > +output("lsp2"); > > > > ]) > > > > > > > > # UDP packets still go to conntrack. > > > > @@ -4015,8 +3999,8 @@ check_stateful_flows() { > > > > table=? (ls_in_pre_stateful ), priority=0 , match=(1), > > > action=(next;) > > > > table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), > > > action=(ct_next;) > > > > table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > > > action=(ct_lb_mark;) > > > > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > > 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; > > > ct_lb_mark;) > > > > - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > > 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; > > > ct_lb_mark;) > > > > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > > && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > > > reg2[[0..15]] = 80; ct_lb_mark;) > > > > + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > > && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; > > > reg2[[0..15]] = 80; ct_lb_mark;) > > > > ]) > > > > > > > > AT_CHECK([grep "ls_in_lb" sw0flows | sort | sed > > > 's/table=../table=??/'], [0], [dnl > > > > @@ -7650,7 +7634,7 @@ check ovn-nbctl --wait=sb sync > > > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && > > > reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && > > > reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > > > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > > > action=(ct_lb_mark;) > > > > table=11(ls_in_lb ), priority=110 , match=(ct.new && > > > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; > > > ct_lb_mark(backends=42.42.42.2);) > > > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > > > action=(ct_lb_mark;) > > > > @@ -7662,7 +7646,7 @@ check ovn-nbctl --wait=sb sync > > > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && > > > reg0 == 66.66.66.66 && ct_label.natted == 1), action=(next;) > > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && > > > reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) > > > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > > > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) > > > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > > > action=(ct_lb;) > > > > table=11(ls_in_lb ), priority=110 , match=(ct.new && > > > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) > > > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > > > action=(ct_lb;) > > > > @@ -7674,7 +7658,7 @@ check ovn-nbctl --wait=sb sync > > > > AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl > > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && > > > reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) > > > > table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && > > > reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) > > > > - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > > 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > > > + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > > && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) > > > > table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), > > > action=(ct_lb_mark;) > > > > table=11(ls_in_lb ), priority=110 , match=(ct.new && > > > ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; > > > ct_lb_mark(backends=42.42.42.2);) > > > > table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), > > > action=(ct_lb_mark;) > > > > diff --git a/tests/ovn.at b/tests/ovn.at > > > > index f8b8db4df..f43455f60 100644 > > > > --- a/tests/ovn.at > > > > +++ b/tests/ovn.at > > > > @@ -23656,7 +23656,7 @@ OVS_WAIT_FOR_OUTPUT( > > > > [ovn-sbctl dump-flows > sbflows > > > > ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed > > > 's/table=..//'], 0, > > > > [dnl > > > > - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && > > > tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) > > > > + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && > > > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > > > reg2[[0..15]] = 80; ct_lb_mark;) > > > > (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == > > > 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends= > > > 10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) > > > > ]) > > > > > > > > @@ -23699,7 +23699,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 > > > > AT_CHECK( > > > > [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep > > > priority=120 |\ > > > > sed 's/table=../table=??/'], [0], [dnl > > > > - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == > > > 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; > > > ct_lb_mark;) > > > > + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 > > > && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; > > > reg2[[0..15]] = 80; ct_lb_mark;) > > > > table=??(ls_in_lb ), priority=120 , match=(ct.new && > > > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) > > > > ]) > > > > > > > > -- > > > > 2.17.1 > > > > > > > > > > Thanks Venu. I have reviewed this with Venu internally and gave my Ack, but > > > it would be good if folks can take another look before merging. > > > Numan, since you were involved in an earlier discussion, probably you want > > > to take a look? > > > > Sure. I'll take a look and thanks for the comments below. > > > > Numan > > > > > I think there are two things to pay attention to: > > > 1. Switching the order of PRE_ACL and PRE_LB in the LS egress pipeline. I > > > don't see any problem here but not sure if there was any specific reason > > > behind the original order. > > > 2. If there are VIP traffic matching stateless ACL, they won't work any > > > more. We think that is ok, because we can't think of a use case that would > > > add stateless ACLs for VIP traffic because this combination doesn't make > > > any sense. If such traffic was allowed with CT, it could actually hide a > > > mistake made by the user - e.g. the intention was to apply stateless ACLs > > > to bypass CT for certain traffic that was not related to VIP, but by > > > mistake the match condition added to the ACL touched some VIP traffic. So, > > > we think the right thing to do is to let it fail when such > > > *misconfiguration* happens. > > > > > > Let me know if you have any concerns about the above points. > > > > > > Thanks, > > > Han > > > _______________________________________________ > > > dev mailing list > > > dev@openvswitch.org > > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=05%7C01%7Cvenugopali%40nvidia.com%7Cb334b9e522434495b9bb08dac6642a5c%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C638040430761722189%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=IqTdVyCGJV5deuorgE0VwTpJGtjqbfuwV5TuRXOIAQI%3D&reserved=0 > > > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
diff --git a/northd/northd.c b/northd/northd.c index b7388afc5..da4beede6 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -137,8 +137,8 @@ enum ovn_stage { PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 24, "ls_in_l2_unknown") \ \ /* Logical switch egress stages. */ \ - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \ + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") \ PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") \ PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ @@ -210,6 +210,7 @@ enum ovn_stage { #define REGBIT_ACL_LABEL "reg0[13]" #define REGBIT_FROM_RAMP "reg0[14]" #define REGBIT_PORT_SEC_DROP "reg0[15]" +#define REGBIT_ACL_STATELESS "reg0[16]" #define REG_ORIG_DIP_IPV4 "reg1" #define REG_ORIG_DIP_IPV6 "xxreg1" @@ -271,7 +272,7 @@ enum ovn_stage { * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | | * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | | * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | | - * | | REGBIT_ACL_LABEL | X | | + * | | REGBIT_ACL_{LABEL/STATELESS} | X | | * +----+----------------------------------------------+ X | | * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | | * +----+----------------------------------------------+ E | | @@ -5677,17 +5678,18 @@ build_stateless_filter(struct ovn_datapath *od, const struct nbrec_acl *acl, struct hmap *lflows) { + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; if (!strcmp(acl->direction, "from-lport")) { ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, acl->priority + OVN_ACL_PRI_OFFSET, acl->match, - "next;", + action, &acl->header_); } else { ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, acl->priority + OVN_ACL_PRI_OFFSET, acl->match, - "next;", + action, &acl->header_); } } @@ -5779,6 +5781,10 @@ build_pre_acls(struct ovn_datapath *od, const struct hmap *port_groups, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); + } else if (od->has_lb_vip) { + /* We'll build stateless filters if there are LB rules so that + * the stateless flows are not tracked in pre-lb. */ + build_stateless_filters(od, port_groups, lflows); } } @@ -5913,6 +5919,11 @@ build_pre_lb(struct ovn_datapath *od, const struct shash *meter_groups, S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB, 110, lflows); } + /* Do not sent statless flows via conntrack */ + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, + REGBIT_ACL_STATELESS" == 1", "next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, + REGBIT_ACL_STATELESS" == 1", "next;"); /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send * packet to conntrack for defragmentation and possibly for unNATting. @@ -6918,7 +6929,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, struct ovn_northd_lb *lb, } ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" : "ct_lb"); - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", + ip_match, lb_vip->vip_str); if (lb_vip->vip_port) { ds_put_format(match, " && %s.dst == %d", proto, lb_vip->vip_port); } diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index a70f2e678..162ec2b3b 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -440,7 +440,9 @@ priority-110 flow is added to skip over stateful ACLs. Multicast, IPv6 Neighbor Discovery and MLD traffic also skips stateful ACLs. For "allow-stateless" ACLs, a flow is added to bypass setting the hint for - connection tracker processing. + connection tracker processing when there are statelful ACLs or LB rules; + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching such + flows for this purpose. </p> <p> @@ -460,8 +462,10 @@ in ingress table <code>LB</code> and <code>Stateful</code>. It contains a priority-0 flow that simply moves traffic to the next table. Moreover it contains two priority-110 flows to move multicast, IPv6 Neighbor - Discovery and MLD traffic to the next table. If load balancing rules with - virtual IP addresses (and ports) are configured in + Discovery and MLD traffic to the next table. It also contains two + priority-110 flows to move stateless traffic, i.e traffic for which + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If load + balancing rules with virtual IP addresses (and ports) are configured in <code>OVN_Northbound</code> database for a logical switch datapath, a priority-100 flow is added with the match <code>ip</code> to match on IP packets and sets the action <code>reg0[2] = 1; next;</code> to act as a @@ -1859,19 +1863,11 @@ output; </li> </ul> - <h3>Egress Table 0: Pre-LB</h3> + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> <p> - This table is similar to ingress table <code>Pre-LB</code>. It - contains a priority-0 flow that simply moves traffic to the next table. - Moreover it contains two priority-110 flows to move multicast, IPv6 - Neighbor Discovery and MLD traffic to the next table. If any load - balancing rules exist for the datapath, a priority-100 flow is added with - a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> - to act as a hint for table <code>Pre-stateful</code> to send IP packets - to the connection tracker for packet de-fragmentation and possibly DNAT - the destination VIP to one of the selected backend for already committed - load balanced traffic. + This is similar to ingress table <code>Pre-ACLs</code> except for + <code>to-lport</code> traffic. </p> <p> @@ -1884,11 +1880,28 @@ output; db="OVN_Northbound"/> table. </p> - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> + <p> + This table also has a priority-110 flow with the match + <code>outport == <var>I</var></code> for all logical switch + datapaths to move traffic to the next table. Where <var>I</var> + is the peer of a logical router port. This flow is added to + skip the connection tracking of packets which will be entering + logical router datapath from logical switch datapath for routing. + </p> + + <h3>Egress Table 1: Pre-LB</h3> <p> - This is similar to ingress table <code>Pre-ACLs</code> except for - <code>to-lport</code> traffic. + This table is similar to ingress table <code>Pre-LB</code>. It + contains a priority-0 flow that simply moves traffic to the next table. + Moreover it contains two priority-110 flows to move multicast, IPv6 + Neighbor Discovery and MLD traffic to the next table. If any load + balancing rules exist for the datapath, a priority-100 flow is added with + a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> + to act as a hint for table <code>Pre-stateful</code> to send IP packets + to the connection tracker for packet de-fragmentation and possibly DNAT + the destination VIP to one of the selected backend for already committed + load balanced traffic. </p> <p> @@ -1901,15 +1914,6 @@ output; db="OVN_Northbound"/> table. </p> - <p> - This table also has a priority-110 flow with the match - <code>outport == <var>I</var></code> for all logical switch - datapaths to move traffic to the next table. Where <var>I</var> - is the peer of a logical router port. This flow is added to - skip the connection tracking of packets which will be entering - logical router datapath from logical switch datapath for routing. - </p> - <h3>Egress Table 2: Pre-stateful</h3> <p> diff --git a/ovn-nb.xml b/ovn-nb.xml index f41e9d7c0..140dd9a4f 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -2063,6 +2063,9 @@ outgoing TCP traffic directed to an IP address, then you probably also want to define another rule to allow incoming TCP traffic coming from this same IP address. + In addition, traffic that matches stateless ACLs will bypass + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be + used instead if the traffic is supposed to be load-balanced. </li> <li> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 4f399eccb..85d5acfed 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -2056,27 +2056,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl ls-lb-add sw0 lb2 check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb1 vips check ovn-nbctl clear load_balancer $lb3 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb2 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb4 vips @@ -2091,7 +2091,7 @@ check ovn-nbctl set load_balancer $lb4 vips:"10.0.0.13"="10.0.0.6" check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) # Now reverse the order of clearing the vip. @@ -2099,13 +2099,13 @@ check ovn-nbctl clear load_balancer $lb2 vips check ovn-nbctl clear load_balancer $lb4 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb1 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb3 vips @@ -3044,18 +3044,10 @@ for direction in from to; do done ovn-nbctl --wait=sb sync -# TCP packets should go to conntrack for load balancing. +# TCP packets should not go to conntrack for load balancing. flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl -ct_lb_mark { - ct_lb_mark { - reg0[[6]] = 0; - reg0[[12]] = 0; - ct_lb_mark /* default (use --ct to customize) */ { - output("lsp2"); - }; - }; -}; +output("lsp2"); ]) # UDP packets still go to conntrack. @@ -3188,18 +3180,10 @@ for direction in from to; do done ovn-nbctl --wait=sb sync -# TCP packets should go to conntrack for load balancing. +# TCP packets should not go to conntrack for load balancing. flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl -ct_lb_mark { - ct_lb_mark { - reg0[[6]] = 0; - reg0[[12]] = 0; - ct_lb_mark /* default (use --ct to customize) */ { - output("lsp2"); - }; - }; -}; +output("lsp2"); ]) # UDP packets still go to conntrack. @@ -4015,8 +3999,8 @@ check_stateful_flows() { table=? (ls_in_pre_stateful ), priority=0 , match=(1), action=(next;) table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), action=(ct_next;) table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) ]) AT_CHECK([grep "ls_in_lb" sw0flows | sort | sed 's/table=../table=??/'], [0], [dnl @@ -7650,7 +7634,7 @@ check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) table=11(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) @@ -7662,7 +7646,7 @@ check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && reg0 == 66.66.66.66 && ct_label.natted == 1), action=(next;) table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) table=11(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) @@ -7674,7 +7658,7 @@ check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) table=11(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) diff --git a/tests/ovn.at b/tests/ovn.at index f8b8db4df..f43455f60 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -23656,7 +23656,7 @@ OVS_WAIT_FOR_OUTPUT( [ovn-sbctl dump-flows > sbflows ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed 's/table=..//'], 0, [dnl - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends=10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) ]) @@ -23699,7 +23699,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 AT_CHECK( [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep priority=120 |\ sed 's/table=../table=??/'], [0], [dnl - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) table=??(ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) ])