diff mbox series

[ovs-dev,ovn,1/2] ovn-northd: Don't send the pkt to conntrack if it is to be routed in egress stage.

Message ID 20200722192130.8984-1-numans@ovn.org
State Superseded
Headers show
Series [ovs-dev,ovn,1/2] ovn-northd: Don't send the pkt to conntrack if it is to be routed in egress stage. | expand

Commit Message

Numan Siddique July 22, 2020, 7:21 p.m. UTC
From: Numan Siddique <numans@ovn.org>

If there is a logical port 'P1' with the IP - 10.0.0.3 and a logical port 'P2' with
the IP 20.0.0.3 and if the logical switch of 'P1' has atleast one load balancer
associated with it and atleast one ACL with allow-related action associated with it.
Then for every packet from 'P1' to 'P2' after the TCP connection
is established we see a total of 4 recirculations in the datapath on the chassis
claiming 'P1'. This is because,

In the ingress logical switch pipeline, below logical flows are hit
  - table=9 (ls_in_lb           ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;)
  - table=10(ls_in_stateful     ), priority=100  , match=(reg0[2] == 1), action=(ct_lb;)

And in the egress logical switch pipeline, below logical flows are hit
 - table=0 (ls_out_pre_lb      ), priority=100  , match=(ip), action=(reg0[0] = 1; next;)
 - table=2 (ls_out_pre_stateful), priority=100  , match=(reg0[0] == 1), action=(ct_next;)
 - table=3 (ls_out_lb          ), priority=65535, match=(ct.est && !ct.rel && !ct.new && !ct.inv), action=(reg0[2] = 1; next;)
 - table=7 (ls_out_stateful    ), priority=100  , match=(reg0[2] == 1), action=(ct_lb;)

In the above example, when the packet enters the egress pipeline and since it needs to
enter the router pipeline, we can skip setting reg0[0] if outport is peer port of
logical router port. There is no need to send the packet to conntrack in this case.

This patch handles this case for router ports. Next patch in the series avoids sending to
conntrack with the action - ct_lb if the packet is not destined to the LB VIP.

With the present master for the above example, we see total of 4 recirculations on the
chassis claiming the lport 'P1'. With this patch we see only 2 recirculations.

Signed-off-by: Numan Siddique <numans@ovn.org>
---
 northd/ovn-northd.8.xml | 33 ++++++++++++++++++++++++++++++++-
 northd/ovn-northd.c     | 39 ++++++++++++++++++++++++++++++---------
 2 files changed, 62 insertions(+), 10 deletions(-)
diff mbox series

Patch

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index eb2514f15c..3a34e0ad7f 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -366,6 +366,15 @@ 
       db="OVN_Northbound"/> table.
     </p>
 
+    <p>
+      This table also has a priority-110 flow with the match
+      <code>inport == <var>I</var></code> for all logical switch
+      datapaths to move traffic to the next table. Where <var>I</var>
+      is the peer of a logical router port. This flow is added to
+      skip the connection tracking of packets which enter from
+      logical router datapath to logical switch datapath.
+    </p>
+
     <h3>Ingress Table 5: Pre-stateful</h3>
 
     <p>
@@ -533,7 +542,20 @@ 
 
     <p>
       It contains a priority-0 flow that simply moves traffic to the next
-      table.  For established connections a priority 100 flow matches on
+      table.
+    </p>
+
+    <p>
+      A priority-65535 flow with the match
+      <code>inport == <var>I</var></code> for all logical switch
+      datapaths to move traffic to the next table. Where <var>I</var>
+      is the peer of a logical router port. This flow is added to
+      skip the connection tracking of packets which enter from
+      logical router datapath to logical switch datapath.
+    </p>
+
+    <p>
+      For established connections a priority 65534 flow matches on
       <code>ct.est &amp;&amp; !ct.rel &amp;&amp; !ct.new &amp;&amp;
       !ct.inv</code> and sets an action <code>reg0[2] = 1; next;</code> to act
       as a hint for table <code>Stateful</code> to send packets through
@@ -1359,6 +1381,15 @@  output;
       db="OVN_Northbound"/> table.
     </p>
 
+    <p>
+      This table also has a priority-110 flow with the match
+      <code>outport == <var>I</var></code> for all logical switch
+      datapaths to move traffic to the next table. Where <var>I</var>
+      is the peer of a logical router port. This flow is added to
+      skip the connection tracking of packets which will be entering
+      logical router datapath from logical switch datapath for routing.
+    </p>
+
     <h3>Egress Table 2: Pre-stateful</h3>
 
     <p>
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 192198272a..ac1da73342 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -4850,8 +4850,9 @@  build_lswitch_output_port_sec(struct hmap *ports, struct hmap *datapaths,
 }
 
 static void
-build_pre_acl_flows(struct ovn_datapath *od, struct ovn_port *op,
-                    struct hmap *lflows)
+skip_port_from_conntrack(struct ovn_datapath *od, struct ovn_port *op,
+                         enum ovn_stage in_stage, enum ovn_stage out_stage,
+                         uint16_t priority, struct hmap *lflows)
 {
     /* Can't use ct() for router ports. Consider the following configuration:
      * lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
@@ -4867,10 +4868,10 @@  build_pre_acl_flows(struct ovn_datapath *od, struct ovn_port *op,
 
     ds_put_format(&match_in, "ip && inport == %s", op->json_key);
     ds_put_format(&match_out, "ip && outport == %s", op->json_key);
-    ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
+    ovn_lflow_add_with_hint(lflows, od, in_stage, priority,
                             ds_cstr(&match_in), "next;",
                             &op->nbsp->header_);
-    ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
+    ovn_lflow_add_with_hint(lflows, od, out_stage, priority,
                             ds_cstr(&match_out), "next;",
                             &op->nbsp->header_);
 
@@ -4903,10 +4904,14 @@  build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
      * defragmentation, in order to match L4 headers. */
     if (has_stateful) {
         for (size_t i = 0; i < od->n_router_ports; i++) {
-            build_pre_acl_flows(od, od->router_ports[i], lflows);
+            skip_port_from_conntrack(od, od->router_ports[i],
+                                     S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
+                                     110, lflows);
         }
         for (size_t i = 0; i < od->n_localnet_ports; i++) {
-            build_pre_acl_flows(od, od->localnet_ports[i], lflows);
+            skip_port_from_conntrack(od, od->localnet_ports[i],
+                                     S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
+                                     110, lflows);
         }
 
         /* Ingress and Egress Pre-ACL Table (Priority 110).
@@ -5050,6 +5055,17 @@  build_pre_lb(struct ovn_datapath *od, struct hmap *lflows,
     ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
     ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
 
+    for (size_t i = 0; i < od->n_router_ports; i++) {
+        skip_port_from_conntrack(od, od->router_ports[i],
+                                 S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB,
+                                 110, lflows);
+    }
+    for (size_t i = 0; i < od->n_localnet_ports; i++) {
+        skip_port_from_conntrack(od, od->localnet_ports[i],
+                                 S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB,
+                                 110, lflows);
+    }
+
     struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
     struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
     bool vip_configured = false;
@@ -5725,13 +5741,18 @@  build_lb(struct ovn_datapath *od, struct hmap *lflows)
     ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
 
     if (od->nbs->load_balancer) {
-        /* Ingress and Egress LB Table (Priority 65535).
+        for (size_t i = 0; i < od->n_router_ports; i++) {
+            skip_port_from_conntrack(od, od->router_ports[i],
+                                     S_SWITCH_IN_LB, S_SWITCH_OUT_LB,
+                                     UINT16_MAX, lflows);
+        }
+        /* Ingress and Egress LB Table (Priority 65534).
          *
          * Send established traffic through conntrack for just NAT. */
-        ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX - 1,
                       "ct.est && !ct.rel && !ct.new && !ct.inv",
                       REGBIT_CONNTRACK_NAT" = 1; next;");
-        ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
+        ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX - 1,
                       "ct.est && !ct.rel && !ct.new && !ct.inv",
                       REGBIT_CONNTRACK_NAT" = 1; next;");
     }