diff mbox series

[ovs-dev,v3] northd: Fix logical router load-balancer nat rules when using DGP.

Message ID 20240723145032.58015-1-roberto.acosta@luizalabs.com
State Superseded
Headers show
Series [ovs-dev,v3] northd: Fix logical router load-balancer nat rules when using DGP. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Roberto Bartzen Acosta July 23, 2024, 2:50 p.m. UTC
This commit fixes the build_distr_lrouter_nat_flows_for_lb function to
include one stateless NAT flow entry for each DGP in use. Since support
for multiple gateway ports per logical router was added, the LR NAT rules
pipeline needs a specific entry for each attached DGP. Otherwise, ingress
traffic is only redirected when the incoming LRP matches the
chassis_resident field.

DNAT rules for DGPs require the gateway-port column to be configured for
the related DGP, and the load-balancer NAT rule configuration can follow a
similar idea. In this case, however, we don't know which LRP is responsible
for the incoming traffic, so we need to automatically apply a stateless NAT
rule for the load balancer on all DGPs to allow inbound traffic.

After applying this patch, the incoming and/or outgoing traffic can pass
through any chassis where the DGP resides without having problems with CT
state.

Reported-at: https://bugs.launchpad.net/ubuntu/+source/ovn/+bug/2054322
Fixes: 15348b7b806f ("ovn-northd: Multiple distributed gateway port support.")
Signed-off-by: Roberto Bartzen Acosta <roberto.acosta@luizalabs.com>
---
 northd/en-lr-stateful.c |  12 -----
 northd/northd.c         |  96 +++++++++++++++++++++++++---------
 tests/ovn-northd.at     | 111 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 182 insertions(+), 37 deletions(-)
diff mbox series

Patch

diff --git a/northd/en-lr-stateful.c b/northd/en-lr-stateful.c
index baf1bd2f8..f09691af6 100644
--- a/northd/en-lr-stateful.c
+++ b/northd/en-lr-stateful.c
@@ -516,18 +516,6 @@  lr_stateful_record_create(struct lr_stateful_table *table,
 
     table->array[od->index] = lr_stateful_rec;
 
-    /* Load balancers are not supported (yet) if a logical router has multiple
-     * distributed gateway port.  Log a warning. */
-    if (lr_stateful_rec->has_lb_vip && lr_has_multiple_gw_ports(od)) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-        VLOG_WARN_RL(&rl, "Load-balancers are configured on logical "
-                     "router %s, which has %"PRIuSIZE" distributed "
-                     "gateway ports. Load-balancer is not supported "
-                     "yet when there is more than one distributed "
-                     "gateway port on the router.",
-                     od->nbr->name, od->n_l3dgw_ports);
-    }
-
     return lr_stateful_rec;
 }
 
diff --git a/northd/northd.c b/northd/northd.c
index 6898daa00..853d58f29 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -11026,31 +11026,30 @@  static void
 build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx,
                                      enum lrouter_nat_lb_flow_type type,
                                      struct ovn_datapath *od,
-                                     struct lflow_ref *lflow_ref)
+                                     struct lflow_ref *lflow_ref,
+                                     struct ovn_port *dgp)
 {
-    struct ovn_port *dgp = od->l3dgw_ports[0];
-
-    const char *undnat_action;
-
-    switch (type) {
-    case LROUTER_NAT_LB_FLOW_FORCE_SNAT:
-        undnat_action = "flags.force_snat_for_lb = 1; next;";
-        break;
-    case LROUTER_NAT_LB_FLOW_SKIP_SNAT:
-        undnat_action = "flags.skip_snat_for_lb = 1; next;";
-        break;
-    case LROUTER_NAT_LB_FLOW_NORMAL:
-    case LROUTER_NAT_LB_FLOW_MAX:
-        undnat_action = lrouter_use_common_zone(od)
-                        ? "ct_dnat_in_czone;"
-                        : "ct_dnat;";
-        break;
-    }
+    struct ds dnat_action = DS_EMPTY_INITIALIZER;
 
     /* Store the match lengths, so we can reuse the ds buffer. */
     size_t new_match_len = ctx->new_match->length;
     size_t undnat_match_len = ctx->undnat_match->length;
 
+    /* Create stateless LB NAT rules when using DGPs.
+     * dnat_action: Add the LB backend IPs as a destination action of the
+     *              lr_in_dnat NAT rule with cumulative effect because any
+     *              backend dst IP used in the action list will redirect the
+     *              packet to the ct_lb pipeline.
+     */
+    if (od->n_l3dgw_ports > 1) {
+        for (size_t i = 0; i < ctx->lb_vip->n_backends; i++) {
+            struct ovn_lb_backend *backend = &ctx->lb_vip->backends[i];
+            bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&backend->ip);
+            ds_put_format(&dnat_action, "%s.dst=%s;", ipv6 ? "ip6" : "ip4",
+                          backend->ip_str);
+        }
+    }
+    ds_put_format(&dnat_action, "%s", ctx->new_action[type]);
 
     const char *meter = NULL;
 
@@ -11060,20 +11059,47 @@  build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx,
 
     if (ctx->lb_vip->n_backends || !ctx->lb_vip->empty_backend_rej) {
         ds_put_format(ctx->new_match, " && is_chassis_resident(%s)",
-                      od->l3dgw_ports[0]->cr_port->json_key);
+                      dgp->cr_port->json_key);
     }
 
     ovn_lflow_add_with_hint__(ctx->lflows, od, S_ROUTER_IN_DNAT, ctx->prio,
-                              ds_cstr(ctx->new_match), ctx->new_action[type],
+                              ds_cstr(ctx->new_match), ds_cstr(&dnat_action),
                               NULL, meter, &ctx->lb->nlb->header_,
                               lflow_ref);
 
     ds_truncate(ctx->new_match, new_match_len);
 
+    ds_destroy(&dnat_action);
     if (!ctx->lb_vip->n_backends) {
         return;
     }
 
+    struct ds undnat_action = DS_EMPTY_INITIALIZER;
+    struct ds snat_action = DS_EMPTY_INITIALIZER;
+
+    switch (type) {
+    case LROUTER_NAT_LB_FLOW_FORCE_SNAT:
+        ds_put_format(&undnat_action, "flags.force_snat_for_lb = 1; next;");
+        break;
+    case LROUTER_NAT_LB_FLOW_SKIP_SNAT:
+        ds_put_format(&undnat_action, "flags.skip_snat_for_lb = 1; next;");
+        break;
+    case LROUTER_NAT_LB_FLOW_NORMAL:
+    case LROUTER_NAT_LB_FLOW_MAX:
+        ds_put_format(&undnat_action, "%s",
+                      lrouter_use_common_zone(od) ? "ct_dnat_in_czone;"
+                      : "ct_dnat;");
+        break;
+    }
+
+    /* Create stateless LB NAT rules when using DGPs.
+     * undnat_action: Remove the ct action from the lr_out_undnat NAT rule.
+     */
+    if (od->n_l3dgw_ports > 1) {
+        ds_clear(&undnat_action);
+        ds_put_format(&undnat_action, "next;");
+    }
+
     /* We need to centralize the LB traffic to properly perform
      * the undnat stage.
      */
@@ -11093,10 +11119,27 @@  build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx,
                   " && is_chassis_resident(%s)", dgp->json_key, dgp->json_key,
                   dgp->cr_port->json_key);
     ovn_lflow_add_with_hint(ctx->lflows, od, S_ROUTER_OUT_UNDNAT, 120,
-                            ds_cstr(ctx->undnat_match), undnat_action,
-                            &ctx->lb->nlb->header_,
+                            ds_cstr(ctx->undnat_match),
+                            ds_cstr(&undnat_action), &ctx->lb->nlb->header_,
                             lflow_ref);
+
+    /* Create stateless LB NAT rules when using DGPs.
+     * snat_action: Add a new lr_out_snat rule with the LB VIP as source IP
+     *              action to perform the NAT stateless pipeline completely.
+     */
+    if (od->n_l3dgw_ports > 1) {
+        ds_put_format(&snat_action, "%s.src=%s; next;",
+                      ctx->lb_vip->address_family == AF_INET6 ? "ip6" : "ip4",
+                      ctx->lb_vip->vip_str);
+        ovn_lflow_add_with_hint(ctx->lflows, od, S_ROUTER_OUT_SNAT, 120,
+                                ds_cstr(ctx->undnat_match),
+                                ds_cstr(&snat_action), &ctx->lb->nlb->header_,
+                                lflow_ref);
+    }
+
     ds_truncate(ctx->undnat_match, undnat_match_len);
+    ds_destroy(&undnat_action);
+    ds_destroy(&snat_action);
 }
 
 static void
@@ -11263,8 +11306,11 @@  build_lrouter_nat_flows_for_lb(
         if (!od->n_l3dgw_ports) {
             bitmap_set1(gw_dp_bitmap[type], index);
         } else {
-            build_distr_lrouter_nat_flows_for_lb(&ctx, type, od,
-                                                 lb_dps->lflow_ref);
+            for (size_t i = 0; i < od->n_l3dgw_ports; i++) {
+                struct ovn_port *dgp = od->l3dgw_ports[i];
+                build_distr_lrouter_nat_flows_for_lb(&ctx, type, od,
+                                                     lb_dps->lflow_ref, dgp);
+            }
         }
 
         if (lb->affinity_timeout) {
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index a389d1988..bb508f9e6 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -12721,3 +12721,114 @@  AT_CHECK([ovn-sbctl dump-flows lr | grep lr_in_dnat | ovn_strip_lflows], [0], [d
 
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD_NO_HV_PARALLELIZATION([
+AT_SETUP([Load balancer with Distributed Gateway Ports (LB + DGP + NAT Stateless)])
+ovn_start
+
+check ovn-nbctl ls-add public
+check ovn-nbctl lr-add lr1
+
+# lr1 DGP ts1
+check ovn-nbctl ls-add ts1
+check ovn-nbctl lrp-add lr1 lr1-ts1 00:00:01:02:03:04 172.16.10.1/24
+check ovn-nbctl lrp-set-gateway-chassis lr1-ts1 chassis-2
+
+# lr1 DGP ts2
+check ovn-nbctl ls-add ts2
+check ovn-nbctl lrp-add lr1 lr1-ts2 00:00:01:02:03:05 172.16.20.1/24
+check ovn-nbctl lrp-set-gateway-chassis lr1-ts2 chassis-3
+
+# lr1 DGP public
+check ovn-nbctl lrp-add lr1 lr1_public 00:de:ad:ff:00:01 173.16.0.1/16
+check ovn-nbctl lrp-add lr1 lr1_s1 00:de:ad:fe:00:02 172.16.0.1/24
+check ovn-nbctl lrp-set-gateway-chassis lr1_public chassis-1
+
+check ovn-nbctl ls-add s1
+# s1 - lr1
+check ovn-nbctl lsp-add s1 s1_lr1
+check ovn-nbctl lsp-set-type s1_lr1 router
+check ovn-nbctl lsp-set-addresses s1_lr1 "00:de:ad:fe:00:02 172.16.0.1"
+check ovn-nbctl lsp-set-options s1_lr1 router-port=lr1_s1
+
+# s1 - backend vm1
+check ovn-nbctl lsp-add s1 vm1
+check ovn-nbctl lsp-set-addresses vm1 "00:de:ad:01:00:01 172.16.0.101"
+
+# s1 - backend vm2
+check ovn-nbctl lsp-add s1 vm2
+check ovn-nbctl lsp-set-addresses vm2 "00:de:ad:01:00:02 172.16.0.102"
+
+# s1 - backend vm3
+check ovn-nbctl lsp-add s1 vm3
+check ovn-nbctl lsp-set-addresses vm3 "00:de:ad:01:00:03 172.16.0.103"
+
+# Add the lr1 DGP ts1 to the public switch
+check ovn-nbctl lsp-add public public_lr1_ts1
+check ovn-nbctl lsp-set-type public_lr1_ts1 router
+check ovn-nbctl lsp-set-addresses public_lr1_ts1 router
+check ovn-nbctl lsp-set-options public_lr1_ts1 router-port=lr1-ts1 nat-addresses=router
+
+# Add the lr1 DGP ts2 to the public switch
+check ovn-nbctl lsp-add public public_lr1_ts2
+check ovn-nbctl lsp-set-type public_lr1_ts2 router
+check ovn-nbctl lsp-set-addresses public_lr1_ts2 router
+check ovn-nbctl lsp-set-options public_lr1_ts2 router-port=lr1-ts2 nat-addresses=router
+
+# Add the lr1 DGP public to the public switch
+check ovn-nbctl lsp-add public public_lr1
+check ovn-nbctl lsp-set-type public_lr1 router
+check ovn-nbctl lsp-set-addresses public_lr1 router
+check ovn-nbctl lsp-set-options public_lr1 router-port=lr1_public nat-addresses=router
+
+# Create the Load Balancer lb1
+check ovn-nbctl --wait=sb lb-add lb1 "30.0.0.1" "172.16.0.103,172.16.0.102,172.16.0.101"
+
+# Associate load balancer to s1
+check ovn-nbctl ls-lb-add s1 lb1
+check ovn-nbctl --wait=sb sync
+
+ovn-sbctl dump-flows s1 > s1flows
+AT_CAPTURE_FILE([s1flows])
+
+AT_CHECK([grep "ls_in_pre_stateful" s1flows | ovn_strip_lflows | grep "30.0.0.1"], [0], [dnl
+  table=??(ls_in_pre_stateful ), priority=120  , match=(reg0[[2]] == 1 && ip4.dst == 30.0.0.1), action=(reg1 = 30.0.0.1; ct_lb_mark;)
+])
+AT_CHECK([grep "ls_in_lb" s1flows | ovn_strip_lflows | grep "30.0.0.1"], [0], [dnl
+  table=??(ls_in_lb           ), priority=110  , match=(ct.new && ip4.dst == 30.0.0.1), action=(reg0[[1]] = 0; ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
+])
+
+# Associate load balancer to lr1 with DGP
+check ovn-nbctl lr-lb-add lr1 lb1
+check ovn-nbctl --wait=sb sync
+
+ovn-sbctl dump-flows lr1 > lr1flows
+AT_CAPTURE_FILE([lr1flows])
+
+# Check stateless NAT rules for load balancer with multiple DGP
+# 1. Check if the backend IPs are in the ipX.dst action
+AT_CHECK([grep "lr_in_dnat" lr1flows | ovn_strip_lflows | grep "30.0.0.1"], [0], [dnl
+  table=??(lr_in_dnat         ), priority=110  , match=(ct.new && !ct.rel && ip4 && ip4.dst == 30.0.0.1 && is_chassis_resident("cr-lr1-ts1")), action=(ip4.dst=172.16.0.103;ip4.dst=172.16.0.102;ip4.dst=172.16.0.101;ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
+  table=??(lr_in_dnat         ), priority=110  , match=(ct.new && !ct.rel && ip4 && ip4.dst == 30.0.0.1 && is_chassis_resident("cr-lr1-ts2")), action=(ip4.dst=172.16.0.103;ip4.dst=172.16.0.102;ip4.dst=172.16.0.101;ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
+  table=??(lr_in_dnat         ), priority=110  , match=(ct.new && !ct.rel && ip4 && ip4.dst == 30.0.0.1 && is_chassis_resident("cr-lr1_public")), action=(ip4.dst=172.16.0.103;ip4.dst=172.16.0.102;ip4.dst=172.16.0.101;ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);)
+])
+
+# 2. Check if the DGP ports are in the match with action next
+AT_CHECK([grep "lr_out_undnat" lr1flows | ovn_strip_lflows], [0], [dnl
+  table=??(lr_out_undnat      ), priority=0    , match=(1), action=(next;)
+  table=??(lr_out_undnat      ), priority=120  , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1-ts1" || outport == "lr1-ts1") && is_chassis_resident("cr-lr1-ts1")), action=(next;)
+  table=??(lr_out_undnat      ), priority=120  , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1-ts2" || outport == "lr1-ts2") && is_chassis_resident("cr-lr1-ts2")), action=(next;)
+  table=??(lr_out_undnat      ), priority=120  , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1_public" || outport == "lr1_public") && is_chassis_resident("cr-lr1_public")), action=(next;)
+])
+
+# 3. Check if the VIP IP is in the ipX.src action
+AT_CHECK([grep "lr_out_snat" lr1flows | ovn_strip_lflows], [0], [dnl
+  table=??(lr_out_snat        ), priority=0    , match=(1), action=(next;)
+  table=??(lr_out_snat        ), priority=120  , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1-ts1" || outport == "lr1-ts1") && is_chassis_resident("cr-lr1-ts1")), action=(ip4.src=30.0.0.1; next;)
+  table=??(lr_out_snat        ), priority=120  , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1-ts2" || outport == "lr1-ts2") && is_chassis_resident("cr-lr1-ts2")), action=(ip4.src=30.0.0.1; next;)
+  table=??(lr_out_snat        ), priority=120  , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1_public" || outport == "lr1_public") && is_chassis_resident("cr-lr1_public")), action=(ip4.src=30.0.0.1; next;)
+  table=??(lr_out_snat        ), priority=120  , match=(nd_ns), action=(next;)
+])
+
+AT_CLEANUP
+])