diff mbox series

[ovs-dev,v2,2/3] Northd: Pause virtual port binding requests for crowded ports.

Message ID 20240801104422.124876-3-mheib@redhat.com
State Changes Requested
Headers show
Series Virtual ports add binding request auto-pausing mechanism. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Mohammad Heib Aug. 1, 2024, 10:44 a.m. UTC
ovn-controller sends binding requests to update the virtual parent of a
virtual port to northd, in some cases those requests are not handled
immediately and  ovn-controller keeps sending requests over and over
which can lead to flooding northd with these requests.

This patch add the ability to pause virtual ports that send
so many binding requests to northd.

Signed-off-by: Mohammad Heib <mheib@redhat.com>
---
 northd/northd.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

Comments

Mark Michelson Aug. 28, 2024, 10:20 p.m. UTC | #1
On 8/1/24 06:44, Mohammad Heib wrote:
> ovn-controller sends binding requests to update the virtual parent of a
> virtual port to northd, in some cases those requests are not handled
> immediately and  ovn-controller keeps sending requests over and over
> which can lead to flooding northd with these requests.
> 
> This patch add the ability to pause virtual ports that send
> so many binding requests to northd.
> 
> Signed-off-by: Mohammad Heib <mheib@redhat.com>
> ---
>   northd/northd.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 98 insertions(+)
> 
> diff --git a/northd/northd.c b/northd/northd.c
> index 89d5b2936..a2782031d 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -3093,6 +3093,15 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn *ovnsb_txn,
>                                   "qdisc_queue_id", "%d", queue_id);
>               }
>   
> +            if (smap_get_bool(&op->sb->options,
> +                                "binding_request_pause", false)) {
> +                long long int p_time = smap_get_ullong(&op->sb->options,
> +                                "binding_request_pause_ts", 0);
> +                smap_add_format(&options, "binding_request_pause_ts",
> +                                "%lld", p_time);
> +                smap_add(&options, "binding_request_pause", "true");
> +            }
> +
>               if (smap_get_bool(&op->od->nbs->other_config, "vlan-passthru", false)) {
>                   smap_add(&options, "vlan-passthru", "true");
>               }
> @@ -3830,6 +3839,90 @@ void destroy_tracked_virtual_ports(void) {
>       hmap_destroy(&tracked_virtual_ports);
>   }
>   
> +/*
> + * For every virtual port that send request to update thier virtual_parent
> + * This function will update the following Port_binding options if needed:
> + *
> + * 1. tracked_virtual_port record belongs to this virtual port was created
> + *    when this port created. This tracked struct have two main Fields:
> + *
> + *      a. First_bind_in_tframe: this field will be set to the time that
> + *         binding request were reicved for this vport for the first time
> + *         within a timeframe.
> + *
> + *      b. Bind_request_cnt: this filed will be incresses every time a binding
> + *         request recived for that virtual port.
> + *
> + *
> + *  2. For each binding request received for a specific virtual port
> + *     check if the time diff between now and the first time that a
> + *     binding request were recived for this port within a pre-define
> + *     timeframe is less than that timeframe.
> + *
> + *  3. If the previous condition true increase Bind_request_cnt and
> + *     check if the total recived binding request recived for this port
> + *     within a time fram exceeded the VPORT_MAX_BINDING_REQUEST_TRESHOLD
> + *     set the Port_binding options:
> + *
> + *       PB:OPTIONS:binding_request_pause=true
> + *       PB:OPTIONS:binding_request_pause_ts=time_now
> + *
> + *
> + *  4. When ovn-controller recived a new GARP for this virtual port
> + *     before sending a binding request update to northd  it will check
> + *     if the port have binding_request_pause=true, ovn-controller will do
> + *     the following:
> + *
> + *     If the PB:OPTIONS:binding_request_pause_ts + 10 seconds greater
> + *     than the time now (GARP processing time), drop the GARP packet.
> + *
> + *     Otherwise, set the PB:OPTIONS:binding_request_pause=false and resume
> + *     binding request handling on this virtual port.
> + *

Please run the above comment block through a spelling and grammar checker.

> + *
> + */
> +static void
> +vport_binding_request_exceed_threshold(struct ovn_port *op)
> +{
> +    struct tracked_virtual_port * vport =
> +                  find_tracked_virtual_port(op->key);
> +    if (op->sb != NULL) {

There's nothing necessarily wrong with comparing to NULL, but the rest 
of the codebase tends use "if (op->sb)" instead, so this sticks out.

> +        /* This port already paused or not found ignore it */
> +        if ((smap_get_bool(&op->sb->options, "binding_request_pause",
> +             false) ==  true) || !vport) {

Similarly, the " == true" here sticks out as odd since it is not 
typically done this way in OVS/OVN code. Most other instances (including 
in this patch) just use "if (smap_get_bool())"

> +            return;
> +        }
> +    }
> +
> +    long long int cur_time = time_msec();
> +
> +    /* Still in the range of the time frame. */
> +    if ((vport->First_bind_in_tframe + VPORT_BINDING_TIMEFRAME) > cur_time) {
> +        if (++vport->Bind_request_cnt > VPORT_MAX_BINDING_REQUEST_TRESHOLD) {
> +            if (op->sb != NULL) {

Nit: Instead of three separate nested ifs, you could combine these into 
one if statement. This reduces the indentation level of the code inside.

> +                static struct vlog_rate_limit rl =
> +                    VLOG_RATE_LIMIT_INIT(1, 1);
> +                VLOG_WARN_RL(&rl, "Pausing virtual port %s from sending"
> +                    " binding requests for few seconds. "
> +                    " This port was paused in order to reduce the load on the"
> +                    " network.\n" , vport->name);
> +                struct smap options;
> +                smap_clone(&options, &op->sb->options);
> +                smap_add(&options, "binding_request_pause", "true");
> +                smap_add_format(&options, "binding_request_pause_ts", "%lld",
> +                    cur_time);

I looked ahead to patch 3 and I found out that ovn-controller is what 
reads options:binding_request_pause_ts, not ovn-northd. Since 
ovn-controller can run on a separate machine from ovn-northd, you should 
set options:binding_request_pause_ts based on the wall clock instead of 
a monotonic clock. The monotonic clocks between two separate machines 
can be wildly out of sync, but the wall clock should be in sync.

It's fine to use the monotonic clock for setting and checking 
vport->First_bind_in_tframe since it is an internal value only used by 
ovn-northd. But use time_wall_msec() to set 
options:binding_request_pause_ts instead of time_msec().

> +                sbrec_port_binding_set_options(op->sb, &options);
> +            }
> +        }
> +    } else {
> +        /* New Timeframe, that mean we had less than max binding
> +         * request for this vport with it the past time frame.
> +         */
> +        vport->First_bind_in_tframe = cur_time;
> +        vport->Bind_request_cnt = 0;
> +    }
> +}
> +
>   /* Syncs the SB port binding for the ovn_port 'op' of a logical switch port.
>    * Caller should make sure that the OVN SB IDL txn is not NULL.  Presently it
>    * only syncs the nat column of port binding corresponding to the 'op->nbsp' */
> @@ -5019,6 +5112,11 @@ northd_handle_sb_port_binding_changes(
>                                "IDL row, which is unusual.", pb->logical_port);
>                   return false;
>               }
> +
> +            if (sbrec_port_binding_is_updated(pb,
> +                        SBREC_PORT_BINDING_COL_VIRTUAL_PARENT)) {
> +                vport_binding_request_exceed_threshold(op);
> +            }
>           }
>       }
>       return true;
diff mbox series

Patch

diff --git a/northd/northd.c b/northd/northd.c
index 89d5b2936..a2782031d 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -3093,6 +3093,15 @@  ovn_port_update_sbrec(struct ovsdb_idl_txn *ovnsb_txn,
                                 "qdisc_queue_id", "%d", queue_id);
             }
 
+            if (smap_get_bool(&op->sb->options,
+                                "binding_request_pause", false)) {
+                long long int p_time = smap_get_ullong(&op->sb->options,
+                                "binding_request_pause_ts", 0);
+                smap_add_format(&options, "binding_request_pause_ts",
+                                "%lld", p_time);
+                smap_add(&options, "binding_request_pause", "true");
+            }
+
             if (smap_get_bool(&op->od->nbs->other_config, "vlan-passthru", false)) {
                 smap_add(&options, "vlan-passthru", "true");
             }
@@ -3830,6 +3839,90 @@  void destroy_tracked_virtual_ports(void) {
     hmap_destroy(&tracked_virtual_ports);
 }
 
+/*
+ * For every virtual port that send request to update thier virtual_parent
+ * This function will update the following Port_binding options if needed:
+ *
+ * 1. tracked_virtual_port record belongs to this virtual port was created
+ *    when this port created. This tracked struct have two main Fields:
+ *
+ *      a. First_bind_in_tframe: this field will be set to the time that
+ *         binding request were reicved for this vport for the first time
+ *         within a timeframe.
+ *
+ *      b. Bind_request_cnt: this filed will be incresses every time a binding
+ *         request recived for that virtual port.
+ *
+ *
+ *  2. For each binding request received for a specific virtual port
+ *     check if the time diff between now and the first time that a
+ *     binding request were recived for this port within a pre-define
+ *     timeframe is less than that timeframe.
+ *
+ *  3. If the previous condition true increase Bind_request_cnt and
+ *     check if the total recived binding request recived for this port
+ *     within a time fram exceeded the VPORT_MAX_BINDING_REQUEST_TRESHOLD
+ *     set the Port_binding options:
+ *
+ *       PB:OPTIONS:binding_request_pause=true
+ *       PB:OPTIONS:binding_request_pause_ts=time_now
+ *
+ *
+ *  4. When ovn-controller recived a new GARP for this virtual port
+ *     before sending a binding request update to northd  it will check
+ *     if the port have binding_request_pause=true, ovn-controller will do
+ *     the following:
+ *
+ *     If the PB:OPTIONS:binding_request_pause_ts + 10 seconds greater
+ *     than the time now (GARP processing time), drop the GARP packet.
+ *
+ *     Otherwise, set the PB:OPTIONS:binding_request_pause=false and resume
+ *     binding request handling on this virtual port.
+ *
+ *
+ */
+static void
+vport_binding_request_exceed_threshold(struct ovn_port *op)
+{
+    struct tracked_virtual_port * vport =
+                  find_tracked_virtual_port(op->key);
+    if (op->sb != NULL) {
+        /* This port already paused or not found ignore it */
+        if ((smap_get_bool(&op->sb->options, "binding_request_pause",
+             false) ==  true) || !vport) {
+            return;
+        }
+    }
+
+    long long int cur_time = time_msec();
+
+    /* Still in the range of the time frame. */
+    if ((vport->First_bind_in_tframe + VPORT_BINDING_TIMEFRAME) > cur_time) {
+        if (++vport->Bind_request_cnt > VPORT_MAX_BINDING_REQUEST_TRESHOLD) {
+            if (op->sb != NULL) {
+                static struct vlog_rate_limit rl =
+                    VLOG_RATE_LIMIT_INIT(1, 1);
+                VLOG_WARN_RL(&rl, "Pausing virtual port %s from sending"
+                    " binding requests for few seconds. "
+                    " This port was paused in order to reduce the load on the"
+                    " network.\n" , vport->name);
+                struct smap options;
+                smap_clone(&options, &op->sb->options);
+                smap_add(&options, "binding_request_pause", "true");
+                smap_add_format(&options, "binding_request_pause_ts", "%lld",
+                    cur_time);
+                sbrec_port_binding_set_options(op->sb, &options);
+            }
+        }
+    } else {
+        /* New Timeframe, that mean we had less than max binding
+         * request for this vport with it the past time frame.
+         */
+        vport->First_bind_in_tframe = cur_time;
+        vport->Bind_request_cnt = 0;
+    }
+}
+
 /* Syncs the SB port binding for the ovn_port 'op' of a logical switch port.
  * Caller should make sure that the OVN SB IDL txn is not NULL.  Presently it
  * only syncs the nat column of port binding corresponding to the 'op->nbsp' */
@@ -5019,6 +5112,11 @@  northd_handle_sb_port_binding_changes(
                              "IDL row, which is unusual.", pb->logical_port);
                 return false;
             }
+
+            if (sbrec_port_binding_is_updated(pb,
+                        SBREC_PORT_BINDING_COL_VIRTUAL_PARENT)) {
+                vport_binding_request_exceed_threshold(op);
+            }
         }
     }
     return true;