diff mbox series

[ovs-dev,v6,4/4] northd: Restore parallel build with dp_groups

Message ID 20210902132739.4175-4-anton.ivanov@cambridgegreys.com
State Superseded
Headers show
Series [ovs-dev,v6,1/4] northd: Disable parallel processing for logical_dp_groups | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Anton Ivanov Sept. 2, 2021, 1:27 p.m. UTC
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Restore parallel build with dp groups using rwlock instead
of per row locking as an underlying mechanism.

This provides improvement ~ 10% end-to-end on ovn-heater
under virtualization despite awakening some qemu gremlin
which makes qemu climb to silly CPU usage. The gain on
bare metal is likely to be higher.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
 1 file changed, 159 insertions(+), 56 deletions(-)

Comments

Mark Michelson Sept. 9, 2021, 9:02 p.m. UTC | #1
Hi Anton,

On a high level, this change results in some parts of the parallelized 
hashmap being unused. Should we keep the hashrow_locks structure and its 
APIs, for instance?

See below for more comments.

On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> Restore parallel build with dp groups using rwlock instead
> of per row locking as an underlying mechanism.
> 
> This provides improvement ~ 10% end-to-end on ovn-heater
> under virtualization despite awakening some qemu gremlin
> which makes qemu climb to silly CPU usage. The gain on
> bare metal is likely to be higher.
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> ---
>   northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>   1 file changed, 159 insertions(+), 56 deletions(-)
> 
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 1f903882b..4537c55dd 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -59,6 +59,7 @@
>   #include "unixctl.h"
>   #include "util.h"
>   #include "uuid.h"
> +#include "ovs-thread.h"
>   #include "openvswitch/vlog.h"
>   
>   VLOG_DEFINE_THIS_MODULE(ovn_northd);
> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>   static bool use_logical_dp_groups = false;
>   static bool use_parallel_build = true;
>   
> -static struct hashrow_locks lflow_locks;
> +static struct ovs_rwlock flowtable_lock;

With the change from the custom hashrow_locks to using an ovs_rwlock, I 
think it's important to document what data this lock is intending to 
protect.

 From what I can tell, this lock specifically is intended to protect 
access to the hmaps in the global lflow_state structure, and it's also 
intended to protect all ovn_datapaths' od_group hmaps. This is not 
something that is immediately obvious just from a global rwlock declaration.

> +
> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
> +                              struct ovn_datapath *od,
> +                              struct lflow_state *lflow_map,
> +                              uint32_t hash)
> +{
> +    hmapx_add(&old_lflow->od_group, od);
> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
> +    if (use_parallel_build) {
> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
> +    } else {
> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
> +    }
> +}
> +
> +#ifdef __clang__
> +#pragma clang diagnostic push
> +#pragma clang diagnostic ignored "-Wthread-safety"
> +#endif

The section above needs a comment to explain why -Wthread-safety is ignored.

>   
>   /* Adds a row with the specified contents to the Logical_Flow table.
>    * Version to use when locking is required.
> @@ -4388,55 +4408,127 @@ do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
>       struct ovn_lflow *old_lflow;
>       struct ovn_lflow *lflow;
>   
> +    /* Fast Path.
> +     * See if we can get away without writing - grab a rdlock and check
> +     * if we can get away with as little work as possible.
> +     */
> +
>       if (use_logical_dp_groups) {
> -        old_lflow = do_ovn_lflow_find(&lflow_map->single_od, NULL, stage,
> -                                      priority, match,
> +        if (use_parallel_build) {
> +            ovs_rwlock_rdlock(&flowtable_lock);
> +        }
> +        old_lflow = do_ovn_lflow_find(&lflow_map->single_od,
> +                                      NULL, stage, priority, match,
>                                         actions, ctrl_meter, hash);
>           if (old_lflow) {
> -            hmapx_add(&old_lflow->od_group, od);
> -            /* Found, different, od count went up. Move to multiple od. */
> -            if (hmapx_count(&old_lflow->od_group) > 1) {
> -                hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
> +            if (!hmapx_contains(&old_lflow->od_group, od)) {
> +                /* od not in od_group, we need to add it and move to
> +                 * multiple. */
>                   if (use_parallel_build) {
> -                    hmap_insert_fast(&lflow_map->multiple_od,
> -                                     &old_lflow->hmap_node, hash);
> -                } else {
> -                    hmap_insert(&lflow_map->multiple_od,
> -                                &old_lflow->hmap_node, hash);
> +                    /* Upgrade the lock to write, we are likely to
> +                     * modify data. */
> +                    ovs_rwlock_unlock(&flowtable_lock);
> +                    ovs_rwlock_wrlock(&flowtable_lock);
> +
> +                    /* Check if someone got ahead of us and the flow is already
> +                     * in multiple. */
> +                    if (!hmap_contains(&lflow_map->single_od,
> +                                       &old_lflow->hmap_node)) {

The logic here is fine, but that comment paired with that if statement 
is strange. Either

a) Change the comment to say "Check if someone got ahead of us and the 
flow has been removed from single."

or

b) Change the if statement to "if 
(hmap_contains(&lflow_map->multiple_od, &old_lflow->hmap_node))"

> +                        /* Someone did get ahead of us, add the od to the
> +                         * group. */
> +                        hmapx_add(&old_lflow->od_group, od);
> +                        goto done_update_unlock;
> +                    }
>                   }
> +                ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
> +                goto done_update_unlock;
>               }
> -        } else {
> -            /* Not found, lookup in multiple od. */
> +        }
> +        if (!old_lflow) {

I think this "if (!old_lflow)" can be left as an "else" instead. This 
would allow for you to remove one of the goto statements in the previous 
section.

> +            /* Not found in single, lookup in multiple od. */
>               old_lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
>                                             stage, priority, match,
>                                             actions, ctrl_meter, hash);
>               if (old_lflow) {
> -                hmapx_add(&old_lflow->od_group, od);
> +                if (!hmapx_contains(&old_lflow->od_group, od)) {
> +                    if (use_parallel_build) {
> +                        /* Upgrade lock to write.*/
> +                        ovs_rwlock_unlock(&flowtable_lock);
> +                        ovs_rwlock_wrlock(&flowtable_lock);
> +                    }
> +                    hmapx_add(&old_lflow->od_group, od);
> +                }
>               }
>           }
> +done_update_unlock:
> +        if (use_parallel_build) {
> +            ovs_rwlock_unlock(&flowtable_lock);
> +        }
>           if (old_lflow) {
>               return old_lflow;
>           }
>       }
>   
> -    lflow = xmalloc(sizeof *lflow);
> -    /* While adding new logical flows we're not setting single datapath, but
> -     * collecting a group.  'od' will be updated later for all flows with only
> -     * one datapath in a group, so it could be hashed correctly. */
> -    ovn_lflow_init(lflow, NULL, stage, priority,
> -                   xstrdup(match), xstrdup(actions),
> -                   io_port ? xstrdup(io_port) : NULL,
> -                   nullable_xstrdup(ctrl_meter),
> -                   ovn_lflow_hint(stage_hint), where);
> -    hmapx_add(&lflow->od_group, od);
> -
> -    /* Insert "fresh" lflows into single_od. */
> +    /* Slow Path.
> +     * We could not get away with minimal mostly ro amount of work,
> +     * lock with rw and try to do an insert (may end up repeating
> +     * some of what we do for ro). */
>   
> +    if (use_logical_dp_groups && use_parallel_build) {
> +        ovs_rwlock_wrlock(&flowtable_lock);
> +    }
>       if (!use_parallel_build) {
> +        lflow = xmalloc(sizeof *lflow);
> +        /* While adding new logical flows we are not setting single datapath,
> +         * but collecting a group.  'od' will be updated later for all flows
> +         * with only one datapath in a group, so it could be hashed correctly.
> +         */
> +        ovn_lflow_init(lflow, NULL, stage, priority,
> +                       xstrdup(match), xstrdup(actions),
> +                       io_port ? xstrdup(io_port) : NULL,
> +                       nullable_xstrdup(ctrl_meter),
> +                       ovn_lflow_hint(stage_hint), where);
> +        hmapx_add(&lflow->od_group, od);
>           hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
>       } else {
> +        if (use_logical_dp_groups) {
> +            /* Search again in case someone else got here first. */
> +            lflow = do_ovn_lflow_find(&lflow_map->single_od,
> +                                          NULL, stage, priority, match,
> +                                          actions, ctrl_meter, hash);
> +            if (lflow) {
> +                if (!hmapx_contains(&lflow->od_group, od)) {
> +                    ovn_make_multi_lflow(lflow, od, lflow_map, hash);
> +                }
> +                goto done_add_unlock;
> +            }
> +            /* Unlikely, but possible, more than one thread got here
> +             * ahead of us while we were waiting to acquire a write lock. */
> +            lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
> +                                          stage, priority, match,
> +                                          actions, ctrl_meter, hash);
> +            if (lflow) {
> +                hmapx_add(&lflow->od_group, od);
> +                goto done_add_unlock;
> +            }
> +        }
> +        lflow = xmalloc(sizeof *lflow);
> +        /* While adding new logical flows we're not setting single datapath,
> +         * but collecting a group.  'od' will be updated later for all
> +         * flows with only * one datapath in a group, so it could be hashed
> +         * correctly. */
> +        ovn_lflow_init(lflow, NULL, stage, priority,
> +                       xstrdup(match), xstrdup(actions),
> +                       io_port ? xstrdup(io_port) : NULL,
> +                       nullable_xstrdup(ctrl_meter),
> +                       ovn_lflow_hint(stage_hint), where);
> +        hmapx_add(&lflow->od_group, od);

This section can run if we are not using logical dp groups. Should this 
hmapx_add() call have "if (use_logical_dp_groups)" as a precondition?

>           hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, hash);
>       }
> +done_add_unlock:
> +    if (use_logical_dp_groups && use_parallel_build) {
> +        ovs_rwlock_unlock(&flowtable_lock);
> +    }
>       return lflow;
>   }

I found the logic of do_ovn_lflow_add() to be very difficult to follow. 
It was hard to keep in mind what conditions resulted in locks being 
held, and what the current state of the locks was. It also contains a 
lot of repeated code. I have a suggested rewrite of this function that I 
am placing at the bottom of this email. I think I have the logic the 
same as what you have, but I think it's a lot easier to follow what's 
happening. Let me know what you think.

>   
> @@ -4449,22 +4541,16 @@ ovn_lflow_add_at_with_hash(struct lflow_state *lflow_map,
>                              const struct ovsdb_idl_row *stage_hint,
>                              const char *where, uint32_t hash)
>   {
> -    struct ovn_lflow *lflow;
> -
>       ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
> -    if (use_logical_dp_groups && use_parallel_build) {
> -        lock_hash_row(&lflow_locks, hash);
> -        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
> +    return do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
>                                    actions, io_port, stage_hint, where,
>                                    ctrl_meter);
> -        unlock_hash_row(&lflow_locks, hash);
> -    } else {
> -        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
> -                         actions, io_port, stage_hint, where, ctrl_meter);
> -    }
> -    return lflow;
>   }
>   
> +#ifdef __clang__
> +#pragma clang diagnostic pop
> +#endif
> +
>   /* Adds a row with the specified contents to the Logical_Flow table. */
>   static void
>   ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
> @@ -4483,19 +4569,29 @@ ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
>                                  io_port, ctrl_meter, stage_hint, where, hash);
>   }
>   
> +
> +#ifdef __clang__
> +#pragma clang diagnostic push
> +#pragma clang diagnostic ignored "-Wthread-safety"
> +#endif
> +
>   static bool
>   ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
> -                                struct ovn_datapath *od,
> -                                uint32_t hash)
> +                                struct ovn_datapath *od)
>   {
>       if (!use_logical_dp_groups || !lflow_ref) {
>           return false;
>       }
>   
>       if (use_parallel_build) {
> -        lock_hash_row(&lflow_locks, hash);
> -        hmapx_add(&lflow_ref->od_group, od);
> -        unlock_hash_row(&lflow_locks, hash);
> +        ovs_rwlock_rdlock(&flowtable_lock);
> +        if (!hmapx_contains(&lflow_ref->od_group, od)) {
> +            /* Upgrade lock to write. */
> +            ovs_rwlock_unlock(&flowtable_lock);
> +            ovs_rwlock_wrlock(&flowtable_lock);
> +            hmapx_add(&lflow_ref->od_group, od);
> +        }
> +        ovs_rwlock_unlock(&flowtable_lock);
>       } else {
>           hmapx_add(&lflow_ref->od_group, od);
>       }
> @@ -4503,6 +4599,11 @@ ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
>       return true;
>   }
>   
> +#ifdef __clang__
> +#pragma clang diagnostic pop
> +#endif
> +
> +
>   /* Adds a row with the specified contents to the Logical_Flow table. */
>   #define ovn_lflow_add_with_hint__(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
>                                     ACTIONS, IN_OUT_PORT, CTRL_METER, \
> @@ -4594,15 +4695,9 @@ hmap_safe_remove(struct hmap *hmap, struct hmap_node *node)
>   static void
>   remove_lflow_from_lflows(struct lflow_state *lflows, struct ovn_lflow *lflow)
>   {
> -    if (use_logical_dp_groups && use_parallel_build) {
> -        lock_hash_row(&lflow_locks, lflow->hmap_node.hash);
> -    }
>       if (!hmap_safe_remove(&lflows->multiple_od, &lflow->hmap_node)) {
>           hmap_remove(&lflows->single_od, &lflow->hmap_node);
>       }
> -    if (use_logical_dp_groups && use_parallel_build) {
> -        unlock_hash_row(&lflow_locks, lflow->hmap_node.hash);
> -    }
>   }
>   
>   static void
> @@ -6511,7 +6606,7 @@ build_lb_rules(struct lflow_state *lflows, struct ovn_northd_lb *lb,
>               if (reject) {
>                   meter = copp_meter_get(COPP_REJECT, od->nbs->copp,
>                                          meter_groups);
> -            } else if (ovn_dp_group_add_with_reference(lflow_ref, od, hash)) {
> +            } else if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
>                   continue;
>               }
>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
> @@ -9565,7 +9660,7 @@ build_lrouter_defrag_flows_for_lb(struct ovn_northd_lb *lb,
>                   ds_cstr(match), ds_cstr(&defrag_actions));
>           for (size_t j = 0; j < lb->n_nb_lr; j++) {
>               struct ovn_datapath *od = lb->nb_lr[j];
> -            if (ovn_dp_group_add_with_reference(lflow_ref, od, hash)) {
> +            if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
>                   continue;
>               }
>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
> @@ -9629,7 +9724,7 @@ build_lflows_for_unreachable_vips(struct ovn_northd_lb *lb,
>                   continue;
>               }
>   
> -            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od, hash)) {
> +            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od)) {
>                   continue;
>               }
>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, peer->od,
> @@ -13081,7 +13176,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>           }
>       }
>   
> -    if (use_parallel_build && (!use_logical_dp_groups)) {
> +    if (use_parallel_build) {
>           struct lflow_state *lflow_segs;
>           struct lswitch_flow_build_info *lsiv;
>           int index;
> @@ -13319,6 +13414,8 @@ reconcile_lflow(struct ovn_lflow *lflow, struct northd_context *ctx,
>       ovn_lflow_destroy(lflows, lflow);
>   }
>   
> +static bool needs_init = true;

I think this variable should be renamed. "needs_init" directly relates 
to the flowtable_lock, but that is not reflected in the variable name. 
Also since this is only used in build_lflows, we could probably move 
this declaration to within build_lflows.

> +
>   /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
>    * constructing their contents based on the OVN_NB database. */
>   static void
> @@ -13333,8 +13430,15 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
>   
>       fast_hmap_size_for(&lflows.single_od, max_seen_lflow_size);
>       fast_hmap_size_for(&lflows.multiple_od, max_seen_lflow_size);
> -    if (use_parallel_build && use_logical_dp_groups) {
> -        update_hashrow_locks(&lflows.single_od, &lflow_locks);
> +    if (use_parallel_build && use_logical_dp_groups && needs_init) {
> +        ovs_rwlock_init(&flowtable_lock);
> +        /* This happens on first run with parallel+dp_groups.
> +         * db_run will re-read use_parallel_build from config and
> +         * reset it. This way we get correct sizing for
> +         * parallel + dp_groups by doing one single threaded run
> +         * on the first iteration. */
> +        use_parallel_build = false;
> +        needs_init = false;
>       }
>   
>   
> @@ -15293,7 +15397,6 @@ main(int argc, char *argv[])
>   
>       daemonize_complete();
>   
> -    init_hash_row_locks(&lflow_locks);
>       use_parallel_build = can_parallelize_hashes(false);
>   
>       /* We want to detect (almost) all changes to the ovn-nb db. */
> 

-----
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 4537c55dd..81ad929ec 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -4394,142 +4394,143 @@ static void ovn_make_multi_lflow(struct 
ovn_lflow *old_lflow,
  #pragma clang diagnostic ignored "-Wthread-safety"
  #endif

-/* Adds a row with the specified contents to the Logical_Flow table.
- * Version to use when locking is required.
- */
  static struct ovn_lflow *
-do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
-                 uint32_t hash, enum ovn_stage stage, uint16_t priority,
-                 const char *match, const char *actions, const char 
*io_port,
-                 const struct ovsdb_idl_row *stage_hint,
-                 const char *where, const char *ctrl_meter)
+do_ovn_lflow_add_simple(struct lflow_state *lflow_map, uint32_t hash,
+                        enum ovn_stage stage, uint16_t priority,
+                        const char *match, const char *actions,
+                        const char *io_port,
+                        const struct ovsdb_idl_row *stage_hint,
+                        const char *where, const char *ctrl_meter)
+
+{
+    struct ovn_lflow *lflow;
+
+    lflow = xmalloc(sizeof *lflow);
+
+    /* While adding new logical flows we are not setting single datapath,
+     * but collecting a group.  'od' will be updated later for all flows
+     * with only one datapath in a group, so it could be hashed correctly.
+     */
+    ovn_lflow_init(lflow, NULL, stage, priority,
+                   xstrdup(match), xstrdup(actions),
+                   io_port ? xstrdup(io_port) : NULL,
+                   nullable_xstrdup(ctrl_meter),
+                   ovn_lflow_hint(stage_hint), where);
+    if (use_parallel_build) {
+        hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, hash);
+    } else {
+        hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
+    }
+
+    return lflow;
+}
+
+static struct ovn_lflow *
+do_ovn_lflow_add_with_groups(struct lflow_state *lflow_map,
+                             struct ovn_datapath *od, uint32_t hash,
+                             enum ovn_stage stage, uint16_t priority,
+                             const char *match, const char *actions,
+                             const char *io_port,
+                             const struct ovsdb_idl_row *stage_hint,
+                             const char *where, const char *ctrl_meter)
  {
+    struct ovn_lflow *lflow;
+    ovs_assert(use_logical_dp_groups);
+
+    lflow = do_ovn_lflow_find(&lflow_map->single_od,
+                              NULL, stage, priority, match,
+                              actions, ctrl_meter, hash);

-    struct ovn_lflow *old_lflow;
+    if (lflow) {
+        ovn_make_multi_lflow(lflow, od, lflow_map, hash);
+    } else {
+        lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
+                                  stage, priority, match,
+                                  actions, ctrl_meter, hash);
+    }
+
+    if (!lflow) {
+        lflow = do_ovn_lflow_add_simple(lflow_map, hash, stage, priority,
+                                        match, actions, io_port, 
stage_hint,
+                                        where, ctrl_meter);
+    }
+
+    if (!hmapx_contains(&lflow->od_group, od)) {
+        hmapx_add(&lflow->od_group, od);
+    }
+
+    return lflow;
+}
+
+static struct ovn_lflow *
+do_ovn_lflow_add_parallel_with_groups(struct lflow_state *lflow_map,
+                                      struct ovn_datapath *od,
+                                      uint32_t hash, enum ovn_stage stage,
+                                      uint16_t priority, const char *match,
+                                      const char *actions, const char 
*io_port,
+                                      const struct ovsdb_idl_row 
*stage_hint,
+                                      const char *where, const char 
*ctrl_meter)
+{
      struct ovn_lflow *lflow;

+    ovs_assert(use_parallel_build && use_logical_dp_groups);
+
      /* Fast Path.
       * See if we can get away without writing - grab a rdlock and check
       * if we can get away with as little work as possible.
       */
-
-    if (use_logical_dp_groups) {
-        if (use_parallel_build) {
-            ovs_rwlock_rdlock(&flowtable_lock);
-        }
-        old_lflow = do_ovn_lflow_find(&lflow_map->single_od,
-                                      NULL, stage, priority, match,
-                                      actions, ctrl_meter, hash);
-        if (old_lflow) {
-            if (!hmapx_contains(&old_lflow->od_group, od)) {
-                /* od not in od_group, we need to add it and move to
-                 * multiple. */
-                if (use_parallel_build) {
-                    /* Upgrade the lock to write, we are likely to
-                     * modify data. */
-                    ovs_rwlock_unlock(&flowtable_lock);
-                    ovs_rwlock_wrlock(&flowtable_lock);
-
-                    /* Check if someone got ahead of us and the flow is 
already
-                     * in multiple. */
-                    if (!hmap_contains(&lflow_map->single_od,
-                                       &old_lflow->hmap_node)) {
-                        /* Someone did get ahead of us, add the od to the
-                         * group. */
-                        hmapx_add(&old_lflow->od_group, od);
-                        goto done_update_unlock;
-                    }
-                }
-                ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
-                goto done_update_unlock;
-            }
-        }
-        if (!old_lflow) {
-            /* Not found in single, lookup in multiple od. */
-            old_lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
-                                          stage, priority, match,
-                                          actions, ctrl_meter, hash);
-            if (old_lflow) {
-                if (!hmapx_contains(&old_lflow->od_group, od)) {
-                    if (use_parallel_build) {
-                        /* Upgrade lock to write.*/
-                        ovs_rwlock_unlock(&flowtable_lock);
-                        ovs_rwlock_wrlock(&flowtable_lock);
-                    }
-                    hmapx_add(&old_lflow->od_group, od);
-                }
-            }
-        }
-done_update_unlock:
-        if (use_parallel_build) {
-            ovs_rwlock_unlock(&flowtable_lock);
-        }
-        if (old_lflow) {
-            return old_lflow;
-        }
+    ovs_rwlock_rdlock(&flowtable_lock);
+    lflow = do_ovn_lflow_find(&lflow_map->single_od,
+                                  NULL, stage, priority, match,
+                                  actions, ctrl_meter, hash);
+    if (lflow && hmapx_contains(&lflow->od_group, od)) {
+        /* This flow already has this datapath in the group. Just 
return it.*/
+        goto end;
      }

+    ovs_rwlock_unlock(&flowtable_lock);
+
      /* Slow Path.
       * We could not get away with minimal mostly ro amount of work,
       * lock with rw and try to do an insert (may end up repeating
       * some of what we do for ro). */
+    ovs_rwlock_wrlock(&flowtable_lock);
+
+    lflow = do_ovn_lflow_add_with_groups(lflow_map, od, hash, stage,
+                                         priority, match, actions, io_port,
+                                         stage_hint, where, ctrl_meter);
+
+end:
+    ovs_rwlock_unlock(&flowtable_lock);
+
+    return lflow;
+}
+
+/* Adds a row with the specified contents to the Logical_Flow table.
+ * Version to use when locking is required.
+ */
+static struct ovn_lflow *
+do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
+                 uint32_t hash, enum ovn_stage stage, uint16_t priority,
+                 const char *match, const char *actions, const char 
*io_port,
+                 const struct ovsdb_idl_row *stage_hint,
+                 const char *where, const char *ctrl_meter)
+{

      if (use_logical_dp_groups && use_parallel_build) {
-        ovs_rwlock_wrlock(&flowtable_lock);
-    }
-    if (!use_parallel_build) {
-        lflow = xmalloc(sizeof *lflow);
-        /* While adding new logical flows we are not setting single 
datapath,
-         * but collecting a group.  'od' will be updated later for all 
flows
-         * with only one datapath in a group, so it could be hashed 
correctly.
-         */
-        ovn_lflow_init(lflow, NULL, stage, priority,
-                       xstrdup(match), xstrdup(actions),
-                       io_port ? xstrdup(io_port) : NULL,
-                       nullable_xstrdup(ctrl_meter),
-                       ovn_lflow_hint(stage_hint), where);
-        hmapx_add(&lflow->od_group, od);
-        hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
+        return do_ovn_lflow_add_parallel_with_groups(lflow_map, od, 
hash, stage,
+                                                     priority, match, 
actions,
+                                                     io_port, 
stage_hint, where,
+                                                     ctrl_meter);
+    } else if (use_logical_dp_groups) {
+        return do_ovn_lflow_add_with_groups(lflow_map, od, hash, stage,
+                                            priority, match, actions, 
io_port,
+                                            stage_hint, where, ctrl_meter);
      } else {
-        if (use_logical_dp_groups) {
-            /* Search again in case someone else got here first. */
-            lflow = do_ovn_lflow_find(&lflow_map->single_od,
-                                          NULL, stage, priority, match,
-                                          actions, ctrl_meter, hash);
-            if (lflow) {
-                if (!hmapx_contains(&lflow->od_group, od)) {
-                    ovn_make_multi_lflow(lflow, od, lflow_map, hash);
-                }
-                goto done_add_unlock;
-            }
-            /* Unlikely, but possible, more than one thread got here
-             * ahead of us while we were waiting to acquire a write lock. */
-            lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
-                                          stage, priority, match,
-                                          actions, ctrl_meter, hash);
-            if (lflow) {
-                hmapx_add(&lflow->od_group, od);
-                goto done_add_unlock;
-            }
-        }
-        lflow = xmalloc(sizeof *lflow);
-        /* While adding new logical flows we're not setting single 
datapath,
-         * but collecting a group.  'od' will be updated later for all
-         * flows with only * one datapath in a group, so it could be hashed
-         * correctly. */
-        ovn_lflow_init(lflow, NULL, stage, priority,
-                       xstrdup(match), xstrdup(actions),
-                       io_port ? xstrdup(io_port) : NULL,
-                       nullable_xstrdup(ctrl_meter),
-                       ovn_lflow_hint(stage_hint), where);
-        hmapx_add(&lflow->od_group, od);
-        hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, hash);
+        return do_ovn_lflow_add_simple(lflow_map, hash, stage, priority,
+                                       match, actions, io_port, stage_hint,
+                                       where, ctrl_meter);
      }
-done_add_unlock:
-    if (use_logical_dp_groups && use_parallel_build) {
-        ovs_rwlock_unlock(&flowtable_lock);
-    }
-    return lflow;
  }

  static struct ovn_lflow *
----
Anton Ivanov Sept. 10, 2021, 7:02 a.m. UTC | #2
On 09/09/2021 22:02, Mark Michelson wrote:
> Hi Anton,
>
> On a high level, this change results in some parts of the parallelized 
> hashmap being unused. Should we keep the hashrow_locks structure and 
> its APIs, for instance?

It is a general method of fine-grained locking of hashes for parallel 
processing. While it is no longer used in lflow generation, it may stay 
as it may come in handy elsewhere.

Some renaming is probably in order - the names are too lflow specific.

A.

>
> See below for more comments.
>
> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> Restore parallel build with dp groups using rwlock instead
>> of per row locking as an underlying mechanism.
>>
>> This provides improvement ~ 10% end-to-end on ovn-heater
>> under virutalization despite awakening some qemu gremlin
>> which makes qemu climb to silly CPU usage. The gain on
>> bare metal is likely to be higher.
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>   1 file changed, 159 insertions(+), 56 deletions(-)
>>
>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>> index 1f903882b..4537c55dd 100644
>> --- a/northd/ovn-northd.c
>> +++ b/northd/ovn-northd.c
>> @@ -59,6 +59,7 @@
>>   #include "unixctl.h"
>>   #include "util.h"
>>   #include "uuid.h"
>> +#include "ovs-thread.h"
>>   #include "openvswitch/vlog.h"
>>     VLOG_DEFINE_THIS_MODULE(ovn_northd);
>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
>> ovn_datapath *od,
>>   static bool use_logical_dp_groups = false;
>>   static bool use_parallel_build = true;
>>   -static struct hashrow_locks lflow_locks;
>> +static struct ovs_rwlock flowtable_lock;
>
> With the change from the custom hashrow_locks to using an ovs_rwlock, 
> I think it's important to document what data this lock is intending to 
> protect.
>
> From what I can tell, this lock specifically is intended to protect 
> access to the hmaps in the global lflow_state structure, and it's also 
> intended to protect all ovn_datapaths' od_group hmaps. This is not 
> something that is immediately obvious just from a global rwlock 
> declaration.
>
>> +
>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>> +                              struct ovn_datapath *od,
>> +                              struct lflow_state *lflow_map,
>> +                              uint32_t hash)
>> +{
>> +    hmapx_add(&old_lflow->od_group, od);
>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>> +    if (use_parallel_build) {
>> +        hmap_insert_fast(&lflow_map->multiple_od, 
>> &old_lflow->hmap_node, hash);
>> +    } else {
>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, 
>> hash);
>> +    }
>> +}
>> +
>> +#ifdef __clang__
>> +#pragma clang diagnostic push
>> +#pragma clang diagnostic ignored "-Wthread-safety"
>> +#endif
>
> The section above needs a comment to explain why -Wthread-safety is 
> ignored.
>
>>     /* Adds a row with the specified contents to the Logical_Flow table.
>>    * Version to use when locking is required.
>> @@ -4388,55 +4408,127 @@ do_ovn_lflow_add(struct lflow_state 
>> *lflow_map, struct ovn_datapath *od,
>>       struct ovn_lflow *old_lflow;
>>       struct ovn_lflow *lflow;
>>   +    /* Fast Path.
>> +     * See if we can get away without writing - grab a rdlock and check
>> +     * if we can get away with as little work as possible.
>> +     */
>> +
>>       if (use_logical_dp_groups) {
>> -        old_lflow = do_ovn_lflow_find(&lflow_map->single_od, NULL, 
>> stage,
>> -                                      priority, match,
>> +        if (use_parallel_build) {
>> +            ovs_rwlock_rdlock(&flowtable_lock);
>> +        }
>> +        old_lflow = do_ovn_lflow_find(&lflow_map->single_od,
>> +                                      NULL, stage, priority, match,
>>                                         actions, ctrl_meter, hash);
>>           if (old_lflow) {
>> -            hmapx_add(&old_lflow->od_group, od);
>> -            /* Found, different, od count went up. Move to multiple 
>> od. */
>> -            if (hmapx_count(&old_lflow->od_group) > 1) {
>> -                hmap_remove(&lflow_map->single_od, 
>> &old_lflow->hmap_node);
>> +            if (!hmapx_contains(&old_lflow->od_group, od)) {
>> +                /* od not in od_group, we need to add it and move to
>> +                 * multiple. */
>>                   if (use_parallel_build) {
>> - hmap_insert_fast(&lflow_map->multiple_od,
>> - &old_lflow->hmap_node, hash);
>> -                } else {
>> -                    hmap_insert(&lflow_map->multiple_od,
>> -                                &old_lflow->hmap_node, hash);
>> +                    /* Upgrade the lock to write, we are likely to
>> +                     * modify data. */
>> +                    ovs_rwlock_unlock(&flowtable_lock);
>> +                    ovs_rwlock_wrlock(&flowtable_lock);
>> +
>> +                    /* Check if someone got ahead of us and the flow 
>> is already
>> +                     * in multiple. */
>> +                    if (!hmap_contains(&lflow_map->single_od,
>> + &old_lflow->hmap_node)) {
>
> The logic here is fine, but that comment paired with that if statement 
> is strange. Either
>
> a) Change the comment to say "Check if someone got ahead of us and the 
> flow has been removed from single."
>
> or
>
> b) Change the if statement to "if 
> (hmap_contains(&lflow_map->multiple_od, &old_lflow->hmap_node))"
>
>> +                        /* Someone did get ahead of us, add the od 
>> to the
>> +                         * group. */
>> +                        hmapx_add(&old_lflow->od_group, od);
>> +                        goto done_update_unlock;
>> +                    }
>>                   }
>> +                ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
>> +                goto done_update_unlock;
>>               }
>> -        } else {
>> -            /* Not found, lookup in multiple od. */
>> +        }
>> +        if (!old_lflow) {
>
> I think this "if (!old_lflow)" can be left as an "else" instead. This 
> would allow for you to remove one of the goto statements in the 
> previous section.
>
>> +            /* Not found in single, lookup in multiple od. */
>>               old_lflow = do_ovn_lflow_find(&lflow_map->multiple_od, 
>> NULL,
>>                                             stage, priority, match,
>>                                             actions, ctrl_meter, hash);
>>               if (old_lflow) {
>> -                hmapx_add(&old_lflow->od_group, od);
>> +                if (!hmapx_contains(&old_lflow->od_group, od)) {
>> +                    if (use_parallel_build) {
>> +                        /* Upgrade lock to write.*/
>> +                        ovs_rwlock_unlock(&flowtable_lock);
>> +                        ovs_rwlock_wrlock(&flowtable_lock);
>> +                    }
>> +                    hmapx_add(&old_lflow->od_group, od);
>> +                }
>>               }
>>           }
>> +done_update_unlock:
>> +        if (use_parallel_build) {
>> +            ovs_rwlock_unlock(&flowtable_lock);
>> +        }
>>           if (old_lflow) {
>>               return old_lflow;
>>           }
>>       }
>>   -    lflow = xmalloc(sizeof *lflow);
>> -    /* While adding new logical flows we're not setting single 
>> datapath, but
>> -     * collecting a group.  'od' will be updated later for all flows 
>> with only
>> -     * one datapath in a group, so it could be hashed correctly. */
>> -    ovn_lflow_init(lflow, NULL, stage, priority,
>> -                   xstrdup(match), xstrdup(actions),
>> -                   io_port ? xstrdup(io_port) : NULL,
>> -                   nullable_xstrdup(ctrl_meter),
>> -                   ovn_lflow_hint(stage_hint), where);
>> -    hmapx_add(&lflow->od_group, od);
>> -
>> -    /* Insert "fresh" lflows into single_od. */
>> +    /* Slow Path.
>> +     * We could not get away with minimal mostly ro amount of work,
>> +     * lock with rw and try to do an insert (may end up repeating
>> +     * some of what we do for ro). */
>>   +    if (use_logical_dp_groups && use_parallel_build) {
>> +        ovs_rwlock_wrlock(&flowtable_lock);
>> +    }
>>       if (!use_parallel_build) {
>> +        lflow = xmalloc(sizeof *lflow);
>> +        /* While adding new logical flows we are not setting single 
>> datapath,
>> +         * but collecting a group.  'od' will be updated later for 
>> all flows
>> +         * with only one datapath in a group, so it could be hashed 
>> correctly.
>> +         */
>> +        ovn_lflow_init(lflow, NULL, stage, priority,
>> +                       xstrdup(match), xstrdup(actions),
>> +                       io_port ? xstrdup(io_port) : NULL,
>> +                       nullable_xstrdup(ctrl_meter),
>> +                       ovn_lflow_hint(stage_hint), where);
>> +        hmapx_add(&lflow->od_group, od);
>>           hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
>>       } else {
>> +        if (use_logical_dp_groups) {
>> +            /* Search again in case someone else got here first. */
>> +            lflow = do_ovn_lflow_find(&lflow_map->single_od,
>> +                                          NULL, stage, priority, match,
>> +                                          actions, ctrl_meter, hash);
>> +            if (lflow) {
>> +                if (!hmapx_contains(&lflow->od_group, od)) {
>> +                    ovn_make_multi_lflow(lflow, od, lflow_map, hash);
>> +                }
>> +                goto done_add_unlock;
>> +            }
>> +            /* Unlikely, but possible, more than one thread got here
>> +             * ahead of us while we were waiting to acquire a write 
>> lock. */
>> +            lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
>> +                                          stage, priority, match,
>> +                                          actions, ctrl_meter, hash);
>> +            if (lflow) {
>> +                hmapx_add(&lflow->od_group, od);
>> +                goto done_add_unlock;
>> +            }
>> +        }
>> +        lflow = xmalloc(sizeof *lflow);
>> +        /* While adding new logical flows we're not setting single 
>> datapath,
>> +         * but collecting a group.  'od' will be updated later for all
>> +         * flows with only one datapath in a group, so it could be 
>> hashed
>> +         * correctly. */
>> +        ovn_lflow_init(lflow, NULL, stage, priority,
>> +                       xstrdup(match), xstrdup(actions),
>> +                       io_port ? xstrdup(io_port) : NULL,
>> +                       nullable_xstrdup(ctrl_meter),
>> +                       ovn_lflow_hint(stage_hint), where);
>> +        hmapx_add(&lflow->od_group, od);
>
> This section can run if we are not using logical dp groups. Should 
> this hmapx_add() call have "if (use_logical_dp_groups)" as a 
> precondition?
>
>> hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, hash);
>>       }
>> +done_add_unlock:
>> +    if (use_logical_dp_groups && use_parallel_build) {
>> +        ovs_rwlock_unlock(&flowtable_lock);
>> +    }
>>       return lflow;
>>   }
>
> I found the logic of do_ovn_lflow_add() to be very difficult to 
> follow. It was hard to keep in mind what conditions resulted in locks 
> being held, and what the current state of the locks was. It also 
> contains a lot of repeated code. I have a suggested rewrite of this 
> function that I am placing at the bottom of this email. I think I have 
> the logic the same as what you have, but I think it's a lot easier to 
> follow what's happening. Let me know what you think.
>
>>   @@ -4449,22 +4541,16 @@ ovn_lflow_add_at_with_hash(struct 
>> lflow_state *lflow_map,
>>                              const struct ovsdb_idl_row *stage_hint,
>>                              const char *where, uint32_t hash)
>>   {
>> -    struct ovn_lflow *lflow;
>> -
>>       ovs_assert(ovn_stage_to_datapath_type(stage) == 
>> ovn_datapath_get_type(od));
>> -    if (use_logical_dp_groups && use_parallel_build) {
>> -        lock_hash_row(&lflow_locks, hash);
>> -        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, 
>> priority, match,
>> +    return do_ovn_lflow_add(lflow_map, od, hash, stage, priority, 
>> match,
>>                                    actions, io_port, stage_hint, where,
>>                                    ctrl_meter);
>> -        unlock_hash_row(&lflow_locks, hash);
>> -    } else {
>> -        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, 
>> priority, match,
>> -                         actions, io_port, stage_hint, where, 
>> ctrl_meter);
>> -    }
>> -    return lflow;
>>   }
>>   +#ifdef __clang__
>> +#pragma clang diagnostic pop
>> +#endif
>> +
>>   /* Adds a row with the specified contents to the Logical_Flow 
>> table. */
>>   static void
>>   ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath 
>> *od,
>> @@ -4483,19 +4569,29 @@ ovn_lflow_add_at(struct lflow_state 
>> *lflow_map, struct ovn_datapath *od,
>>                                  io_port, ctrl_meter, stage_hint, 
>> where, hash);
>>   }
>>   +
>> +#ifdef __clang__
>> +#pragma clang diagnostic push
>> +#pragma clang diagnostic ignored "-Wthread-safety"
>> +#endif
>> +
>>   static bool
>>   ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
>> -                                struct ovn_datapath *od,
>> -                                uint32_t hash)
>> +                                struct ovn_datapath *od)
>>   {
>>       if (!use_logical_dp_groups || !lflow_ref) {
>>           return false;
>>       }
>>         if (use_parallel_build) {
>> -        lock_hash_row(&lflow_locks, hash);
>> -        hmapx_add(&lflow_ref->od_group, od);
>> -        unlock_hash_row(&lflow_locks, hash);
>> +        ovs_rwlock_rdlock(&flowtable_lock);
>> +        if (!hmapx_contains(&lflow_ref->od_group, od)) {
>> +            /* Upgrade lock to write. */
>> +            ovs_rwlock_unlock(&flowtable_lock);
>> +            ovs_rwlock_wrlock(&flowtable_lock);
>> +            hmapx_add(&lflow_ref->od_group, od);
>> +        }
>> +        ovs_rwlock_unlock(&flowtable_lock);
>>       } else {
>>           hmapx_add(&lflow_ref->od_group, od);
>>       }
>> @@ -4503,6 +4599,11 @@ ovn_dp_group_add_with_reference(struct 
>> ovn_lflow *lflow_ref,
>>       return true;
>>   }
>>   +#ifdef __clang__
>> +#pragma clang diagnostic pop
>> +#endif
>> +
>> +
>>   /* Adds a row with the specified contents to the Logical_Flow 
>> table. */
>>   #define ovn_lflow_add_with_hint__(LFLOW_MAP, OD, STAGE, PRIORITY, 
>> MATCH, \
>>                                     ACTIONS, IN_OUT_PORT, CTRL_METER, \
>> @@ -4594,15 +4695,9 @@ hmap_safe_remove(struct hmap *hmap, struct 
>> hmap_node *node)
>>   static void
>>   remove_lflow_from_lflows(struct lflow_state *lflows, struct 
>> ovn_lflow *lflow)
>>   {
>> -    if (use_logical_dp_groups && use_parallel_build) {
>> -        lock_hash_row(&lflow_locks, lflow->hmap_node.hash);
>> -    }
>>       if (!hmap_safe_remove(&lflows->multiple_od, &lflow->hmap_node)) {
>>           hmap_remove(&lflows->single_od, &lflow->hmap_node);
>>       }
>> -    if (use_logical_dp_groups && use_parallel_build) {
>> -        unlock_hash_row(&lflow_locks, lflow->hmap_node.hash);
>> -    }
>>   }
>>     static void
>> @@ -6511,7 +6606,7 @@ build_lb_rules(struct lflow_state *lflows, 
>> struct ovn_northd_lb *lb,
>>               if (reject) {
>>                   meter = copp_meter_get(COPP_REJECT, od->nbs->copp,
>>                                          meter_groups);
>> -            } else if (ovn_dp_group_add_with_reference(lflow_ref, 
>> od, hash)) {
>> +            } else if (ovn_dp_group_add_with_reference(lflow_ref, 
>> od)) {
>>                   continue;
>>               }
>>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
>> @@ -9565,7 +9660,7 @@ build_lrouter_defrag_flows_for_lb(struct 
>> ovn_northd_lb *lb,
>>                   ds_cstr(match), ds_cstr(&defrag_actions));
>>           for (size_t j = 0; j < lb->n_nb_lr; j++) {
>>               struct ovn_datapath *od = lb->nb_lr[j];
>> -            if (ovn_dp_group_add_with_reference(lflow_ref, od, hash)) {
>> +            if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
>>                   continue;
>>               }
>>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
>> @@ -9629,7 +9724,7 @@ build_lflows_for_unreachable_vips(struct 
>> ovn_northd_lb *lb,
>>                   continue;
>>               }
>>   -            if (ovn_dp_group_add_with_reference(lflow_ref, 
>> peer->od, hash)) {
>> +            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od)) {
>>                   continue;
>>               }
>>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, peer->od,
>> @@ -13081,7 +13176,7 @@ build_lswitch_and_lrouter_flows(struct hmap 
>> *datapaths, struct hmap *ports,
>>           }
>>       }
>>   -    if (use_parallel_build && (!use_logical_dp_groups)) {
>> +    if (use_parallel_build) {
>>           struct lflow_state *lflow_segs;
>>           struct lswitch_flow_build_info *lsiv;
>>           int index;
>> @@ -13319,6 +13414,8 @@ reconcile_lflow(struct ovn_lflow *lflow, 
>> struct northd_context *ctx,
>>       ovn_lflow_destroy(lflows, lflow);
>>   }
>>   +static bool needs_init = true;
>
> I think this variable should be renamed. "needs_init" directly relates 
> to the flowtable_lock, but that is not reflected in the variable name. 
> Also since this is only used in build_lflows, we could probably move 
> this declaration to within build_lflows.
>
>> +
>>   /* Updates the Logical_Flow and Multicast_Group tables in the 
>> OVN_SB database,
>>    * constructing their contents based on the OVN_NB database. */
>>   static void
>> @@ -13333,8 +13430,15 @@ build_lflows(struct northd_context *ctx, 
>> struct hmap *datapaths,
>>         fast_hmap_size_for(&lflows.single_od, max_seen_lflow_size);
>>       fast_hmap_size_for(&lflows.multiple_od, max_seen_lflow_size);
>> -    if (use_parallel_build && use_logical_dp_groups) {
>> -        update_hashrow_locks(&lflows.single_od, &lflow_locks);
>> +    if (use_parallel_build && use_logical_dp_groups && needs_init) {
>> +        ovs_rwlock_init(&flowtable_lock);
>> +        /* This happens on first run with parallel+dp_groups.
>> +         * db_run will re-read use_parallel_build from config and
>> +         * reset it. This way we get correct sizing for
>> +         * parallel + dp_groups by doing one single threaded run
>> +         * on the first iteration. */
>> +        use_parallel_build = false;
>> +        needs_init = false;
>>       }
>>     @@ -15293,7 +15397,6 @@ main(int argc, char *argv[])
>>         daemonize_complete();
>>   -    init_hash_row_locks(&lflow_locks);
>>       use_parallel_build = can_parallelize_hashes(false);
>>         /* We want to detect (almost) all changes to the ovn-nb db. */
>>
>
> -----
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 4537c55dd..81ad929ec 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -4394,142 +4394,143 @@ static void ovn_make_multi_lflow(struct 
> ovn_lflow *old_lflow,
>  #pragma clang diagnostic ignored "-Wthread-safety"
>  #endif
>
> -/* Adds a row with the specified contents to the Logical_Flow table.
> - * Version to use when locking is required.
> - */
>  static struct ovn_lflow *
> -do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
> -                 uint32_t hash, enum ovn_stage stage, uint16_t priority,
> -                 const char *match, const char *actions, const char 
> *io_port,
> -                 const struct ovsdb_idl_row *stage_hint,
> -                 const char *where, const char *ctrl_meter)
> +do_ovn_lflow_add_simple(struct lflow_state *lflow_map, uint32_t hash,
> +                        enum ovn_stage stage, uint16_t priority,
> +                        const char *match, const char *actions,
> +                        const char *io_port,
> +                        const struct ovsdb_idl_row *stage_hint,
> +                        const char *where, const char *ctrl_meter)
> +
> +{
> +    struct ovn_lflow *lflow;
> +
> +    lflow = xmalloc(sizeof *lflow);
> +
> +    /* While adding new logical flows we are not setting single 
> datapath,
> +     * but collecting a group.  'od' will be updated later for all flows
> +     * with only one datapath in a group, so it could be hashed 
> correctly.
> +     */
> +    ovn_lflow_init(lflow, NULL, stage, priority,
> +                   xstrdup(match), xstrdup(actions),
> +                   io_port ? xstrdup(io_port) : NULL,
> +                   nullable_xstrdup(ctrl_meter),
> +                   ovn_lflow_hint(stage_hint), where);
> +    if (use_parallel_build) {
> +        hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, 
> hash);
> +    } else {
> +        hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
> +    }
> +
> +    return lflow;
> +}
> +
> +static struct ovn_lflow *
> +do_ovn_lflow_add_with_groups(struct lflow_state *lflow_map,
> +                             struct ovn_datapath *od, uint32_t hash,
> +                             enum ovn_stage stage, uint16_t priority,
> +                             const char *match, const char *actions,
> +                             const char *io_port,
> +                             const struct ovsdb_idl_row *stage_hint,
> +                             const char *where, const char *ctrl_meter)
>  {
> +    struct ovn_lflow *lflow;
> +    ovs_assert(use_logical_dp_groups);
> +
> +    lflow = do_ovn_lflow_find(&lflow_map->single_od,
> +                              NULL, stage, priority, match,
> +                              actions, ctrl_meter, hash);
>
> -    struct ovn_lflow *old_lflow;
> +    if (lflow) {
> +        ovn_make_multi_lflow(lflow, od, lflow_map, hash);
> +    } else {
> +        lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
> +                                  stage, priority, match,
> +                                  actions, ctrl_meter, hash);
> +    }
> +
> +    if (!lflow) {
> +        lflow = do_ovn_lflow_add_simple(lflow_map, hash, stage, 
> priority,
> +                                        match, actions, io_port, 
> stage_hint,
> +                                        where, ctrl_meter);
> +    }
> +
> +    if (!hmapx_contains(&lflow->od_group, od)) {
> +        hmapx_add(&lflow->od_group, od);
> +    }
> +
> +    return lflow;
> +}
> +
> +static struct ovn_lflow *
> +do_ovn_lflow_add_parallel_with_groups(struct lflow_state *lflow_map,
> +                                      struct ovn_datapath *od,
> +                                      uint32_t hash, enum ovn_stage 
> stage,
> +                                      uint16_t priority, const char 
> *match,
> +                                      const char *actions, const char 
> *io_port,
> +                                      const struct ovsdb_idl_row 
> *stage_hint,
> +                                      const char *where, const char 
> *ctrl_meter)
> +{
>      struct ovn_lflow *lflow;
>
> +    ovs_assert(use_parallel_build && use_logical_dp_groups);
> +
>      /* Fast Path.
>       * See if we can get away without writing - grab a rdlock and check
>       * if we can get away with as little work as possible.
>       */
> -
> -    if (use_logical_dp_groups) {
> -        if (use_parallel_build) {
> -            ovs_rwlock_rdlock(&flowtable_lock);
> -        }
> -        old_lflow = do_ovn_lflow_find(&lflow_map->single_od,
> -                                      NULL, stage, priority, match,
> -                                      actions, ctrl_meter, hash);
> -        if (old_lflow) {
> -            if (!hmapx_contains(&old_lflow->od_group, od)) {
> -                /* od not in od_group, we need to add it and move to
> -                 * multiple. */
> -                if (use_parallel_build) {
> -                    /* Upgrade the lock to write, we are likely to
> -                     * modify data. */
> -                    ovs_rwlock_unlock(&flowtable_lock);
> -                    ovs_rwlock_wrlock(&flowtable_lock);
> -
> -                    /* Check if someone got ahead of us and the flow 
> is already
> -                     * in multiple. */
> -                    if (!hmap_contains(&lflow_map->single_od,
> - &old_lflow->hmap_node)) {
> -                        /* Someone did get ahead of us, add the od to 
> the
> -                         * group. */
> -                        hmapx_add(&old_lflow->od_group, od);
> -                        goto done_update_unlock;
> -                    }
> -                }
> -                ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
> -                goto done_update_unlock;
> -            }
> -        }
> -        if (!old_lflow) {
> -            /* Not found in single, lookup in multiple od. */
> -            old_lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
> -                                          stage, priority, match,
> -                                          actions, ctrl_meter, hash);
> -            if (old_lflow) {
> -                if (!hmapx_contains(&old_lflow->od_group, od)) {
> -                    if (use_parallel_build) {
> -                        /* Upgrade lock to write.*/
> -                        ovs_rwlock_unlock(&flowtable_lock);
> -                        ovs_rwlock_wrlock(&flowtable_lock);
> -                    }
> -                    hmapx_add(&old_lflow->od_group, od);
> -                }
> -            }
> -        }
> -done_update_unlock:
> -        if (use_parallel_build) {
> -            ovs_rwlock_unlock(&flowtable_lock);
> -        }
> -        if (old_lflow) {
> -            return old_lflow;
> -        }
> +    ovs_rwlock_rdlock(&flowtable_lock);
> +    lflow = do_ovn_lflow_find(&lflow_map->single_od,
> +                                  NULL, stage, priority, match,
> +                                  actions, ctrl_meter, hash);
> +    if (lflow && hmapx_contains(&lflow->od_group, od)) {
> +        /* This flow already has this datapath in the group. Just 
> return it.*/
> +        goto end;
>      }
>
> +    ovs_rwlock_unlock(&flowtable_lock);
> +
>      /* Slow Path.
>       * We could not get away with minimal mostly ro amount of work,
>       * lock with rw and try to do an insert (may end up repeating
>       * some of what we do for ro). */
> +    ovs_rwlock_wrlock(&flowtable_lock);
> +
> +    lflow = do_ovn_lflow_add_with_groups(lflow_map, od, hash, stage,
> +                                         priority, match, actions, 
> io_port,
> +                                         stage_hint, where, ctrl_meter);
> +
> +end:
> +    ovs_rwlock_unlock(&flowtable_lock);
> +
> +    return lflow;
> +}
> +
> +/* Adds a row with the specified contents to the Logical_Flow table.
> + * Version to use when locking is required.
> + */
> +static struct ovn_lflow *
> +do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
> +                 uint32_t hash, enum ovn_stage stage, uint16_t priority,
> +                 const char *match, const char *actions, const char 
> *io_port,
> +                 const struct ovsdb_idl_row *stage_hint,
> +                 const char *where, const char *ctrl_meter)
> +{
>
>      if (use_logical_dp_groups && use_parallel_build) {
> -        ovs_rwlock_wrlock(&flowtable_lock);
> -    }
> -    if (!use_parallel_build) {
> -        lflow = xmalloc(sizeof *lflow);
> -        /* While adding new logical flows we are not setting single 
> datapath,
> -         * but collecting a group.  'od' will be updated later for 
> all flows
> -         * with only one datapath in a group, so it could be hashed 
> correctly.
> -         */
> -        ovn_lflow_init(lflow, NULL, stage, priority,
> -                       xstrdup(match), xstrdup(actions),
> -                       io_port ? xstrdup(io_port) : NULL,
> -                       nullable_xstrdup(ctrl_meter),
> -                       ovn_lflow_hint(stage_hint), where);
> -        hmapx_add(&lflow->od_group, od);
> -        hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
> +        return do_ovn_lflow_add_parallel_with_groups(lflow_map, od, 
> hash, stage,
> +                                                     priority, match, 
> actions,
> +                                                     io_port, 
> stage_hint, where,
> +                                                     ctrl_meter);
> +    } else if (use_logical_dp_groups) {
> +        return do_ovn_lflow_add_with_groups(lflow_map, od, hash, stage,
> +                                            priority, match, actions, 
> io_port,
> +                                            stage_hint, where, 
> ctrl_meter);
>      } else {
> -        if (use_logical_dp_groups) {
> -            /* Search again in case someone else got here first. */
> -            lflow = do_ovn_lflow_find(&lflow_map->single_od,
> -                                          NULL, stage, priority, match,
> -                                          actions, ctrl_meter, hash);
> -            if (lflow) {
> -                if (!hmapx_contains(&lflow->od_group, od)) {
> -                    ovn_make_multi_lflow(lflow, od, lflow_map, hash);
> -                }
> -                goto done_add_unlock;
> -            }
> -            /* Unlikely, but possible, more than one thread got here
> -             * ahead of us while we were wating to acquire a write 
> lock. */
> -            lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
> -                                          stage, priority, match,
> -                                          actions, ctrl_meter, hash);
> -            if (lflow) {
> -                hmapx_add(&lflow->od_group, od);
> -                goto done_add_unlock;
> -            }
> -        }
> -        lflow = xmalloc(sizeof *lflow);
> -        /* While adding new logical flows we're not setting single 
> datapath,
> -         * but collecting a group.  'od' will be updated later for all
> -         * flows with only * one datapath in a group, so it could be 
> hashed
> -         * correctly. */
> -        ovn_lflow_init(lflow, NULL, stage, priority,
> -                       xstrdup(match), xstrdup(actions),
> -                       io_port ? xstrdup(io_port) : NULL,
> -                       nullable_xstrdup(ctrl_meter),
> -                       ovn_lflow_hint(stage_hint), where);
> -        hmapx_add(&lflow->od_group, od);
> -        hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, 
> hash);
> +        return do_ovn_lflow_add_simple(lflow_map, hash, stage, priority,
> +                                       match, actions, io_port, 
> stage_hint,
> +                                       where, ctrl_meter);
>      }
> -done_add_unlock:
> -    if (use_logical_dp_groups && use_parallel_build) {
> -        ovs_rwlock_unlock(&flowtable_lock);
> -    }
> -    return lflow;
>  }
>
>  static struct ovn_lflow *
> ----
>
>
Anton Ivanov Sept. 10, 2021, 7:09 a.m. UTC | #3
On 09/09/2021 22:02, Mark Michelson wrote:
> Hi Anton,
>
> On a high level, this change results in some parts of the parallelized 
> hashmap being unused. Should we keep the hashrow_locks structure and 
> its APIs, for instance?
>
> See below for more comments.

Addressing those separately from the API discussion.

>
> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> Restore parallel build with dp groups using rwlock instead
>> of per row locking as an underlying mechanism.
>>
>> This provides improvement ~ 10% end-to-end on ovn-heater
>> under virtualization despite awakening some qemu gremlin
>> which makes qemu climb to silly CPU usage. The gain on
>> bare metal is likely to be higher.
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>   1 file changed, 159 insertions(+), 56 deletions(-)
>>
>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>> index 1f903882b..4537c55dd 100644
>> --- a/northd/ovn-northd.c
>> +++ b/northd/ovn-northd.c
>> @@ -59,6 +59,7 @@
>>   #include "unixctl.h"
>>   #include "util.h"
>>   #include "uuid.h"
>> +#include "ovs-thread.h"
>>   #include "openvswitch/vlog.h"
>>     VLOG_DEFINE_THIS_MODULE(ovn_northd);
>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct 
>> ovn_datapath *od,
>>   static bool use_logical_dp_groups = false;
>>   static bool use_parallel_build = true;
>>   -static struct hashrow_locks lflow_locks;
>> +static struct ovs_rwlock flowtable_lock;
>
> With the change from the custom hashrow_locks to using an ovs_rwlock, 
> I think it's important to document what data this lock is intending to 
> protect.
>
> From what I can tell, this lock specifically is intended to protect 
> access to the hmaps in the global lflow_state structure, and it's also 
> intended to protect all ovn_datapaths' od_group hmaps. This is not 
> something that is immediately obvious just from a global rwlock 
> declaration.


Ack.


>
>> +
>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>> +                              struct ovn_datapath *od,
>> +                              struct lflow_state *lflow_map,
>> +                              uint32_t hash)
>> +{
>> +    hmapx_add(&old_lflow->od_group, od);
>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>> +    if (use_parallel_build) {
>> +        hmap_insert_fast(&lflow_map->multiple_od, 
>> &old_lflow->hmap_node, hash);
>> +    } else {
>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, 
>> hash);
>> +    }
>> +}
>> +
>> +#ifdef __clang__
>> +#pragma clang diagnostic push
>> +#pragma clang diagnostic ignored "-Wthread-safety"
>> +#endif
>
> The section above needs a comment to explain why -Wthread-safety is 
> ignored.


Ack.


>
>>     /* Adds a row with the specified contents to the Logical_Flow table.
>>    * Version to use when locking is required.
>> @@ -4388,55 +4408,127 @@ do_ovn_lflow_add(struct lflow_state 
>> *lflow_map, struct ovn_datapath *od,
>>       struct ovn_lflow *old_lflow;
>>       struct ovn_lflow *lflow;
>>   +    /* Fast Path.
>> +     * See if we can get away without writing - grab a rdlock and check
>> +     * if we can get away with as little work as possible.
>> +     */
>> +
>>       if (use_logical_dp_groups) {
>> -        old_lflow = do_ovn_lflow_find(&lflow_map->single_od, NULL, 
>> stage,
>> -                                      priority, match,
>> +        if (use_parallel_build) {
>> +            ovs_rwlock_rdlock(&flowtable_lock);
>> +        }
>> +        old_lflow = do_ovn_lflow_find(&lflow_map->single_od,
>> +                                      NULL, stage, priority, match,
>>                                         actions, ctrl_meter, hash);
>>           if (old_lflow) {
>> -            hmapx_add(&old_lflow->od_group, od);
>> -            /* Found, different, od count went up. Move to multiple 
>> od. */
>> -            if (hmapx_count(&old_lflow->od_group) > 1) {
>> -                hmap_remove(&lflow_map->single_od, 
>> &old_lflow->hmap_node);
>> +            if (!hmapx_contains(&old_lflow->od_group, od)) {
>> +                /* od not in od_group, we need to add it and move to
>> +                 * multiple. */
>>                   if (use_parallel_build) {
>> - hmap_insert_fast(&lflow_map->multiple_od,
>> - &old_lflow->hmap_node, hash);
>> -                } else {
>> -                    hmap_insert(&lflow_map->multiple_od,
>> -                                &old_lflow->hmap_node, hash);
>> +                    /* Upgrade the lock to write, we are likely to
>> +                     * modify data. */
>> +                    ovs_rwlock_unlock(&flowtable_lock);
>> +                    ovs_rwlock_wrlock(&flowtable_lock);
>> +
>> +                    /* Check if someone got ahead of us and the flow 
>> is already
>> +                     * in multiple. */
>> +                    if (!hmap_contains(&lflow_map->single_od,
>> + &old_lflow->hmap_node)) {
>
> The logic here is fine, but that comment paired with that if statement 
> is strange. Either
>
> a) Change the comment to say "Check if someone got ahead of us and the 
> flow has been removed from single."

Ack.

>
> or
>
> b) Change the if statement to "if 
> (hmap_contains(&lflow_map->multiple_od, &old_lflow->hmap_node))"
>
>> +                        /* Someone did get ahead of us, add the od 
>> to the
>> +                         * group. */
>> +                        hmapx_add(&old_lflow->od_group, od);
>> +                        goto done_update_unlock;
>> +                    }
>>                   }
>> +                ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
>> +                goto done_update_unlock;
>>               }
>> -        } else {
>> -            /* Not found, lookup in multiple od. */
>> +        }
>> +        if (!old_lflow) {
>
> I think this "if (!old_lflow)" can be left as an "else" instead. This 
> would allow for you to remove one of the goto statements in the 
> previous section.

Yes. That is a remnant from an earlier version where there was an extra 
lookup.

>
>> +            /* Not found in single, lookup in multiple od. */
>>               old_lflow = do_ovn_lflow_find(&lflow_map->multiple_od, 
>> NULL,
>>                                             stage, priority, match,
>>                                             actions, ctrl_meter, hash);
>>               if (old_lflow) {
>> -                hmapx_add(&old_lflow->od_group, od);
>> +                if (!hmapx_contains(&old_lflow->od_group, od)) {
>> +                    if (use_parallel_build) {
>> +                        /* Upgrade lock to write.*/
>> +                        ovs_rwlock_unlock(&flowtable_lock);
>> +                        ovs_rwlock_wrlock(&flowtable_lock);
>> +                    }
>> +                    hmapx_add(&old_lflow->od_group, od);
>> +                }
>>               }
>>           }
>> +done_update_unlock:
>> +        if (use_parallel_build) {
>> +            ovs_rwlock_unlock(&flowtable_lock);
>> +        }
>>           if (old_lflow) {
>>               return old_lflow;
>>           }
>>       }
>>   -    lflow = xmalloc(sizeof *lflow);
>> -    /* While adding new logical flows we're not setting single 
>> datapath, but
>> -     * collecting a group.  'od' will be updated later for all flows 
>> with only
>> -     * one datapath in a group, so it could be hashed correctly. */
>> -    ovn_lflow_init(lflow, NULL, stage, priority,
>> -                   xstrdup(match), xstrdup(actions),
>> -                   io_port ? xstrdup(io_port) : NULL,
>> -                   nullable_xstrdup(ctrl_meter),
>> -                   ovn_lflow_hint(stage_hint), where);
>> -    hmapx_add(&lflow->od_group, od);
>> -
>> -    /* Insert "fresh" lflows into single_od. */
>> +    /* Slow Path.
>> +     * We could not get away with minimal mostly ro amount of work,
>> +     * lock with rw and try to do an insert (may end up repeating
>> +     * some of what we do for ro). */
>>   +    if (use_logical_dp_groups && use_parallel_build) {
>> +        ovs_rwlock_wrlock(&flowtable_lock);
>> +    }
>>       if (!use_parallel_build) {
>> +        lflow = xmalloc(sizeof *lflow);
>> +        /* While adding new logical flows we are not setting single 
>> datapath,
>> +         * but collecting a group.  'od' will be updated later for 
>> all flows
>> +         * with only one datapath in a group, so it could be hashed 
>> correctly.
>> +         */
>> +        ovn_lflow_init(lflow, NULL, stage, priority,
>> +                       xstrdup(match), xstrdup(actions),
>> +                       io_port ? xstrdup(io_port) : NULL,
>> +                       nullable_xstrdup(ctrl_meter),
>> +                       ovn_lflow_hint(stage_hint), where);
>> +        hmapx_add(&lflow->od_group, od);
>>           hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
>>       } else {
>> +        if (use_logical_dp_groups) {
>> +            /* Search again in case someone else got here first. */
>> +            lflow = do_ovn_lflow_find(&lflow_map->single_od,
>> +                                          NULL, stage, priority, match,
>> +                                          actions, ctrl_meter, hash);
>> +            if (lflow) {
>> +                if (!hmapx_contains(&lflow->od_group, od)) {
>> +                    ovn_make_multi_lflow(lflow, od, lflow_map, hash);
>> +                }
>> +                goto done_add_unlock;
>> +            }
>> +            /* Unlikely, but possible, more than one thread got here
>> +             * ahead of us while we were wating to acquire a write 
>> lock. */
>> +            lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
>> +                                          stage, priority, match,
>> +                                          actions, ctrl_meter, hash);
>> +            if (lflow) {
>> +                hmapx_add(&lflow->od_group, od);
>> +                goto done_add_unlock;
>> +            }
>> +        }
>> +        lflow = xmalloc(sizeof *lflow);
>> +        /* While adding new logical flows we're not setting single 
>> datapath,
>> +         * but collecting a group.  'od' will be updated later for all
>> +         * flows with only * one datapath in a group, so it could be 
>> hashed
>> +         * correctly. */
>> +        ovn_lflow_init(lflow, NULL, stage, priority,
>> +                       xstrdup(match), xstrdup(actions),
>> +                       io_port ? xstrdup(io_port) : NULL,
>> +                       nullable_xstrdup(ctrl_meter),
>> +                       ovn_lflow_hint(stage_hint), where);
>> +        hmapx_add(&lflow->od_group, od);
>
> This section can run if we are not using logical dp groups. Should 
> this hmapx_add() call have "if (use_logical_dp_groups)" as a 
> precondition?

That is how it was in the original code, before the optimization that 
generates the multiple and single tables on the fly. I did not want to 
change it, as I do not fully understand all the places where this is 
used. You need to ask Ilya about this.

>
>> hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, hash);
>>       }
>> +done_add_unlock:
>> +    if (use_logical_dp_groups && use_parallel_build) {
>> +        ovs_rwlock_unlock(&flowtable_lock);
>> +    }
>>       return lflow;
>>   }
>
> I found the logic of do_ovn_lflow_add() to be very difficult to 
> follow. It was hard to keep in mind what conditions resulted in locks 
> being held, and what the current state of the locks was. It also 
> contains a lot of repeated code. I have a suggested rewrite of this 
> function that I am placing at the bottom of this email. I think I have 
> the logic the same as what you have, but I think it's a lot easier to 
> follow what's happening. Let me know what you think.

Such is life in the world where someone can steal an item from under 
your feet in a different thread and move it to a different hash.

It can be documented in a more thorough fashion, but it will be 
difficult to make it more readable without sacrificing performance.

>
>>   @@ -4449,22 +4541,16 @@ ovn_lflow_add_at_with_hash(struct 
>> lflow_state *lflow_map,
>>                              const struct ovsdb_idl_row *stage_hint,
>>                              const char *where, uint32_t hash)
>>   {
>> -    struct ovn_lflow *lflow;
>> -
>>       ovs_assert(ovn_stage_to_datapath_type(stage) == 
>> ovn_datapath_get_type(od));
>> -    if (use_logical_dp_groups && use_parallel_build) {
>> -        lock_hash_row(&lflow_locks, hash);
>> -        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, 
>> priority, match,
>> +    return do_ovn_lflow_add(lflow_map, od, hash, stage, priority, 
>> match,
>>                                    actions, io_port, stage_hint, where,
>>                                    ctrl_meter);
>> -        unlock_hash_row(&lflow_locks, hash);
>> -    } else {
>> -        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, 
>> priority, match,
>> -                         actions, io_port, stage_hint, where, 
>> ctrl_meter);
>> -    }
>> -    return lflow;
>>   }
>>   +#ifdef __clang__
>> +#pragma clang diagnostic pop
>> +#endif
>> +
>>   /* Adds a row with the specified contents to the Logical_Flow 
>> table. */
>>   static void
>>   ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath 
>> *od,
>> @@ -4483,19 +4569,29 @@ ovn_lflow_add_at(struct lflow_state 
>> *lflow_map, struct ovn_datapath *od,
>>                                  io_port, ctrl_meter, stage_hint, 
>> where, hash);
>>   }
>>   +
>> +#ifdef __clang__
>> +#pragma clang diagnostic push
>> +#pragma clang diagnostic ignored "-Wthread-safety"
>> +#endif
>> +
>>   static bool
>>   ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
>> -                                struct ovn_datapath *od,
>> -                                uint32_t hash)
>> +                                struct ovn_datapath *od)
>>   {
>>       if (!use_logical_dp_groups || !lflow_ref) {
>>           return false;
>>       }
>>         if (use_parallel_build) {
>> -        lock_hash_row(&lflow_locks, hash);
>> -        hmapx_add(&lflow_ref->od_group, od);
>> -        unlock_hash_row(&lflow_locks, hash);
>> +        ovs_rwlock_rdlock(&flowtable_lock);
>> +        if (!hmapx_contains(&lflow_ref->od_group, od)) {
>> +            /* Upgrade lock to write. */
>> +            ovs_rwlock_unlock(&flowtable_lock);
>> +            ovs_rwlock_wrlock(&flowtable_lock);
>> +            hmapx_add(&lflow_ref->od_group, od);
>> +        }
>> +        ovs_rwlock_unlock(&flowtable_lock);
>>       } else {
>>           hmapx_add(&lflow_ref->od_group, od);
>>       }
>> @@ -4503,6 +4599,11 @@ ovn_dp_group_add_with_reference(struct 
>> ovn_lflow *lflow_ref,
>>       return true;
>>   }
>>   +#ifdef __clang__
>> +#pragma clang diagnostic pop
>> +#endif
>> +
>> +
>>   /* Adds a row with the specified contents to the Logical_Flow 
>> table. */
>>   #define ovn_lflow_add_with_hint__(LFLOW_MAP, OD, STAGE, PRIORITY, 
>> MATCH, \
>>                                     ACTIONS, IN_OUT_PORT, CTRL_METER, \
>> @@ -4594,15 +4695,9 @@ hmap_safe_remove(struct hmap *hmap, struct 
>> hmap_node *node)
>>   static void
>>   remove_lflow_from_lflows(struct lflow_state *lflows, struct 
>> ovn_lflow *lflow)
>>   {
>> -    if (use_logical_dp_groups && use_parallel_build) {
>> -        lock_hash_row(&lflow_locks, lflow->hmap_node.hash);
>> -    }
>>       if (!hmap_safe_remove(&lflows->multiple_od, &lflow->hmap_node)) {
>>           hmap_remove(&lflows->single_od, &lflow->hmap_node);
>>       }
>> -    if (use_logical_dp_groups && use_parallel_build) {
>> -        unlock_hash_row(&lflow_locks, lflow->hmap_node.hash);
>> -    }
>>   }
>>     static void
>> @@ -6511,7 +6606,7 @@ build_lb_rules(struct lflow_state *lflows, 
>> struct ovn_northd_lb *lb,
>>               if (reject) {
>>                   meter = copp_meter_get(COPP_REJECT, od->nbs->copp,
>>                                          meter_groups);
>> -            } else if (ovn_dp_group_add_with_reference(lflow_ref, 
>> od, hash)) {
>> +            } else if (ovn_dp_group_add_with_reference(lflow_ref, 
>> od)) {
>>                   continue;
>>               }
>>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
>> @@ -9565,7 +9660,7 @@ build_lrouter_defrag_flows_for_lb(struct 
>> ovn_northd_lb *lb,
>>                   ds_cstr(match), ds_cstr(&defrag_actions));
>>           for (size_t j = 0; j < lb->n_nb_lr; j++) {
>>               struct ovn_datapath *od = lb->nb_lr[j];
>> -            if (ovn_dp_group_add_with_reference(lflow_ref, od, hash)) {
>> +            if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
>>                   continue;
>>               }
>>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
>> @@ -9629,7 +9724,7 @@ build_lflows_for_unreachable_vips(struct 
>> ovn_northd_lb *lb,
>>                   continue;
>>               }
>>   -            if (ovn_dp_group_add_with_reference(lflow_ref, 
>> peer->od, hash)) {
>> +            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od)) {
>>                   continue;
>>               }
>>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, peer->od,
>> @@ -13081,7 +13176,7 @@ build_lswitch_and_lrouter_flows(struct hmap 
>> *datapaths, struct hmap *ports,
>>           }
>>       }
>>   -    if (use_parallel_build && (!use_logical_dp_groups)) {
>> +    if (use_parallel_build) {
>>           struct lflow_state *lflow_segs;
>>           struct lswitch_flow_build_info *lsiv;
>>           int index;
>> @@ -13319,6 +13414,8 @@ reconcile_lflow(struct ovn_lflow *lflow, 
>> struct northd_context *ctx,
>>       ovn_lflow_destroy(lflows, lflow);
>>   }
>>   +static bool needs_init = true;
>
> I think this variable should be renamed. "needs_init" directly relates 
> to the flowtable_lock, but that is not reflected in the variable name. 
> Also since this is only used in build_lflows, we could probably move 
> this declaration to within build_lflows.

Ack.

>
>> +
>>   /* Updates the Logical_Flow and Multicast_Group tables in the 
>> OVN_SB database,
>>    * constructing their contents based on the OVN_NB database. */
>>   static void
>> @@ -13333,8 +13430,15 @@ build_lflows(struct northd_context *ctx, 
>> struct hmap *datapaths,
>>         fast_hmap_size_for(&lflows.single_od, max_seen_lflow_size);
>>       fast_hmap_size_for(&lflows.multiple_od, max_seen_lflow_size);
>> -    if (use_parallel_build && use_logical_dp_groups) {
>> -        update_hashrow_locks(&lflows.single_od, &lflow_locks);
>> +    if (use_parallel_build && use_logical_dp_groups && needs_init) {
>> +        ovs_rwlock_init(&flowtable_lock);
>> +        /* This happens on first run with parallel+dp_groups.
>> +         * db_run will re-read use_parallel_build from config and
>> +         * reset it. This way we get correct sizing for
>> +         * parallel + dp_groups by doing one single threaded run
>> +         * on the first iteration. */
>> +        use_parallel_build = false;
>> +        needs_init = false;
>>       }
>>     @@ -15293,7 +15397,6 @@ main(int argc, char *argv[])
>>         daemonize_complete();
>>   -    init_hash_row_locks(&lflow_locks);
>>       use_parallel_build = can_parallelize_hashes(false);
>>         /* We want to detect (almost) all changes to the ovn-nb db. */
>>
>
> -----
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 4537c55dd..81ad929ec 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -4394,142 +4394,143 @@ static void ovn_make_multi_lflow(struct 
> ovn_lflow *old_lflow,
>  #pragma clang diagnostic ignored "-Wthread-safety"
>  #endif
>
> -/* Adds a row with the specified contents to the Logical_Flow table.
> - * Version to use when locking is required.
> - */
>  static struct ovn_lflow *
> -do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
> -                 uint32_t hash, enum ovn_stage stage, uint16_t priority,
> -                 const char *match, const char *actions, const char 
> *io_port,
> -                 const struct ovsdb_idl_row *stage_hint,
> -                 const char *where, const char *ctrl_meter)
> +do_ovn_lflow_add_simple(struct lflow_state *lflow_map, uint32_t hash,
> +                        enum ovn_stage stage, uint16_t priority,
> +                        const char *match, const char *actions,
> +                        const char *io_port,
> +                        const struct ovsdb_idl_row *stage_hint,
> +                        const char *where, const char *ctrl_meter)
> +
> +{
> +    struct ovn_lflow *lflow;
> +
> +    lflow = xmalloc(sizeof *lflow);
> +
> +    /* While adding new logical flows we are not setting single 
> datapath,
> +     * but collecting a group.  'od' will be updated later for all flows
> +     * with only one datapath in a group, so it could be hashed 
> correctly.
> +     */
> +    ovn_lflow_init(lflow, NULL, stage, priority,
> +                   xstrdup(match), xstrdup(actions),
> +                   io_port ? xstrdup(io_port) : NULL,
> +                   nullable_xstrdup(ctrl_meter),
> +                   ovn_lflow_hint(stage_hint), where);
> +    if (use_parallel_build) {
> +        hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, 
> hash);
> +    } else {
> +        hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
> +    }
> +
> +    return lflow;
> +}
> +
> +static struct ovn_lflow *
> +do_ovn_lflow_add_with_groups(struct lflow_state *lflow_map,
> +                             struct ovn_datapath *od, uint32_t hash,
> +                             enum ovn_stage stage, uint16_t priority,
> +                             const char *match, const char *actions,
> +                             const char *io_port,
> +                             const struct ovsdb_idl_row *stage_hint,
> +                             const char *where, const char *ctrl_meter)
>  {
> +    struct ovn_lflow *lflow;
> +    ovs_assert(use_logical_dp_groups);
> +
> +    lflow = do_ovn_lflow_find(&lflow_map->single_od,
> +                              NULL, stage, priority, match,
> +                              actions, ctrl_meter, hash);
>
> -    struct ovn_lflow *old_lflow;
> +    if (lflow) {
> +        ovn_make_multi_lflow(lflow, od, lflow_map, hash);
> +    } else {
> +        lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
> +                                  stage, priority, match,
> +                                  actions, ctrl_meter, hash);
> +    }
> +
> +    if (!lflow) {
> +        lflow = do_ovn_lflow_add_simple(lflow_map, hash, stage, 
> priority,
> +                                        match, actions, io_port, 
> stage_hint,
> +                                        where, ctrl_meter);
> +    }
> +
> +    if (!hmapx_contains(&lflow->od_group, od)) {
> +        hmapx_add(&lflow->od_group, od);
> +    }
> +
> +    return lflow;
> +}
> +
> +static struct ovn_lflow *
> +do_ovn_lflow_add_parallel_with_groups(struct lflow_state *lflow_map,
> +                                      struct ovn_datapath *od,
> +                                      uint32_t hash, enum ovn_stage 
> stage,
> +                                      uint16_t priority, const char 
> *match,
> +                                      const char *actions, const char 
> *io_port,
> +                                      const struct ovsdb_idl_row 
> *stage_hint,
> +                                      const char *where, const char 
> *ctrl_meter)
> +{
>      struct ovn_lflow *lflow;
>
> +    ovs_assert(use_parallel_build && use_logical_dp_groups);
> +
>      /* Fast Path.
>       * See if we can get away without writing - grab a rdlock and check
>       * if we can get away with as little work as possible.
>       */
> -
> -    if (use_logical_dp_groups) {
> -        if (use_parallel_build) {
> -            ovs_rwlock_rdlock(&flowtable_lock);
> -        }
> -        old_lflow = do_ovn_lflow_find(&lflow_map->single_od,
> -                                      NULL, stage, priority, match,
> -                                      actions, ctrl_meter, hash);
> -        if (old_lflow) {
> -            if (!hmapx_contains(&old_lflow->od_group, od)) {
> -                /* od not in od_group, we need to add it and move to
> -                 * multiple. */
> -                if (use_parallel_build) {
> -                    /* Upgrade the lock to write, we are likely to
> -                     * modify data. */
> -                    ovs_rwlock_unlock(&flowtable_lock);
> -                    ovs_rwlock_wrlock(&flowtable_lock);
> -
> -                    /* Check if someone got ahead of us and the flow 
> is already
> -                     * in multiple. */
> -                    if (!hmap_contains(&lflow_map->single_od,
> - &old_lflow->hmap_node)) {
> -                        /* Someone did get ahead of us, add the od to 
> the
> -                         * group. */
> -                        hmapx_add(&old_lflow->od_group, od);
> -                        goto done_update_unlock;
> -                    }
> -                }
> -                ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
> -                goto done_update_unlock;
> -            }
> -        }
> -        if (!old_lflow) {
> -            /* Not found in single, lookup in multiple od. */
> -            old_lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
> -                                          stage, priority, match,
> -                                          actions, ctrl_meter, hash);
> -            if (old_lflow) {
> -                if (!hmapx_contains(&old_lflow->od_group, od)) {
> -                    if (use_parallel_build) {
> -                        /* Upgrade lock to write.*/
> -                        ovs_rwlock_unlock(&flowtable_lock);
> -                        ovs_rwlock_wrlock(&flowtable_lock);
> -                    }
> -                    hmapx_add(&old_lflow->od_group, od);
> -                }
> -            }
> -        }
> -done_update_unlock:
> -        if (use_parallel_build) {
> -            ovs_rwlock_unlock(&flowtable_lock);
> -        }
> -        if (old_lflow) {
> -            return old_lflow;
> -        }
> +    ovs_rwlock_rdlock(&flowtable_lock);
> +    lflow = do_ovn_lflow_find(&lflow_map->single_od,
> +                                  NULL, stage, priority, match,
> +                                  actions, ctrl_meter, hash);
> +    if (lflow && hmapx_contains(&lflow->od_group, od)) {
> +        /* This flow already has this datapath in the group. Just 
> return it.*/
> +        goto end;
>      }
>
> +    ovs_rwlock_unlock(&flowtable_lock);
> +
>      /* Slow Path.
>       * We could not get away with minimal mostly ro amount of work,
>       * lock with rw and try to do an insert (may end up repeating
>       * some of what we do for ro). */
> +    ovs_rwlock_wrlock(&flowtable_lock);
> +
> +    lflow = do_ovn_lflow_add_with_groups(lflow_map, od, hash, stage,
> +                                         priority, match, actions, 
> io_port,
> +                                         stage_hint, where, ctrl_meter);
> +
> +end:
> +    ovs_rwlock_unlock(&flowtable_lock);
> +
> +    return lflow;
> +}
> +
> +/* Adds a row with the specified contents to the Logical_Flow table.
> + * Version to use when locking is required.
> + */
> +static struct ovn_lflow *
> +do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
> +                 uint32_t hash, enum ovn_stage stage, uint16_t priority,
> +                 const char *match, const char *actions, const char 
> *io_port,
> +                 const struct ovsdb_idl_row *stage_hint,
> +                 const char *where, const char *ctrl_meter)
> +{
>
>      if (use_logical_dp_groups && use_parallel_build) {
> -        ovs_rwlock_wrlock(&flowtable_lock);
> -    }
> -    if (!use_parallel_build) {
> -        lflow = xmalloc(sizeof *lflow);
> -        /* While adding new logical flows we are not setting single 
> datapath,
> -         * but collecting a group.  'od' will be updated later for 
> all flows
> -         * with only one datapath in a group, so it could be hashed 
> correctly.
> -         */
> -        ovn_lflow_init(lflow, NULL, stage, priority,
> -                       xstrdup(match), xstrdup(actions),
> -                       io_port ? xstrdup(io_port) : NULL,
> -                       nullable_xstrdup(ctrl_meter),
> -                       ovn_lflow_hint(stage_hint), where);
> -        hmapx_add(&lflow->od_group, od);
> -        hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
> +        return do_ovn_lflow_add_parallel_with_groups(lflow_map, od, 
> hash, stage,
> +                                                     priority, match, 
> actions,
> +                                                     io_port, 
> stage_hint, where,
> +                                                     ctrl_meter);
> +    } else if (use_logical_dp_groups) {
> +        return do_ovn_lflow_add_with_groups(lflow_map, od, hash, stage,
> +                                            priority, match, actions, 
> io_port,
> +                                            stage_hint, where, 
> ctrl_meter);
>      } else {
> -        if (use_logical_dp_groups) {
> -            /* Search again in case someone else got here first. */
> -            lflow = do_ovn_lflow_find(&lflow_map->single_od,
> -                                          NULL, stage, priority, match,
> -                                          actions, ctrl_meter, hash);
> -            if (lflow) {
> -                if (!hmapx_contains(&lflow->od_group, od)) {
> -                    ovn_make_multi_lflow(lflow, od, lflow_map, hash);
> -                }
> -                goto done_add_unlock;
> -            }
> -            /* Unlikely, but possible, more than one thread got here
> -             * ahead of us while we were wating to acquire a write 
> lock. */
> -            lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
> -                                          stage, priority, match,
> -                                          actions, ctrl_meter, hash);
> -            if (lflow) {
> -                hmapx_add(&lflow->od_group, od);
> -                goto done_add_unlock;
> -            }
> -        }
> -        lflow = xmalloc(sizeof *lflow);
> -        /* While adding new logical flows we're not setting single 
> datapath,
> -         * but collecting a group.  'od' will be updated later for all
> -         * flows with only * one datapath in a group, so it could be 
> hashed
> -         * correctly. */
> -        ovn_lflow_init(lflow, NULL, stage, priority,
> -                       xstrdup(match), xstrdup(actions),
> -                       io_port ? xstrdup(io_port) : NULL,
> -                       nullable_xstrdup(ctrl_meter),
> -                       ovn_lflow_hint(stage_hint), where);
> -        hmapx_add(&lflow->od_group, od);
> -        hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, 
> hash);
> +        return do_ovn_lflow_add_simple(lflow_map, hash, stage, priority,
> +                                       match, actions, io_port, 
> stage_hint,
> +                                       where, ctrl_meter);
>      }
> -done_add_unlock:
> -    if (use_logical_dp_groups && use_parallel_build) {
> -        ovs_rwlock_unlock(&flowtable_lock);
> -    }
> -    return lflow;
>  }
>
>  static struct ovn_lflow *
> ----
>
>
Anton Ivanov Sept. 10, 2021, 8:17 a.m. UTC | #4
Hi Mark,

I have it rebased and amended to address the comments on my branch with the "Thread API changes", where the thread pools can be adjusted, and that is ready to be posted.

I can do a version off master as well, but that will need a rebase if we merge the thread api changes.

All up to you - which one do you want and in which order?

A.

On 09/09/2021 22:02, Mark Michelson wrote:
> Hi Anton,
>
> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>
> See below for more comments.
>
> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> Restore parallel build with dp groups using rwlock instead
>> of per row locking as an underlying mechanism.
>>
>> This provides improvement ~ 10% end-to-end on ovn-heater
>> under virtualization despite awakening some qemu gremlin
>> which makes qemu climb to silly CPU usage. The gain on
>> bare metal is likely to be higher.
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>   1 file changed, 159 insertions(+), 56 deletions(-)
>>
>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>> index 1f903882b..4537c55dd 100644
>> --- a/northd/ovn-northd.c
>> +++ b/northd/ovn-northd.c
>> @@ -59,6 +59,7 @@
>>   #include "unixctl.h"
>>   #include "util.h"
>>   #include "uuid.h"
>> +#include "ovs-thread.h"
>>   #include "openvswitch/vlog.h"
>>     VLOG_DEFINE_THIS_MODULE(ovn_northd);
>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>   static bool use_logical_dp_groups = false;
>>   static bool use_parallel_build = true;
>>   -static struct hashrow_locks lflow_locks;
>> +static struct ovs_rwlock flowtable_lock;
>
> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>
> From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>
>> +
>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>> +                              struct ovn_datapath *od,
>> +                              struct lflow_state *lflow_map,
>> +                              uint32_t hash)
>> +{
>> +    hmapx_add(&old_lflow->od_group, od);
>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>> +    if (use_parallel_build) {
>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>> +    } else {
>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>> +    }
>> +}
>> +
>> +#ifdef __clang__
>> +#pragma clang diagnostic push
>> +#pragma clang diagnostic ignored "-Wthread-safety"
>> +#endif
>
> The section above needs a comment to explain why -Wthread-safety is ignored.
>
>>     /* Adds a row with the specified contents to the Logical_Flow table.
>>    * Version to use when locking is required.
>> @@ -4388,55 +4408,127 @@ do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
>>       struct ovn_lflow *old_lflow;
>>       struct ovn_lflow *lflow;
>>   +    /* Fast Path.
>> +     * See if we can get away without writing - grab a rdlock and check
>> +     * if we can get away with as little work as possible.
>> +     */
>> +
>>       if (use_logical_dp_groups) {
>> -        old_lflow = do_ovn_lflow_find(&lflow_map->single_od, NULL, stage,
>> -                                      priority, match,
>> +        if (use_parallel_build) {
>> +            ovs_rwlock_rdlock(&flowtable_lock);
>> +        }
>> +        old_lflow = do_ovn_lflow_find(&lflow_map->single_od,
>> +                                      NULL, stage, priority, match,
>>                                         actions, ctrl_meter, hash);
>>           if (old_lflow) {
>> -            hmapx_add(&old_lflow->od_group, od);
>> -            /* Found, different, od count went up. Move to multiple od. */
>> -            if (hmapx_count(&old_lflow->od_group) > 1) {
>> -                hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>> +            if (!hmapx_contains(&old_lflow->od_group, od)) {
>> +                /* od not in od_group, we need to add it and move to
>> +                 * multiple. */
>>                   if (use_parallel_build) {
>> - hmap_insert_fast(&lflow_map->multiple_od,
>> - &old_lflow->hmap_node, hash);
>> -                } else {
>> -                    hmap_insert(&lflow_map->multiple_od,
>> -                                &old_lflow->hmap_node, hash);
>> +                    /* Upgrade the lock to write, we are likely to
>> +                     * modify data. */
>> +                    ovs_rwlock_unlock(&flowtable_lock);
>> +                    ovs_rwlock_wrlock(&flowtable_lock);
>> +
>> +                    /* Check if someone got ahead of us and the flow is already
>> +                     * in multiple. */
>> +                    if (!hmap_contains(&lflow_map->single_od,
>> + &old_lflow->hmap_node)) {
>
> The logic here is fine, but that comment paired with that if statement is strange. Either
>
> a) Change the comment to say "Check if someone got ahead of us and the flow has been removed from single."
>
> or
>
> b) Change the if statement to "if (hmap_contains(&lflow_map->multiple_od, &old_lflow->hmap_node))"
>
>> +                        /* Someone did get ahead of us, add the od to the
>> +                         * group. */
>> +                        hmapx_add(&old_lflow->od_group, od);
>> +                        goto done_update_unlock;
>> +                    }
>>                   }
>> +                ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
>> +                goto done_update_unlock;
>>               }
>> -        } else {
>> -            /* Not found, lookup in multiple od. */
>> +        }
>> +        if (!old_lflow) {
>
> I think this "if (!old_lflow)" can be left as an "else" instead. This would allow for you to remove one of the goto statements in the previous section.
>
>> +            /* Not found in single, lookup in multiple od. */
>>               old_lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
>>                                             stage, priority, match,
>>                                             actions, ctrl_meter, hash);
>>               if (old_lflow) {
>> -                hmapx_add(&old_lflow->od_group, od);
>> +                if (!hmapx_contains(&old_lflow->od_group, od)) {
>> +                    if (use_parallel_build) {
>> +                        /* Upgrade lock to write.*/
>> +                        ovs_rwlock_unlock(&flowtable_lock);
>> +                        ovs_rwlock_wrlock(&flowtable_lock);
>> +                    }
>> +                    hmapx_add(&old_lflow->od_group, od);
>> +                }
>>               }
>>           }
>> +done_update_unlock:
>> +        if (use_parallel_build) {
>> +            ovs_rwlock_unlock(&flowtable_lock);
>> +        }
>>           if (old_lflow) {
>>               return old_lflow;
>>           }
>>       }
>>   -    lflow = xmalloc(sizeof *lflow);
>> -    /* While adding new logical flows we're not setting single datapath, but
>> -     * collecting a group.  'od' will be updated later for all flows with only
>> -     * one datapath in a group, so it could be hashed correctly. */
>> -    ovn_lflow_init(lflow, NULL, stage, priority,
>> -                   xstrdup(match), xstrdup(actions),
>> -                   io_port ? xstrdup(io_port) : NULL,
>> -                   nullable_xstrdup(ctrl_meter),
>> -                   ovn_lflow_hint(stage_hint), where);
>> -    hmapx_add(&lflow->od_group, od);
>> -
>> -    /* Insert "fresh" lflows into single_od. */
>> +    /* Slow Path.
>> +     * We could not get away with minimal mostly ro amount of work,
>> +     * lock with rw and try to do an insert (may end up repeating
>> +     * some of what we do for ro). */
>>   +    if (use_logical_dp_groups && use_parallel_build) {
>> +        ovs_rwlock_wrlock(&flowtable_lock);
>> +    }
>>       if (!use_parallel_build) {
>> +        lflow = xmalloc(sizeof *lflow);
>> +        /* While adding new logical flows we are not setting single datapath,
>> +         * but collecting a group.  'od' will be updated later for all flows
>> +         * with only one datapath in a group, so it could be hashed correctly.
>> +         */
>> +        ovn_lflow_init(lflow, NULL, stage, priority,
>> +                       xstrdup(match), xstrdup(actions),
>> +                       io_port ? xstrdup(io_port) : NULL,
>> +                       nullable_xstrdup(ctrl_meter),
>> +                       ovn_lflow_hint(stage_hint), where);
>> +        hmapx_add(&lflow->od_group, od);
>>           hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
>>       } else {
>> +        if (use_logical_dp_groups) {
>> +            /* Search again in case someone else got here first. */
>> +            lflow = do_ovn_lflow_find(&lflow_map->single_od,
>> +                                          NULL, stage, priority, match,
>> +                                          actions, ctrl_meter, hash);
>> +            if (lflow) {
>> +                if (!hmapx_contains(&lflow->od_group, od)) {
>> +                    ovn_make_multi_lflow(lflow, od, lflow_map, hash);
>> +                }
>> +                goto done_add_unlock;
>> +            }
>> +            /* Unlikely, but possible, more than one thread got here
>> +             * ahead of us while we were waiting to acquire a write lock. */
>> +            lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
>> +                                          stage, priority, match,
>> +                                          actions, ctrl_meter, hash);
>> +            if (lflow) {
>> +                hmapx_add(&lflow->od_group, od);
>> +                goto done_add_unlock;
>> +            }
>> +        }
>> +        lflow = xmalloc(sizeof *lflow);
>> +        /* While adding new logical flows we're not setting single datapath,
>> +         * but collecting a group.  'od' will be updated later for all
>> +         * flows with only one datapath in a group, so it could be hashed
>> +         * correctly. */
>> +        ovn_lflow_init(lflow, NULL, stage, priority,
>> +                       xstrdup(match), xstrdup(actions),
>> +                       io_port ? xstrdup(io_port) : NULL,
>> +                       nullable_xstrdup(ctrl_meter),
>> +                       ovn_lflow_hint(stage_hint), where);
>> +        hmapx_add(&lflow->od_group, od);
>
> This section can run if we are not using logical dp groups. Should this hmapx_add() call have "if (use_logical_dp_groups)" as a precondition?
>
>> hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, hash);
>>       }
>> +done_add_unlock:
>> +    if (use_logical_dp_groups && use_parallel_build) {
>> +        ovs_rwlock_unlock(&flowtable_lock);
>> +    }
>>       return lflow;
>>   }
>
> I found the logic of do_ovn_lflow_add() to be very difficult to follow. It was hard to keep in mind what conditions resulted in locks being held, and what the current state of the locks was. It also contains a lot of repeated code. I have a suggested rewrite of this function that I am placing at the bottom of this email. I think I have the logic the same as what you have, but I think it's a lot easier to follow what's happening. Let me know what you think.
>
>>   @@ -4449,22 +4541,16 @@ ovn_lflow_add_at_with_hash(struct lflow_state *lflow_map,
>>                              const struct ovsdb_idl_row *stage_hint,
>>                              const char *where, uint32_t hash)
>>   {
>> -    struct ovn_lflow *lflow;
>> -
>>       ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
>> -    if (use_logical_dp_groups && use_parallel_build) {
>> -        lock_hash_row(&lflow_locks, hash);
>> -        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
>> +    return do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
>>                                    actions, io_port, stage_hint, where,
>>                                    ctrl_meter);
>> -        unlock_hash_row(&lflow_locks, hash);
>> -    } else {
>> -        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
>> -                         actions, io_port, stage_hint, where, ctrl_meter);
>> -    }
>> -    return lflow;
>>   }
>>   +#ifdef __clang__
>> +#pragma clang diagnostic pop
>> +#endif
>> +
>>   /* Adds a row with the specified contents to the Logical_Flow table. */
>>   static void
>>   ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
>> @@ -4483,19 +4569,29 @@ ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
>>                                  io_port, ctrl_meter, stage_hint, where, hash);
>>   }
>>   +
>> +#ifdef __clang__
>> +#pragma clang diagnostic push
>> +#pragma clang diagnostic ignored "-Wthread-safety"
>> +#endif
>> +
>>   static bool
>>   ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
>> -                                struct ovn_datapath *od,
>> -                                uint32_t hash)
>> +                                struct ovn_datapath *od)
>>   {
>>       if (!use_logical_dp_groups || !lflow_ref) {
>>           return false;
>>       }
>>         if (use_parallel_build) {
>> -        lock_hash_row(&lflow_locks, hash);
>> -        hmapx_add(&lflow_ref->od_group, od);
>> -        unlock_hash_row(&lflow_locks, hash);
>> +        ovs_rwlock_rdlock(&flowtable_lock);
>> +        if (!hmapx_contains(&lflow_ref->od_group, od)) {
>> +            /* Upgrade lock to write. */
>> +            ovs_rwlock_unlock(&flowtable_lock);
>> +            ovs_rwlock_wrlock(&flowtable_lock);
>> +            hmapx_add(&lflow_ref->od_group, od);
>> +        }
>> +        ovs_rwlock_unlock(&flowtable_lock);
>>       } else {
>>           hmapx_add(&lflow_ref->od_group, od);
>>       }
>> @@ -4503,6 +4599,11 @@ ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
>>       return true;
>>   }
>>   +#ifdef __clang__
>> +#pragma clang diagnostic pop
>> +#endif
>> +
>> +
>>   /* Adds a row with the specified contents to the Logical_Flow table. */
>>   #define ovn_lflow_add_with_hint__(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
>>                                     ACTIONS, IN_OUT_PORT, CTRL_METER, \
>> @@ -4594,15 +4695,9 @@ hmap_safe_remove(struct hmap *hmap, struct hmap_node *node)
>>   static void
>>   remove_lflow_from_lflows(struct lflow_state *lflows, struct ovn_lflow *lflow)
>>   {
>> -    if (use_logical_dp_groups && use_parallel_build) {
>> -        lock_hash_row(&lflow_locks, lflow->hmap_node.hash);
>> -    }
>>       if (!hmap_safe_remove(&lflows->multiple_od, &lflow->hmap_node)) {
>>           hmap_remove(&lflows->single_od, &lflow->hmap_node);
>>       }
>> -    if (use_logical_dp_groups && use_parallel_build) {
>> -        unlock_hash_row(&lflow_locks, lflow->hmap_node.hash);
>> -    }
>>   }
>>     static void
>> @@ -6511,7 +6606,7 @@ build_lb_rules(struct lflow_state *lflows, struct ovn_northd_lb *lb,
>>               if (reject) {
>>                   meter = copp_meter_get(COPP_REJECT, od->nbs->copp,
>>                                          meter_groups);
>> -            } else if (ovn_dp_group_add_with_reference(lflow_ref, od, hash)) {
>> +            } else if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
>>                   continue;
>>               }
>>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
>> @@ -9565,7 +9660,7 @@ build_lrouter_defrag_flows_for_lb(struct ovn_northd_lb *lb,
>>                   ds_cstr(match), ds_cstr(&defrag_actions));
>>           for (size_t j = 0; j < lb->n_nb_lr; j++) {
>>               struct ovn_datapath *od = lb->nb_lr[j];
>> -            if (ovn_dp_group_add_with_reference(lflow_ref, od, hash)) {
>> +            if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
>>                   continue;
>>               }
>>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
>> @@ -9629,7 +9724,7 @@ build_lflows_for_unreachable_vips(struct ovn_northd_lb *lb,
>>                   continue;
>>               }
>>   -            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od, hash)) {
>> +            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od)) {
>>                   continue;
>>               }
>>               lflow_ref = ovn_lflow_add_at_with_hash(lflows, peer->od,
>> @@ -13081,7 +13176,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>>           }
>>       }
>>   -    if (use_parallel_build && (!use_logical_dp_groups)) {
>> +    if (use_parallel_build) {
>>           struct lflow_state *lflow_segs;
>>           struct lswitch_flow_build_info *lsiv;
>>           int index;
>> @@ -13319,6 +13414,8 @@ reconcile_lflow(struct ovn_lflow *lflow, struct northd_context *ctx,
>>       ovn_lflow_destroy(lflows, lflow);
>>   }
>>   +static bool needs_init = true;
>
> I think this variable should be renamed. "needs_init" directly relates to the flowtable_lock, but that is not reflected in the variable name. Also since this is only used in build_lflows, we could probably move this declaration to within build_lflows.
>
>> +
>>   /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
>>    * constructing their contents based on the OVN_NB database. */
>>   static void
>> @@ -13333,8 +13430,15 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
>>         fast_hmap_size_for(&lflows.single_od, max_seen_lflow_size);
>>       fast_hmap_size_for(&lflows.multiple_od, max_seen_lflow_size);
>> -    if (use_parallel_build && use_logical_dp_groups) {
>> -        update_hashrow_locks(&lflows.single_od, &lflow_locks);
>> +    if (use_parallel_build && use_logical_dp_groups && needs_init) {
>> +        ovs_rwlock_init(&flowtable_lock);
>> +        /* This happens on first run with parallel+dp_groups.
>> +         * db_run will re-read use_parallel_build from config and
>> +         * reset it. This way we get correct sizing for
>> +         * parallel + dp_groups by doing one single threaded run
>> +         * on the first iteration. */
>> +        use_parallel_build = false;
>> +        needs_init = false;
>>       }
>>     @@ -15293,7 +15397,6 @@ main(int argc, char *argv[])
>>         daemonize_complete();
>>   -    init_hash_row_locks(&lflow_locks);
>>       use_parallel_build = can_parallelize_hashes(false);
>>         /* We want to detect (almost) all changes to the ovn-nb db. */
>>
>
> -----
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 4537c55dd..81ad929ec 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -4394,142 +4394,143 @@ static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>  #pragma clang diagnostic ignored "-Wthread-safety"
>  #endif
>
> -/* Adds a row with the specified contents to the Logical_Flow table.
> - * Version to use when locking is required.
> - */
>  static struct ovn_lflow *
> -do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
> -                 uint32_t hash, enum ovn_stage stage, uint16_t priority,
> -                 const char *match, const char *actions, const char *io_port,
> -                 const struct ovsdb_idl_row *stage_hint,
> -                 const char *where, const char *ctrl_meter)
> +do_ovn_lflow_add_simple(struct lflow_state *lflow_map, uint32_t hash,
> +                        enum ovn_stage stage, uint16_t priority,
> +                        const char *match, const char *actions,
> +                        const char *io_port,
> +                        const struct ovsdb_idl_row *stage_hint,
> +                        const char *where, const char *ctrl_meter)
> +
> +{
> +    struct ovn_lflow *lflow;
> +
> +    lflow = xmalloc(sizeof *lflow);
> +
> +    /* While adding new logical flows we are not setting single datapath,
> +     * but collecting a group.  'od' will be updated later for all flows
> +     * with only one datapath in a group, so it could be hashed correctly.
> +     */
> +    ovn_lflow_init(lflow, NULL, stage, priority,
> +                   xstrdup(match), xstrdup(actions),
> +                   io_port ? xstrdup(io_port) : NULL,
> +                   nullable_xstrdup(ctrl_meter),
> +                   ovn_lflow_hint(stage_hint), where);
> +    if (use_parallel_build) {
> +        hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, hash);
> +    } else {
> +        hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
> +    }
> +
> +    return lflow;
> +}
> +
> +static struct ovn_lflow *
> +do_ovn_lflow_add_with_groups(struct lflow_state *lflow_map,
> +                             struct ovn_datapath *od, uint32_t hash,
> +                             enum ovn_stage stage, uint16_t priority,
> +                             const char *match, const char *actions,
> +                             const char *io_port,
> +                             const struct ovsdb_idl_row *stage_hint,
> +                             const char *where, const char *ctrl_meter)
>  {
> +    struct ovn_lflow *lflow;
> +    ovs_assert(use_logical_dp_groups);
> +
> +    lflow = do_ovn_lflow_find(&lflow_map->single_od,
> +                              NULL, stage, priority, match,
> +                              actions, ctrl_meter, hash);
>
> -    struct ovn_lflow *old_lflow;
> +    if (lflow) {
> +        ovn_make_multi_lflow(lflow, od, lflow_map, hash);
> +    } else {
> +        lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
> +                                  stage, priority, match,
> +                                  actions, ctrl_meter, hash);
> +    }
> +
> +    if (!lflow) {
> +        lflow = do_ovn_lflow_add_simple(lflow_map, hash, stage, priority,
> +                                        match, actions, io_port, stage_hint,
> +                                        where, ctrl_meter);
> +    }
> +
> +    if (!hmapx_contains(&lflow->od_group, od)) {
> +        hmapx_add(&lflow->od_group, od);
> +    }
> +
> +    return lflow;
> +}
> +
> +static struct ovn_lflow *
> +do_ovn_lflow_add_parallel_with_groups(struct lflow_state *lflow_map,
> +                                      struct ovn_datapath *od,
> +                                      uint32_t hash, enum ovn_stage stage,
> +                                      uint16_t priority, const char *match,
> +                                      const char *actions, const char *io_port,
> +                                      const struct ovsdb_idl_row *stage_hint,
> +                                      const char *where, const char *ctrl_meter)
> +{
>      struct ovn_lflow *lflow;
>
> +    ovs_assert(use_parallel_build && use_logical_dp_groups);
> +
>      /* Fast Path.
>       * See if we can get away without writing - grab a rdlock and check
>       * if we can get away with as little work as possible.
>       */
> -
> -    if (use_logical_dp_groups) {
> -        if (use_parallel_build) {
> -            ovs_rwlock_rdlock(&flowtable_lock);
> -        }
> -        old_lflow = do_ovn_lflow_find(&lflow_map->single_od,
> -                                      NULL, stage, priority, match,
> -                                      actions, ctrl_meter, hash);
> -        if (old_lflow) {
> -            if (!hmapx_contains(&old_lflow->od_group, od)) {
> -                /* od not in od_group, we need to add it and move to
> -                 * multiple. */
> -                if (use_parallel_build) {
> -                    /* Upgrade the lock to write, we are likely to
> -                     * modify data. */
> -                    ovs_rwlock_unlock(&flowtable_lock);
> -                    ovs_rwlock_wrlock(&flowtable_lock);
> -
> -                    /* Check if someone got ahead of us and the flow is already
> -                     * in multiple. */
> -                    if (!hmap_contains(&lflow_map->single_od,
> - &old_lflow->hmap_node)) {
> -                        /* Someone did get ahead of us, add the od to the
> -                         * group. */
> -                        hmapx_add(&old_lflow->od_group, od);
> -                        goto done_update_unlock;
> -                    }
> -                }
> -                ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
> -                goto done_update_unlock;
> -            }
> -        }
> -        if (!old_lflow) {
> -            /* Not found in single, lookup in multiple od. */
> -            old_lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
> -                                          stage, priority, match,
> -                                          actions, ctrl_meter, hash);
> -            if (old_lflow) {
> -                if (!hmapx_contains(&old_lflow->od_group, od)) {
> -                    if (use_parallel_build) {
> -                        /* Upgrade lock to write.*/
> -                        ovs_rwlock_unlock(&flowtable_lock);
> -                        ovs_rwlock_wrlock(&flowtable_lock);
> -                    }
> -                    hmapx_add(&old_lflow->od_group, od);
> -                }
> -            }
> -        }
> -done_update_unlock:
> -        if (use_parallel_build) {
> -            ovs_rwlock_unlock(&flowtable_lock);
> -        }
> -        if (old_lflow) {
> -            return old_lflow;
> -        }
> +    ovs_rwlock_rdlock(&flowtable_lock);
> +    lflow = do_ovn_lflow_find(&lflow_map->single_od,
> +                                  NULL, stage, priority, match,
> +                                  actions, ctrl_meter, hash);
> +    if (lflow && hmapx_contains(&lflow->od_group, od)) {
> +        /* This flow already has this datapath in the group. Just return it.*/
> +        goto end;
>      }
>
> +    ovs_rwlock_unlock(&flowtable_lock);
> +
>      /* Slow Path.
>       * We could not get away with minimal mostly ro amount of work,
>       * lock with rw and try to do an insert (may end up repeating
>       * some of what we do for ro). */
> +    ovs_rwlock_wrlock(&flowtable_lock);
> +
> +    lflow = do_ovn_lflow_add_with_groups(lflow_map, od, hash, stage,
> +                                         priority, match, actions, io_port,
> +                                         stage_hint, where, ctrl_meter);
> +
> +end:
> +    ovs_rwlock_unlock(&flowtable_lock);
> +
> +    return lflow;
> +}
> +
> +/* Adds a row with the specified contents to the Logical_Flow table.
> + * Version to use when locking is required.
> + */
> +static struct ovn_lflow *
> +do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
> +                 uint32_t hash, enum ovn_stage stage, uint16_t priority,
> +                 const char *match, const char *actions, const char *io_port,
> +                 const struct ovsdb_idl_row *stage_hint,
> +                 const char *where, const char *ctrl_meter)
> +{
>
>      if (use_logical_dp_groups && use_parallel_build) {
> -        ovs_rwlock_wrlock(&flowtable_lock);
> -    }
> -    if (!use_parallel_build) {
> -        lflow = xmalloc(sizeof *lflow);
> -        /* While adding new logical flows we are not setting single datapath,
> -         * but collecting a group.  'od' will be updated later for all flows
> -         * with only one datapath in a group, so it could be hashed correctly.
> -         */
> -        ovn_lflow_init(lflow, NULL, stage, priority,
> -                       xstrdup(match), xstrdup(actions),
> -                       io_port ? xstrdup(io_port) : NULL,
> -                       nullable_xstrdup(ctrl_meter),
> -                       ovn_lflow_hint(stage_hint), where);
> -        hmapx_add(&lflow->od_group, od);
> -        hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
> +        return do_ovn_lflow_add_parallel_with_groups(lflow_map, od, hash, stage,
> +                                                     priority, match, actions,
> +                                                     io_port, stage_hint, where,
> +                                                     ctrl_meter);
> +    } else if (use_logical_dp_groups) {
> +        return do_ovn_lflow_add_with_groups(lflow_map, od, hash, stage,
> +                                            priority, match, actions, io_port,
> +                                            stage_hint, where, ctrl_meter);
>      } else {
> -        if (use_logical_dp_groups) {
> -            /* Search again in case someone else got here first. */
> -            lflow = do_ovn_lflow_find(&lflow_map->single_od,
> -                                          NULL, stage, priority, match,
> -                                          actions, ctrl_meter, hash);
> -            if (lflow) {
> -                if (!hmapx_contains(&lflow->od_group, od)) {
> -                    ovn_make_multi_lflow(lflow, od, lflow_map, hash);
> -                }
> -                goto done_add_unlock;
> -            }
> -            /* Unlikely, but possible, more than one thread got here
> -             * ahead of us while we were wating to acquire a write lock. */
> -            lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
> -                                          stage, priority, match,
> -                                          actions, ctrl_meter, hash);
> -            if (lflow) {
> -                hmapx_add(&lflow->od_group, od);
> -                goto done_add_unlock;
> -            }
> -        }
> -        lflow = xmalloc(sizeof *lflow);
> -        /* While adding new logical flows we're not setting single datapath,
> -         * but collecting a group.  'od' will be updated later for all
> -         * flows with only * one datapath in a group, so it could be hashed
> -         * correctly. */
> -        ovn_lflow_init(lflow, NULL, stage, priority,
> -                       xstrdup(match), xstrdup(actions),
> -                       io_port ? xstrdup(io_port) : NULL,
> -                       nullable_xstrdup(ctrl_meter),
> -                       ovn_lflow_hint(stage_hint), where);
> -        hmapx_add(&lflow->od_group, od);
> -        hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, hash);
> +        return do_ovn_lflow_add_simple(lflow_map, hash, stage, priority,
> +                                       match, actions, io_port, stage_hint,
> +                                       where, ctrl_meter);
>      }
> -done_add_unlock:
> -    if (use_logical_dp_groups && use_parallel_build) {
> -        ovs_rwlock_unlock(&flowtable_lock);
> -    }
> -    return lflow;
>  }
>
>  static struct ovn_lflow *
> ----
>
>
Ilya Maximets Sept. 10, 2021, 11:43 a.m. UTC | #5
On 9/9/21 11:02 PM, Mark Michelson wrote:
> Hi Anton,
> 
> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
> 
> See below for more comments.
> 
> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> Restore parallel build with dp groups using rwlock instead
>> of per row locking as an underlying mechanism.
>>
>> This provides improvement ~ 10% end-to-end on ovn-heater
>> under virtualization despite awakening some qemu gremlin
>> which makes qemu climb to silly CPU usage. The gain on
>> bare metal is likely to be higher.
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>> ---
>>   northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>   1 file changed, 159 insertions(+), 56 deletions(-)
>>
>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>> index 1f903882b..4537c55dd 100644
>> --- a/northd/ovn-northd.c
>> +++ b/northd/ovn-northd.c
>> @@ -59,6 +59,7 @@
>>   #include "unixctl.h"
>>   #include "util.h"
>>   #include "uuid.h"
>> +#include "ovs-thread.h"
>>   #include "openvswitch/vlog.h"
>>     VLOG_DEFINE_THIS_MODULE(ovn_northd);
>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>   static bool use_logical_dp_groups = false;
>>   static bool use_parallel_build = true;
>>   -static struct hashrow_locks lflow_locks;
>> +static struct ovs_rwlock flowtable_lock;
> 
> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
> 
> From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
> 
>> +
>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>> +                              struct ovn_datapath *od,
>> +                              struct lflow_state *lflow_map,
>> +                              uint32_t hash)
>> +{
>> +    hmapx_add(&old_lflow->od_group, od);
>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>> +    if (use_parallel_build) {
>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>> +    } else {
>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>> +    }
>> +}
>> +
>> +#ifdef __clang__
>> +#pragma clang diagnostic push
>> +#pragma clang diagnostic ignored "-Wthread-safety"
>> +#endif
> 
> The section above needs a comment to explain why -Wthread-safety is ignored.

Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
the diagnostics.


On a side note, I ran some tests with this patch set applied and it drops
performance of non-parallel case by 20% on my laptop with databases from
the ovn-heater's 120 node density test.

I see, you mentioned 10% improvement in the commit message.  Assuming it's
with parallelization enabled, right?

In any case, some performance testing without parallelization required before
accepting the change, as it's a default configuration.

Best regards, Ilya Maximets.
Anton Ivanov Sept. 10, 2021, 12:18 p.m. UTC | #6
On 10/09/2021 12:43, Ilya Maximets wrote:
> On 9/9/21 11:02 PM, Mark Michelson wrote:
>> Hi Anton,
>>
>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>
>> See below for more comments.
>>
>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>
>>> Restore parallel build with dp groups using rwlock instead
>>> of per row locking as an underlying mechanism.
>>>
>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>> under virutalization despite awakening some qemu gremlin
>>> which makes qemu climb to silly CPU usage. The gain on
>>> bare metal is likely to be higher.
>>>
>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>> ---
>>>    northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>    1 file changed, 159 insertions(+), 56 deletions(-)
>>>
>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>> index 1f903882b..4537c55dd 100644
>>> --- a/northd/ovn-northd.c
>>> +++ b/northd/ovn-northd.c
>>> @@ -59,6 +59,7 @@
>>>    #include "unixctl.h"
>>>    #include "util.h"
>>>    #include "uuid.h"
>>> +#include "ovs-thread.h"
>>>    #include "openvswitch/vlog.h"
>>>      VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>    static bool use_logical_dp_groups = false;
>>>    static bool use_parallel_build = true;
>>>    -static struct hashrow_locks lflow_locks;
>>> +static struct ovs_rwlock flowtable_lock;
>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>
>>  From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>
>>> +
>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>> +                              struct ovn_datapath *od,
>>> +                              struct lflow_state *lflow_map,
>>> +                              uint32_t hash)
>>> +{
>>> +    hmapx_add(&old_lflow->od_group, od);
>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>> +    if (use_parallel_build) {
>>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>> +    } else {
>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>> +    }
>>> +}
>>> +
>>> +#ifdef __clang__
>>> +#pragma clang diagnostic push
>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>> +#endif
>> The section above needs a comment to explain why -Wthread-safety is ignored.
> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
> the diagnostics.
>
>
> On a side note, I ran some tests with this patch set applied and it drops
> performance of non-parallel case by 20% on my laptop with databases from
> the ovn-heater's 120 node density test.
>
> I see, you mentioned 10% improvement in the commit message.  Assuming it's
> with parallelization enabled, right?

Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.

>
> In any case, some performance testing without parallelization required before
> accepting the change, as it's a default configuration.

They were done. I am somewhat surprised by your measurement; can you please provide some more details?

You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?

>
> Best regards, Ilya Maximets.
>
Anton Ivanov Sept. 10, 2021, 12:23 p.m. UTC | #7
On 10/09/2021 13:18, Anton Ivanov wrote:
>
> On 10/09/2021 12:43, Ilya Maximets wrote:
>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>> Hi Anton,
>>>
>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>
>>> See below for more comments.
>>>
>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>
>>>> Restore parallel build with dp groups using rwlock instead
>>>> of per row locking as an underlying mechanism.
>>>>
>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>> under virutalization despite awakening some qemu gremlin
>>>> which makes qemu climb to silly CPU usage. The gain on
>>>> bare metal is likely to be higher.
>>>>
>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>> ---
>>>>    northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>    1 file changed, 159 insertions(+), 56 deletions(-)
>>>>
>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>> index 1f903882b..4537c55dd 100644
>>>> --- a/northd/ovn-northd.c
>>>> +++ b/northd/ovn-northd.c
>>>> @@ -59,6 +59,7 @@
>>>>    #include "unixctl.h"
>>>>    #include "util.h"
>>>>    #include "uuid.h"
>>>> +#include "ovs-thread.h"
>>>>    #include "openvswitch/vlog.h"
>>>>      VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>    static bool use_logical_dp_groups = false;
>>>>    static bool use_parallel_build = true;
>>>>    -static struct hashrow_locks lflow_locks;
>>>> +static struct ovs_rwlock flowtable_lock;
>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>
>>>  From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>
>>>> +
>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>> +                              struct ovn_datapath *od,
>>>> +                              struct lflow_state *lflow_map,
>>>> +                              uint32_t hash)
>>>> +{
>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>> +    if (use_parallel_build) {
>>>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>> +    } else {
>>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>> +    }
>>>> +}
>>>> +
>>>> +#ifdef __clang__
>>>> +#pragma clang diagnostic push
>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>> +#endif
>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>> the diagnostics.
>>
>>
>> On a side note, I ran some tests with this patch set applied and it drops
>> performance of non-parallel case by 20% on my laptop with databases from
>> the ovn-heater's 120 node density test.
>>
>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>> with parallelization enabled, right?
>
> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>
>>
>> In any case, some performance testing without parallelization required before
>> accepting the change, as it's a default configuration.
>
> They were done. I am somewhat surprised by your measurement, can you please provide some more details.
>
> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?

Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.

A.

>
>>
>> Best regards, Ilya Maximets.
>>
Ilya Maximets Sept. 10, 2021, 12:29 p.m. UTC | #8
On 9/10/21 2:23 PM, Anton Ivanov wrote:
> 
> On 10/09/2021 13:18, Anton Ivanov wrote:
>>
>> On 10/09/2021 12:43, Ilya Maximets wrote:
>>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>>> Hi Anton,
>>>>
>>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>>
>>>> See below for more comments.
>>>>
>>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>
>>>>> Restore parallel build with dp groups using rwlock instead
>>>>> of per row locking as an underlying mechanism.
>>>>>
>>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>>> under virutalization despite awakening some qemu gremlin
>>>>> which makes qemu climb to silly CPU usage. The gain on
>>>>> bare metal is likely to be higher.
>>>>>
>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>> ---
>>>>>    northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>>    1 file changed, 159 insertions(+), 56 deletions(-)
>>>>>
>>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>>> index 1f903882b..4537c55dd 100644
>>>>> --- a/northd/ovn-northd.c
>>>>> +++ b/northd/ovn-northd.c
>>>>> @@ -59,6 +59,7 @@
>>>>>    #include "unixctl.h"
>>>>>    #include "util.h"
>>>>>    #include "uuid.h"
>>>>> +#include "ovs-thread.h"
>>>>>    #include "openvswitch/vlog.h"
>>>>>      VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>>    static bool use_logical_dp_groups = false;
>>>>>    static bool use_parallel_build = true;
>>>>>    -static struct hashrow_locks lflow_locks;
>>>>> +static struct ovs_rwlock flowtable_lock;
>>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>>
>>>>  From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>>
>>>>> +
>>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>>> +                              struct ovn_datapath *od,
>>>>> +                              struct lflow_state *lflow_map,
>>>>> +                              uint32_t hash)
>>>>> +{
>>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>>> +    if (use_parallel_build) {
>>>>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>> +    } else {
>>>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +#ifdef __clang__
>>>>> +#pragma clang diagnostic push
>>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>>> +#endif
>>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>>> the diagnostics.
>>>
>>>
>>> On a side note, I ran some tests with this patch set applied and it drops
>>> performance of non-parallel case by 20% on my laptop with databases from
>>> the ovn-heater's 120 node density test.
>>>
>>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>>> with parallelization enabled, right?
>>
>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>>
>>>
>>> In any case, some performance testing without parallelization required before
>>> accepting the change, as it's a default configuration.
>>
>> They were done. I am somewhat surprised by your measurement, can you please provide some more details.

I have databases from a previous 120 node ovn-heater run.
I'm loading them to the OVN sandbox like this:

make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db --sbdb-source=/tmp/ovnsb_db.db"

And performing a couple of small operations with northbound database just to
trigger the northd processing loop.

Configuration has dp-groups enabled and parallelization disabled.

>>
>> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?
> 
> Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.

Not only stopwatches.  Poll intervals in general grew too.

> 
> A.
> 
>>
>>>
>>> Best regards, Ilya Maximets.
>>>
Anton Ivanov Sept. 10, 2021, 12:34 p.m. UTC | #9
On 10/09/2021 13:29, Ilya Maximets wrote:
> On 9/10/21 2:23 PM, Anton Ivanov wrote:
>> On 10/09/2021 13:18, Anton Ivanov wrote:
>>> On 10/09/2021 12:43, Ilya Maximets wrote:
>>>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>>>> Hi Anton,
>>>>>
>>>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>>>
>>>>> See below for more comments.
>>>>>
>>>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>
>>>>>> Restore parallel build with dp groups using rwlock instead
>>>>>> of per row locking as an underlying mechanism.
>>>>>>
>>>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>>>> under virutalization despite awakening some qemu gremlin
>>>>>> which makes qemu climb to silly CPU usage. The gain on
>>>>>> bare metal is likely to be higher.
>>>>>>
>>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>> ---
>>>>>>     northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>>>     1 file changed, 159 insertions(+), 56 deletions(-)
>>>>>>
>>>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>>>> index 1f903882b..4537c55dd 100644
>>>>>> --- a/northd/ovn-northd.c
>>>>>> +++ b/northd/ovn-northd.c
>>>>>> @@ -59,6 +59,7 @@
>>>>>>     #include "unixctl.h"
>>>>>>     #include "util.h"
>>>>>>     #include "uuid.h"
>>>>>> +#include "ovs-thread.h"
>>>>>>     #include "openvswitch/vlog.h"
>>>>>>       VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>>>     static bool use_logical_dp_groups = false;
>>>>>>     static bool use_parallel_build = true;
>>>>>>     -static struct hashrow_locks lflow_locks;
>>>>>> +static struct ovs_rwlock flowtable_lock;
>>>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>>>
>>>>>   From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>>>
>>>>>> +
>>>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>>>> +                              struct ovn_datapath *od,
>>>>>> +                              struct lflow_state *lflow_map,
>>>>>> +                              uint32_t hash)
>>>>>> +{
>>>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>>>> +    if (use_parallel_build) {
>>>>>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>> +    } else {
>>>>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +#ifdef __clang__
>>>>>> +#pragma clang diagnostic push
>>>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>>>> +#endif
>>>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>>>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>>>> the diagnostics.
>>>>
>>>>
>>>> On a side note, I ran some tests with this patch set applied and it drops
>>>> performance of non-parallel case by 20% on my laptop with databases from
>>>> the ovn-heater's 120 node density test.
>>>>
>>>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>>>> with parallelization enabled, right?
>>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>>>
>>>> In any case, some performance testing without parallelization required before
>>>> accepting the change, as it's a default configuration.
>>> They were done. I am somewhat surprised by your measurement, can you please provide some more details.
> I have databases from a previous 120 node ovn-heater run.
> I'm loading them to the OVN sandbox like this:
>
> make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db --sbdb-source=/tmp/ovnsb_db.db"
>
> And performing a couple of small operations with northbound database just to
> trigger the northd processing loop.
>
> Configuration has dp-groups enabled and parallelization disabled.
>
>>> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?
>> Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.
> Not only stopwatches.  Poll intervals in general grew too.

OK. Looking at it. There may be a slight increase due to extra lookup in ovn_lflow_destroy, but it should not be anywhere near 10%.

>
>> A.
>>
>>>> Best regards, Ilya Maximets.
>>>>
>
Anton Ivanov Sept. 10, 2021, 12:49 p.m. UTC | #10
On 10/09/2021 13:34, Anton Ivanov wrote:
>
> On 10/09/2021 13:29, Ilya Maximets wrote:
>> On 9/10/21 2:23 PM, Anton Ivanov wrote:
>>> On 10/09/2021 13:18, Anton Ivanov wrote:
>>>> On 10/09/2021 12:43, Ilya Maximets wrote:
>>>>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>>>>> Hi Anton,
>>>>>>
>>>>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>>>>
>>>>>> See below for more comments.
>>>>>>
>>>>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>
>>>>>>> Restore parallel build with dp groups using rwlock instead
>>>>>>> of per row locking as an underlying mechanism.
>>>>>>>
>>>>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>>>>> under virutalization despite awakening some qemu gremlin
>>>>>>> which makes qemu climb to silly CPU usage. The gain on
>>>>>>> bare metal is likely to be higher.
>>>>>>>
>>>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>> ---
>>>>>>>     northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>>>>     1 file changed, 159 insertions(+), 56 deletions(-)
>>>>>>>
>>>>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>>>>> index 1f903882b..4537c55dd 100644
>>>>>>> --- a/northd/ovn-northd.c
>>>>>>> +++ b/northd/ovn-northd.c
>>>>>>> @@ -59,6 +59,7 @@
>>>>>>>     #include "unixctl.h"
>>>>>>>     #include "util.h"
>>>>>>>     #include "uuid.h"
>>>>>>> +#include "ovs-thread.h"
>>>>>>>     #include "openvswitch/vlog.h"
>>>>>>>       VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>>>>     static bool use_logical_dp_groups = false;
>>>>>>>     static bool use_parallel_build = true;
>>>>>>>     -static struct hashrow_locks lflow_locks;
>>>>>>> +static struct ovs_rwlock flowtable_lock;
>>>>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>>>>
>>>>>>   From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>>>>
>>>>>>> +
>>>>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>>>>> +                              struct ovn_datapath *od,
>>>>>>> +                              struct lflow_state *lflow_map,
>>>>>>> +                              uint32_t hash)
>>>>>>> +{
>>>>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>>>>> +    if (use_parallel_build) {
>>>>>>> + hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>> +    } else {
>>>>>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>> +    }
>>>>>>> +}
>>>>>>> +
>>>>>>> +#ifdef __clang__
>>>>>>> +#pragma clang diagnostic push
>>>>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>>>>> +#endif
>>>>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>>>>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>>>>> the diagnostics.
>>>>>
>>>>>
>>>>> On a side note, I ran some tests with this patch set applied and it drops
>>>>> performance of non-parallel case by 20% on my laptop with databases from
>>>>> the ovn-heater's 120 node density test.
>>>>>
>>>>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>>>>> with parallelization enabled, right?
>>>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>>>>
>>>>> In any case, some performance testing without parallelization required before
>>>>> accepting the change, as it's a default configuration.
>>>> They were done. I am somewhat surprised by your measurement, can you please provide some more details.
>> I have databases from a previous 120 node ovn-heater run.
>> I'm loading them to the OVN sandbox like this:
>>
>> make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db --sbdb-source=/tmp/ovnsb_db.db"
>>
>> And performing a couple of small operations with northbound database just to
>> trigger the northd processing loop.
>>
>> Configuration has dp-groups enabled and parallelization disabled.
>>
>>>> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?
>>> Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.
>> Not only stopwatches.  Poll intervals in general grew too.
>
> OK. Looking at it. There may be a slight increase due to extra lookup in ovn_lflow_destroy, but it should not be anywhere near 10%.

The other one is that while there are more single od flows, in most cases ovn_lookup_flow will return multiple od flows. That is how the arguments to ovn_lookup_flow pan out. Ditto for flow insertion. That lookup is probably in the wrong order. It should look up multiple-od flows first, then single-od ones.

>
>>
>>> A.
>>>
>>>>> Best regards, Ilya Maximets.
>>>>>
>>
Anton Ivanov Sept. 10, 2021, 12:50 p.m. UTC | #11
On 10/09/2021 13:49, Anton Ivanov wrote:
>
> On 10/09/2021 13:34, Anton Ivanov wrote:
>>
>> On 10/09/2021 13:29, Ilya Maximets wrote:
>>> On 9/10/21 2:23 PM, Anton Ivanov wrote:
>>>> On 10/09/2021 13:18, Anton Ivanov wrote:
>>>>> On 10/09/2021 12:43, Ilya Maximets wrote:
>>>>>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>>>>>> Hi Anton,
>>>>>>>
>>>>>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>>>>>
>>>>>>> See below for more comments.
>>>>>>>
>>>>>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>>
>>>>>>>> Restore parallel build with dp groups using rwlock instead
>>>>>>>> of per row locking as an underlying mechanism.
>>>>>>>>
>>>>>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>>>>>> under virutalization despite awakening some qemu gremlin
>>>>>>>> which makes qemu climb to silly CPU usage. The gain on
>>>>>>>> bare metal is likely to be higher.
>>>>>>>>
>>>>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>> ---
>>>>>>>>     northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>>>>>     1 file changed, 159 insertions(+), 56 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>>>>>> index 1f903882b..4537c55dd 100644
>>>>>>>> --- a/northd/ovn-northd.c
>>>>>>>> +++ b/northd/ovn-northd.c
>>>>>>>> @@ -59,6 +59,7 @@
>>>>>>>>     #include "unixctl.h"
>>>>>>>>     #include "util.h"
>>>>>>>>     #include "uuid.h"
>>>>>>>> +#include "ovs-thread.h"
>>>>>>>>     #include "openvswitch/vlog.h"
>>>>>>>>       VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>>>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>>>>>     static bool use_logical_dp_groups = false;
>>>>>>>>     static bool use_parallel_build = true;
>>>>>>>>     -static struct hashrow_locks lflow_locks;
>>>>>>>> +static struct ovs_rwlock flowtable_lock;
>>>>>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>>>>>
>>>>>>>   From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>>>>>
>>>>>>>> +
>>>>>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>>>>>> +                              struct ovn_datapath *od,
>>>>>>>> +                              struct lflow_state *lflow_map,
>>>>>>>> +                              uint32_t hash)
>>>>>>>> +{
>>>>>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>>>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>>>>>> +    if (use_parallel_build) {
>>>>>>>> + hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>>> +    } else {
>>>>>>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +#ifdef __clang__
>>>>>>>> +#pragma clang diagnostic push
>>>>>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>>>>>> +#endif
>>>>>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>>>>>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>>>>>> the diagnostics.
>>>>>>
>>>>>>
>>>>>> On a side note, I ran some tests with this patch set applied and it drops
>>>>>> performance of non-parallel case by 20% on my laptop with databases from
>>>>>> the ovn-heater's 120 node density test.
>>>>>>
>>>>>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>>>>>> with parallelization enabled, right?
>>>>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>>>>>
>>>>>> In any case, some performance testing without parallelization required before
>>>>>> accepting the change, as it's a default configuration.
>>>>> They were done. I am somewhat surprised by your measurement, can you please provide some more details.
>>> I have databases from a previous 120 node ovn-heater run.
>>> I'm loading them to the OVN sandbox like this:
>>>
>>> make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db --sbdb-source=/tmp/ovnsb_db.db"
>>>
>>> And performing a couple of small operations with northbound database just to
>>> trigger the northd processing loop.
>>>
>>> Configuration has dp-groups enabled and parallelization disabled.
>>>
>>>>> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?
>>>> Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.
>>> Not only stopwatches.  Poll intervals in general grew too.
>>
>> OK. Looking at it. There may be a slight increase due to extra lookup in ovn_lflow_destroy, but it should not be anywhere near 10%.
>
> The other one is that while there are more single od flows, in most cases ovn_lookup_flow will return a multiple od flows. That is how the arguments to ovn_lookup_flow pan out. Ditto for flow insertion. That lookup is probably in the wrong order. It should lookup

Apologies, pressed send by mistake.

Wanted to say - it should look at multiple flows first, then at single (though it will be good to measure this properly).

A.


>
>>
>>>
>>>> A.
>>>>
>>>>>> Best regards, Ilya Maximets.
>>>>>>
>>>
Anton Ivanov Sept. 10, 2021, 2:19 p.m. UTC | #12
On 10/09/2021 13:29, Ilya Maximets wrote:
> On 9/10/21 2:23 PM, Anton Ivanov wrote:
>> On 10/09/2021 13:18, Anton Ivanov wrote:
>>> On 10/09/2021 12:43, Ilya Maximets wrote:
>>>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>>>> Hi Anton,
>>>>>
>>>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>>>
>>>>> See below for more comments.
>>>>>
>>>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>
>>>>>> Restore parallel build with dp groups using rwlock instead
>>>>>> of per row locking as an underlying mechanism.
>>>>>>
>>>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>>>> under virutalization despite awakening some qemu gremlin
>>>>>> which makes qemu climb to silly CPU usage. The gain on
>>>>>> bare metal is likely to be higher.
>>>>>>
>>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>> ---
>>>>>>     northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>>>     1 file changed, 159 insertions(+), 56 deletions(-)
>>>>>>
>>>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>>>> index 1f903882b..4537c55dd 100644
>>>>>> --- a/northd/ovn-northd.c
>>>>>> +++ b/northd/ovn-northd.c
>>>>>> @@ -59,6 +59,7 @@
>>>>>>     #include "unixctl.h"
>>>>>>     #include "util.h"
>>>>>>     #include "uuid.h"
>>>>>> +#include "ovs-thread.h"
>>>>>>     #include "openvswitch/vlog.h"
>>>>>>       VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>>>     static bool use_logical_dp_groups = false;
>>>>>>     static bool use_parallel_build = true;
>>>>>>     -static struct hashrow_locks lflow_locks;
>>>>>> +static struct ovs_rwlock flowtable_lock;
>>>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>>>
>>>>>   From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>>>
>>>>>> +
>>>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>>>> +                              struct ovn_datapath *od,
>>>>>> +                              struct lflow_state *lflow_map,
>>>>>> +                              uint32_t hash)
>>>>>> +{
>>>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>>>> +    if (use_parallel_build) {
>>>>>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>> +    } else {
>>>>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +#ifdef __clang__
>>>>>> +#pragma clang diagnostic push
>>>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>>>> +#endif
>>>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>>>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>>>> the diagnostics.
>>>>
>>>>
>>>> On a side note, I ran some tests with this patch set applied and it drops
>>>> performance of non-parallel case by 20% on my laptop with databases from
>>>> the ovn-heater's 120 node density test.
>>>>
>>>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>>>> with parallelization enabled, right?
>>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>>>
>>>> In any case, some performance testing without parallelization required before
>>>> accepting the change, as it's a default configuration.
>>> They were done. I am somewhat surprised by your measurement, can you please provide some more details.
> I have databases from a previous 120 node ovn-heater run.
> I'm loading them to the OVN sandbox like this:
>
> make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db --sbdb-source=/tmp/ovnsb_db.db"
>
> And performing a couple of small operations with northbound database just to
> trigger the northd processing loop.
>
> Configuration has dp-groups enabled and parallelization disabled.

Can you run v7 versus that. It should fix 3 performance regressions I found in v6. Each of them is relatively small, but they may add up for a large database.

I will run an ovn-heater with this, but to be honest - I do not expect any difference. I did not see any difference before at the scales I was testing.

If it is still slower than the original code, it will be interesting to find the culprit. They should be nearly identical.

A.

>
>>> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?
>> Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.
> Not only stopwatches.  Poll intervals in general grew too.
>
>> A.
>>
>>>> Best regards, Ilya Maximets.
>>>>
>
Dumitru Ceara Sept. 10, 2021, 3:37 p.m. UTC | #13
On 9/10/21 2:18 PM, Anton Ivanov wrote:
>> On a side note, I ran some tests with this patch set applied and it drops
>> performance of non-parallel case by 20% on my laptop with databases from
>> the ovn-heater's 120 node density test.
>>
>> I see, you mentioned 10% improvement in the commit message.  Assuming
>> it's
>> with parallelization enabled, right?
> 
> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.

Just joining the conversation to mention that ovn-heater was
significantly changed/improved and its tests are way different compared
to a few months ago.

If you're not running the new version of ovn-heater (that doesn't rely
on rally-ovs/ovn-scale-test) it's available here:

https://github.com/dceara/ovn-heater

And the test Ilya referred to is:

https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-heavy.yml

To run it (just replace 20 with 120):
https://github.com/dceara/ovn-heater#example-run-20-nodes-density-heavy

We run these tests weekly on a setup with 30 physical servers on which
we provision 120 "fake" nodes:

https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-light.yml
https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-heavy.yml
https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-cluster-density.yml
https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-np-multitenant.yml

Regards,
Dumitru
Anton Ivanov Sept. 10, 2021, 3:40 p.m. UTC | #14
Thanks, I will update to the newer version.

A.

On 10/09/2021 16:37, Dumitru Ceara wrote:
> On 9/10/21 2:18 PM, Anton Ivanov wrote:
>>> On a side note, I ran some tests with this patch set applied and it drops
>>> performance of non-parallel case by 20% on my laptop with databases from
>>> the ovn-heater's 120 node density test.
>>>
>>> I see, you mentioned 10% improvement in the commit message.  Assuming
>>> it's
>>> with parallelization enabled, right?
>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
> Just joining the conversation to mention that ovn-heater was
> significantly changed/improved and its tests are way different compared
> to a few months ago.
>
> If you're not running the new version of ovn-heater (that doesn't rely
> on rally-ovs/ovn-scale-test) it's available here:
>
> https://github.com/dceara/ovn-heater
>
> And the test Ilya referred to is:
>
> https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-heavy.yml
>
> To run it (just replace 20 with 120):
> https://github.com/dceara/ovn-heater#example-run-20-nodes-density-heavy
>
> We run these tests weekly on a setup with 30 physical servers on which
> we provision 120 "fake" nodes:
>
> https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-light.yml
> https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-heavy.yml
> https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-cluster-density.yml
> https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-np-multitenant.yml
>
> Regards,
> Dumitru
>
>
Ilya Maximets Sept. 10, 2021, 4:13 p.m. UTC | #15
On 9/10/21 4:19 PM, Anton Ivanov wrote:
> 
> On 10/09/2021 13:29, Ilya Maximets wrote:
>> On 9/10/21 2:23 PM, Anton Ivanov wrote:
>>> On 10/09/2021 13:18, Anton Ivanov wrote:
>>>> On 10/09/2021 12:43, Ilya Maximets wrote:
>>>>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>>>>> Hi Anton,
>>>>>>
>>>>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>>>>
>>>>>> See below for more comments.
>>>>>>
>>>>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>
>>>>>>> Restore parallel build with dp groups using rwlock instead
>>>>>>> of per row locking as an underlying mechanism.
>>>>>>>
>>>>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>>>>> under virutalization despite awakening some qemu gremlin
>>>>>>> which makes qemu climb to silly CPU usage. The gain on
>>>>>>> bare metal is likely to be higher.
>>>>>>>
>>>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>> ---
>>>>>>>     northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>>>>     1 file changed, 159 insertions(+), 56 deletions(-)
>>>>>>>
>>>>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>>>>> index 1f903882b..4537c55dd 100644
>>>>>>> --- a/northd/ovn-northd.c
>>>>>>> +++ b/northd/ovn-northd.c
>>>>>>> @@ -59,6 +59,7 @@
>>>>>>>     #include "unixctl.h"
>>>>>>>     #include "util.h"
>>>>>>>     #include "uuid.h"
>>>>>>> +#include "ovs-thread.h"
>>>>>>>     #include "openvswitch/vlog.h"
>>>>>>>       VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>>>>     static bool use_logical_dp_groups = false;
>>>>>>>     static bool use_parallel_build = true;
>>>>>>>     -static struct hashrow_locks lflow_locks;
>>>>>>> +static struct ovs_rwlock flowtable_lock;
>>>>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>>>>
>>>>>>   From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>>>>
>>>>>>> +
>>>>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>>>>> +                              struct ovn_datapath *od,
>>>>>>> +                              struct lflow_state *lflow_map,
>>>>>>> +                              uint32_t hash)
>>>>>>> +{
>>>>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>>>>> +    if (use_parallel_build) {
>>>>>>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>> +    } else {
>>>>>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>> +    }
>>>>>>> +}
>>>>>>> +
>>>>>>> +#ifdef __clang__
>>>>>>> +#pragma clang diagnostic push
>>>>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>>>>> +#endif
>>>>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>>>>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>>>>> the diagnostics.
>>>>>
>>>>>
>>>>> On a side note, I ran some tests with this patch set applied and it drops
>>>>> performance of non-parallel case by 20% on my laptop with databases from
>>>>> the ovn-heater's 120 node density test.
>>>>>
>>>>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>>>>> with parallelization enabled, right?
>>>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>>>>
>>>>> In any case, some performance testing without parallelization required before
>>>>> accepting the change, as it's a default configuration.
>>>> They were done. I am somewhat surprised by your measurement, can you please provide some more details.
>> I have databases from a previous 120 node ovn-heater run.
>> I'm loading them to the OVN sandbox like this:
>>
>> make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db --sbdb-source=/tmp/ovnsb_db.db"
>>
>> And performing a couple of small operations with northbound database just to
>> trigger the northd processing loop.
>>
>> Configuration has dp-groups enabled and parallelization disabled.
> 
> Can you run v7 versus that. It should fix 3 performance regressions I found in v6. Each of them is relatively small, but they may add up for a large database.

v7 seems only slightly better.  By a couple of percent.
Nowhere close to the current master.

And stopwatches in v7 are still messed up.

> 
> I will run an ovn-heater with this, but to be honest - I do not expect any difference. I did not see any difference before at the scales I was testing.
> 
> If it is still slower than the original code, it will be interesting to find the culprit. They should be nearly identical.
> 
> A.
> 
>>
>>>> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?
>>> Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.
>> Not only stopwatches.  Poll intervals in general grew too.
>>
>>> A.
>>>
>>>>> Best regards, Ilya Maximets.
>>>>>
>>
Anton Ivanov Sept. 10, 2021, 4:20 p.m. UTC | #16
On 10/09/2021 17:13, Ilya Maximets wrote:
> On 9/10/21 4:19 PM, Anton Ivanov wrote:
>> On 10/09/2021 13:29, Ilya Maximets wrote:
>>> On 9/10/21 2:23 PM, Anton Ivanov wrote:
>>>> On 10/09/2021 13:18, Anton Ivanov wrote:
>>>>> On 10/09/2021 12:43, Ilya Maximets wrote:
>>>>>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>>>>>> Hi Anton,
>>>>>>>
>>>>>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>>>>>
>>>>>>> See below for more comments.
>>>>>>>
>>>>>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>>
>>>>>>>> Restore parallel build with dp groups using rwlock instead
>>>>>>>> of per row locking as an underlying mechanism.
>>>>>>>>
>>>>>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>>>>>> under virutalization despite awakening some qemu gremlin
>>>>>>>> which makes qemu climb to silly CPU usage. The gain on
>>>>>>>> bare metal is likely to be higher.
>>>>>>>>
>>>>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>> ---
>>>>>>>>      northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>>>>>      1 file changed, 159 insertions(+), 56 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>>>>>> index 1f903882b..4537c55dd 100644
>>>>>>>> --- a/northd/ovn-northd.c
>>>>>>>> +++ b/northd/ovn-northd.c
>>>>>>>> @@ -59,6 +59,7 @@
>>>>>>>>      #include "unixctl.h"
>>>>>>>>      #include "util.h"
>>>>>>>>      #include "uuid.h"
>>>>>>>> +#include "ovs-thread.h"
>>>>>>>>      #include "openvswitch/vlog.h"
>>>>>>>>        VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>>>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>>>>>      static bool use_logical_dp_groups = false;
>>>>>>>>      static bool use_parallel_build = true;
>>>>>>>>      -static struct hashrow_locks lflow_locks;
>>>>>>>> +static struct ovs_rwlock flowtable_lock;
>>>>>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>>>>>
>>>>>>>    From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>>>>>
>>>>>>>> +
>>>>>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>>>>>> +                              struct ovn_datapath *od,
>>>>>>>> +                              struct lflow_state *lflow_map,
>>>>>>>> +                              uint32_t hash)
>>>>>>>> +{
>>>>>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>>>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>>>>>> +    if (use_parallel_build) {
>>>>>>>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>>> +    } else {
>>>>>>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +#ifdef __clang__
>>>>>>>> +#pragma clang diagnostic push
>>>>>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>>>>>> +#endif
>>>>>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>>>>>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>>>>>> the diagnostics.
>>>>>>
>>>>>>
>>>>>> On a side note, I ran some tests with this patch set applied and it drops
>>>>>> performance of non-parallel case by 20% on my laptop with databases from
>>>>>> the ovn-heater's 120 node density test.
>>>>>>
>>>>>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>>>>>> with parallelization enabled, right?
>>>>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>>>>>
>>>>>> In any case, some performance testing without parallelization required before
>>>>>> accepting the change, as it's a default configuration.
>>>>> They were done. I am somewhat surprised by your measurement, can you please provide some more details.
>>> I have databases from a previous 120 node ovn-heater run.
>>> I'm loading them to the OVN sandbox like this:
>>>
>>> make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db --sbdb-source=/tmp/ovnsb_db.db"
>>>
>>> And performing a couple of small operations with northbound database just to
>>> trigger the northd processing loop.
>>>
>>> Configuration has dp-groups enabled and parallelization disabled.
>> Can you run v7 versus that. It should fix 3 performance regressions I found in v6. Each of them is relatively small, but they may add up for a large database.
> v7 seems only slightly better.  By a couple of percents.
> Nowhere close to the current master.
>
> And stopwatches in v7 are still messed up.

Thanks, I will continue digging.

Brgds,

A.

>> I will run an ovn-heater with this, but to be honest - I do not expect any difference. I did not see any difference before at the scales I was testing.
>>
>> If it is still slower than the original code, it will be interesting to find the culprit. They should be nearly identical.
>>
>> A.
>>
>>>>> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?
>>>> Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.
>>> Not only stopwatches.  Poll intervals in general grew too.
>>>
>>>> A.
>>>>
>>>>>> Best regards, Ilya Maximets.
>>>>>>
>
Anton Ivanov Sept. 13, 2021, 4:57 p.m. UTC | #17
The culprit has been found and neutralized.

It is now marginally faster (~ 0.5%) than master.

I will do full retests and scale tests for all combinations, including parallel, and, if it tests out OK, post v8 tomorrow.

A.

On 10/09/2021 17:13, Ilya Maximets wrote:
> On 9/10/21 4:19 PM, Anton Ivanov wrote:
>> On 10/09/2021 13:29, Ilya Maximets wrote:
>>> On 9/10/21 2:23 PM, Anton Ivanov wrote:
>>>> On 10/09/2021 13:18, Anton Ivanov wrote:
>>>>> On 10/09/2021 12:43, Ilya Maximets wrote:
>>>>>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>>>>>> Hi Anton,
>>>>>>>
>>>>>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>>>>>
>>>>>>> See below for more comments.
>>>>>>>
>>>>>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>>
>>>>>>>> Restore parallel build with dp groups using rwlock instead
>>>>>>>> of per row locking as an underlying mechanism.
>>>>>>>>
>>>>>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>>>>>> under virutalization despite awakening some qemu gremlin
>>>>>>>> which makes qemu climb to silly CPU usage. The gain on
>>>>>>>> bare metal is likely to be higher.
>>>>>>>>
>>>>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>> ---
>>>>>>>>      northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>>>>>      1 file changed, 159 insertions(+), 56 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>>>>>> index 1f903882b..4537c55dd 100644
>>>>>>>> --- a/northd/ovn-northd.c
>>>>>>>> +++ b/northd/ovn-northd.c
>>>>>>>> @@ -59,6 +59,7 @@
>>>>>>>>      #include "unixctl.h"
>>>>>>>>      #include "util.h"
>>>>>>>>      #include "uuid.h"
>>>>>>>> +#include "ovs-thread.h"
>>>>>>>>      #include "openvswitch/vlog.h"
>>>>>>>>        VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>>>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>>>>>      static bool use_logical_dp_groups = false;
>>>>>>>>      static bool use_parallel_build = true;
>>>>>>>>      -static struct hashrow_locks lflow_locks;
>>>>>>>> +static struct ovs_rwlock flowtable_lock;
>>>>>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>>>>>
>>>>>>>    From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>>>>>
>>>>>>>> +
>>>>>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>>>>>> +                              struct ovn_datapath *od,
>>>>>>>> +                              struct lflow_state *lflow_map,
>>>>>>>> +                              uint32_t hash)
>>>>>>>> +{
>>>>>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>>>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>>>>>> +    if (use_parallel_build) {
>>>>>>>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>>> +    } else {
>>>>>>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +#ifdef __clang__
>>>>>>>> +#pragma clang diagnostic push
>>>>>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>>>>>> +#endif
>>>>>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>>>>>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>>>>>> the diagnostics.
>>>>>>
>>>>>>
>>>>>> On a side note, I ran some tests with this patch set applied and it drops
>>>>>> performance of non-parallel case by 20% on my laptop with databases from
>>>>>> the ovn-heater's 120 node density test.
>>>>>>
>>>>>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>>>>>> with parallelization enabled, right?
>>>>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>>>>>
>>>>>> In any case, some performance testing without parallelization required before
>>>>>> accepting the change, as it's a default configuration.
>>>>> They were done. I am somewhat surprised by your measurement, can you please provide some more details.
>>> I have databases from a previous 120 node ovn-heater run.
>>> I'm loading them to the OVN sandbox like this:
>>>
>>> make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db --sbdb-source=/tmp/ovnsb_db.db"
>>>
>>> And performing a couple of small operations with northbound database just to
>>> trigger the northd processing loop.
>>>
>>> Configuration has dp-groups enabled and parallelization disabled.
>> Can you run v7 versus that. It should fix 3 performance regressions I found in v6. Each of them is relatively small, but they may add up for a large database.
> v7 seems only slightly better.  By a couple of percents.
> Nowhere close to the current master.
>
> And stopwatches in v7 are still messed up.
>
>> I will run an ovn-heater with this, but to be honest - I do not expect any difference. I did not see any difference before at the scales I was testing.
>>
>> If it is still slower than the original code, it will be interesting to find the culprit. They should be nearly identical.
>>
>> A.
>>
>>>>> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?
>>>> Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.
>>> Not only stopwatches.  Poll intervals in general grew too.
>>>
>>>> A.
>>>>
>>>>>> Best regards, Ilya Maximets.
>>>>>>
>
Anton Ivanov Sept. 13, 2021, 5:02 p.m. UTC | #18
On 10/09/2021 16:37, Dumitru Ceara wrote:
> On 9/10/21 2:18 PM, Anton Ivanov wrote:
>>> On a side note, I ran some tests with this patch set applied and it drops
>>> performance of non-parallel case by 20% on my laptop with databases from
>>> the ovn-heater's 120 node density test.
>>>
>>> I see, you mentioned 10% improvement in the commit message.  Assuming
>>> it's
>>> with parallelization enabled, right?
>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
> Just joining the conversation to mention that ovn-heater was
> significantly changed/improved and its tests are way different compared
> to a few months ago.
>
> If you're not running the new version of ovn-heater (that doesn't rely
> on rally-ovs/ovn-scale-test) it's available here:

Thanks, I updated and it is much better than the old one.

I scaled down the config for 20 heavy to 11 to match my rig (I try to run one node - one worker).

Brgds,

>
> https://github.com/dceara/ovn-heater
>
> And the test Ilya referred to is:
>
> https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-heavy.yml
>
> To run it (just replace 20 with 120):
> https://github.com/dceara/ovn-heater#example-run-20-nodes-density-heavy
>
> We run these tests weekly on a setup with 30 physical servers on which
> we provision 120 "fake" nodes:
>
> https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-light.yml
> https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-density-heavy.yml
> https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-cluster-density.yml
> https://github.com/dceara/ovn-heater/blob/master/test-scenarios/ocp-120-np-multitenant.yml
>
> Regards,
> Dumitru
>
>
Anton Ivanov Sept. 15, 2021, 12:45 p.m. UTC | #19
I have dropped the split processing of lflows for now.

On 10/09/2021 17:13, Ilya Maximets wrote:
> On 9/10/21 4:19 PM, Anton Ivanov wrote:
>> On 10/09/2021 13:29, Ilya Maximets wrote:
>>> On 9/10/21 2:23 PM, Anton Ivanov wrote:
>>>> On 10/09/2021 13:18, Anton Ivanov wrote:
>>>>> On 10/09/2021 12:43, Ilya Maximets wrote:
>>>>>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>>>>>> Hi Anton,
>>>>>>>
>>>>>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>>>>>
>>>>>>> See below for more comments.
>>>>>>>
>>>>>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>>
>>>>>>>> Restore parallel build with dp groups using rwlock instead
>>>>>>>> of per row locking as an underlying mechanism.
>>>>>>>>
>>>>>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>>>>>> under virutalization despite awakening some qemu gremlin
>>>>>>>> which makes qemu climb to silly CPU usage. The gain on
>>>>>>>> bare metal is likely to be higher.
>>>>>>>>
>>>>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>> ---
>>>>>>>>      northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>>>>>      1 file changed, 159 insertions(+), 56 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>>>>>> index 1f903882b..4537c55dd 100644
>>>>>>>> --- a/northd/ovn-northd.c
>>>>>>>> +++ b/northd/ovn-northd.c
>>>>>>>> @@ -59,6 +59,7 @@
>>>>>>>>      #include "unixctl.h"
>>>>>>>>      #include "util.h"
>>>>>>>>      #include "uuid.h"
>>>>>>>> +#include "ovs-thread.h"
>>>>>>>>      #include "openvswitch/vlog.h"
>>>>>>>>        VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>>>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>>>>>      static bool use_logical_dp_groups = false;
>>>>>>>>      static bool use_parallel_build = true;
>>>>>>>>      -static struct hashrow_locks lflow_locks;
>>>>>>>> +static struct ovs_rwlock flowtable_lock;
>>>>>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>>>>>
>>>>>>>    From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>>>>>
>>>>>>>> +
>>>>>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>>>>>> +                              struct ovn_datapath *od,
>>>>>>>> +                              struct lflow_state *lflow_map,
>>>>>>>> +                              uint32_t hash)
>>>>>>>> +{
>>>>>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>>>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>>>>>> +    if (use_parallel_build) {
>>>>>>>> +        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>>> +    } else {
>>>>>>>> +        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +#ifdef __clang__
>>>>>>>> +#pragma clang diagnostic push
>>>>>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>>>>>> +#endif
>>>>>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>>>>>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>>>>>> the diagnostics.
>>>>>>
>>>>>>
>>>>>> On a side note, I ran some tests with this patch set applied and it drops
>>>>>> performance of non-parallel case by 20% on my laptop with databases from
>>>>>> the ovn-heater's 120 node density test.
>>>>>>
>>>>>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>>>>>> with parallelization enabled, right?
>>>>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>>>>>
>>>>>> In any case, some performance testing without parallelization required before
>>>>>> accepting the change, as it's a default configuration.
>>>>> They were done. I am somewhat surprised by your measurement, can you please provide some more details.
>>> I have databases from a previous 120 node ovn-heater run.
>>> I'm loading them to the OVN sandbox like this:
>>>
>>> make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db --sbdb-source=/tmp/ovnsb_db.db"
>>>
>>> And performing a couple of small operations with northbound database just to
>>> trigger the northd processing loop.
>>>
>>> Configuration has dp-groups enabled and parallelization disabled.
>> Can you run v7 versus that. It should fix 3 performance regressions I found in v6. Each of them is relatively small, but they may add up for a large database.
> v7 seems only slightly better.  By a couple of percents.
> Nowhere close to the current master.
>
> And stopwatches in v7 are still messed up.
>
>> I will run an ovn-heater with this, but to be honest - I do not expect any difference. I did not see any difference before at the scales I was testing.
>>
>> If it is still slower than the original code, it will be interesting to find the culprit. They should be nearly identical.
>>
>> A.
>>
>>>>> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?
>>>> Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.
>>> Not only stopwatches.  Poll intervals in general grew too.
>>>
>>>> A.
>>>>
>>>>>> Best regards, Ilya Maximets.
>>>>>>
>
Anton Ivanov Sept. 15, 2021, 12:47 p.m. UTC | #20
On 15/09/2021 13:45, Anton Ivanov wrote:
> I have dropped the split processing of lflows for now.

Sorry, pressed send too early by mistake.

V8 has the same code as the existing ovn for single-thread and code derived from my split-processing work for multi-thread.

It is tested to be identical to master in single-thread mode and significantly faster (albeit at a CPU cost) than single-thread if you have at least 8 threads to work on it.

A.

>
> On 10/09/2021 17:13, Ilya Maximets wrote:
>> On 9/10/21 4:19 PM, Anton Ivanov wrote:
>>> On 10/09/2021 13:29, Ilya Maximets wrote:
>>>> On 9/10/21 2:23 PM, Anton Ivanov wrote:
>>>>> On 10/09/2021 13:18, Anton Ivanov wrote:
>>>>>> On 10/09/2021 12:43, Ilya Maximets wrote:
>>>>>>> On 9/9/21 11:02 PM, Mark Michelson wrote:
>>>>>>>> Hi Anton,
>>>>>>>>
>>>>>>>> On a high level, this change results in some parts of the parallelized hashmap being unused. Should we keep the hashrow_locks structure and its APIs, for instance?
>>>>>>>>
>>>>>>>> See below for more comments.
>>>>>>>>
>>>>>>>> On 9/2/21 9:27 AM, anton.ivanov@cambridgegreys.com wrote:
>>>>>>>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>>>
>>>>>>>>> Restore parallel build with dp groups using rwlock instead
>>>>>>>>> of per row locking as an underlying mechanism.
>>>>>>>>>
>>>>>>>>> This provides improvement ~ 10% end-to-end on ovn-heater
>>>>>>>>> under virutalization despite awakening some qemu gremlin
>>>>>>>>> which makes qemu climb to silly CPU usage. The gain on
>>>>>>>>> bare metal is likely to be higher.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>>>>>>> ---
>>>>>>>>>      northd/ovn-northd.c | 215 ++++++++++++++++++++++++++++++++------------
>>>>>>>>>      1 file changed, 159 insertions(+), 56 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
>>>>>>>>> index 1f903882b..4537c55dd 100644
>>>>>>>>> --- a/northd/ovn-northd.c
>>>>>>>>> +++ b/northd/ovn-northd.c
>>>>>>>>> @@ -59,6 +59,7 @@
>>>>>>>>>      #include "unixctl.h"
>>>>>>>>>      #include "util.h"
>>>>>>>>>      #include "uuid.h"
>>>>>>>>> +#include "ovs-thread.h"
>>>>>>>>>      #include "openvswitch/vlog.h"
>>>>>>>>>        VLOG_DEFINE_THIS_MODULE(ovn_northd);
>>>>>>>>> @@ -4372,7 +4373,26 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
>>>>>>>>>      static bool use_logical_dp_groups = false;
>>>>>>>>>      static bool use_parallel_build = true;
>>>>>>>>>      -static struct hashrow_locks lflow_locks;
>>>>>>>>> +static struct ovs_rwlock flowtable_lock;
>>>>>>>> With the change from the custom hashrow_locks to using an ovs_rwlock, I think it's important to document what data this lock is intending to protect.
>>>>>>>>
>>>>>>>>    From what I can tell, this lock specifically is intended to protect access to the hmaps in the global lflow_state structure, and it's also intended to protect all ovn_datapaths' od_group hmaps. This is not something that is immediately obvious just from a global rwlock declaration.
>>>>>>>>
>>>>>>>>> +
>>>>>>>>> +static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
>>>>>>>>> +                              struct ovn_datapath *od,
>>>>>>>>> +                              struct lflow_state *lflow_map,
>>>>>>>>> +                              uint32_t hash)
>>>>>>>>> +{
>>>>>>>>> +    hmapx_add(&old_lflow->od_group, od);
>>>>>>>>> +    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
>>>>>>>>> +    if (use_parallel_build) {
>>>>>>>>> + hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>>>> +    } else {
>>>>>>>>> + hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
>>>>>>>>> +    }
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +#ifdef __clang__
>>>>>>>>> +#pragma clang diagnostic push
>>>>>>>>> +#pragma clang diagnostic ignored "-Wthread-safety"
>>>>>>>>> +#endif
>>>>>>>> The section above needs a comment to explain why -Wthread-safety is ignored.
>>>>>>> Please, use OVS_NO_THREAD_SAFETY_ANALYSIS marker instead of manually disabling
>>>>>>> the diagnostics.
>>>>>>>
>>>>>>>
>>>>>>> On a side note, I ran some tests with this patch set applied and it drops
>>>>>>> performance of non-parallel case by 20% on my laptop with databases from
>>>>>>> the ovn-heater's 120 node density test.
>>>>>>>
>>>>>>> I see, you mentioned 10% improvement in the commit message. Assuming it's
>>>>>>> with parallelization enabled, right?
>>>>>> Yes. And no difference on heater tests up to 36 fake nodes with 7200 ports.
>>>>>>
>>>>>>> In any case, some performance testing without parallelization required before
>>>>>>> accepting the change, as it's a default configuration.
>>>>>> They were done. I am somewhat surprised by your measurement, can you please provide some more details.
>>>> I have databases from a previous 120 node ovn-heater run.
>>>> I'm loading them to the OVN sandbox like this:
>>>>
>>>> make sandbox SANDBOXFLAGS="--nbdb-source=/tmp/ovnnb_db.db --sbdb-source=/tmp/ovnsb_db.db"
>>>>
>>>> And performing a couple of small operations with northbound database just to
>>>> trigger the northd processing loop.
>>>>
>>>> Configuration has dp-groups enabled and parallelization disabled.
>>> Can you run v7 versus that. It should fix 3 performance regressions I found in v6. Each of them is relatively small, but they may add up for a large database.
>> v7 seems only slightly better.  By a couple of percents.
>> Nowhere close to the current master.
>>
>> And stopwatches in v7 are still messed up.
>>
>>> I will run an ovn-heater with this, but to be honest - I do not expect any difference. I did not see any difference before at the scales I was testing.
>>>
>>> If it is still slower than the original code, it will be interesting to find the culprit. They should be nearly identical.
>>>
>>> A.
>>>
>>>>>> You are saying "database from 120 nodes" - are we talking a stopwatch measurement here?
>>>>> Which is the case. The patch moved it where it should not be. Will be fixed in the next revision.
>>>> Not only stopwatches.  Poll intervals in general grew too.
>>>>
>>>>> A.
>>>>>
>>>>>>> Best regards, Ilya Maximets.
>>>>>>>
>>
diff mbox series

Patch

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 1f903882b..4537c55dd 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@ 
 #include "unixctl.h"
 #include "util.h"
 #include "uuid.h"
+#include "ovs-thread.h"
 #include "openvswitch/vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4372,7 +4373,26 @@  ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
 static bool use_logical_dp_groups = false;
 static bool use_parallel_build = true;
 
-static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;
+
+static void ovn_make_multi_lflow(struct ovn_lflow *old_lflow,
+                              struct ovn_datapath *od,
+                              struct lflow_state *lflow_map,
+                              uint32_t hash)
+{
+    hmapx_add(&old_lflow->od_group, od);
+    hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
+    if (use_parallel_build) {
+        hmap_insert_fast(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
+    } else {
+        hmap_insert(&lflow_map->multiple_od, &old_lflow->hmap_node, hash);
+    }
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif
 
 /* Adds a row with the specified contents to the Logical_Flow table.
  * Version to use when locking is required.
@@ -4388,55 +4408,127 @@  do_ovn_lflow_add(struct lflow_state *lflow_map, struct ovn_datapath *od,
     struct ovn_lflow *old_lflow;
     struct ovn_lflow *lflow;
 
+    /* Fast Path.
+     * See if we can get away without writing - grab a rdlock and check
+     * if we can get away with as little work as possible.
+     */
+
     if (use_logical_dp_groups) {
-        old_lflow = do_ovn_lflow_find(&lflow_map->single_od, NULL, stage,
-                                      priority, match,
+        if (use_parallel_build) {
+            ovs_rwlock_rdlock(&flowtable_lock);
+        }
+        old_lflow = do_ovn_lflow_find(&lflow_map->single_od,
+                                      NULL, stage, priority, match,
                                       actions, ctrl_meter, hash);
         if (old_lflow) {
-            hmapx_add(&old_lflow->od_group, od);
-            /* Found, different, od count went up. Move to multiple od. */
-            if (hmapx_count(&old_lflow->od_group) > 1) {
-                hmap_remove(&lflow_map->single_od, &old_lflow->hmap_node);
+            if (!hmapx_contains(&old_lflow->od_group, od)) {
+                /* od not in od_group, we need to add it and move to
+                 * multiple. */
                 if (use_parallel_build) {
-                    hmap_insert_fast(&lflow_map->multiple_od,
-                                     &old_lflow->hmap_node, hash);
-                } else {
-                    hmap_insert(&lflow_map->multiple_od,
-                                &old_lflow->hmap_node, hash);
+                    /* Upgrade the lock to write, we are likely to
+                     * modify data. */
+                    ovs_rwlock_unlock(&flowtable_lock);
+                    ovs_rwlock_wrlock(&flowtable_lock);
+
+                    /* Check if someone got ahead of us and the flow is already
+                     * in multiple. */
+                    if (!hmap_contains(&lflow_map->single_od,
+                                       &old_lflow->hmap_node)) {
+                        /* Someone did get ahead of us, add the od to the
+                         * group. */
+                        hmapx_add(&old_lflow->od_group, od);
+                        goto done_update_unlock;
+                    }
                 }
+                ovn_make_multi_lflow(old_lflow, od, lflow_map, hash);
+                goto done_update_unlock;
             }
-        } else {
-            /* Not found, lookup in multiple od. */
+        }
+        if (!old_lflow) {
+            /* Not found in single, lookup in multiple od. */
             old_lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
                                           stage, priority, match,
                                           actions, ctrl_meter, hash);
             if (old_lflow) {
-                hmapx_add(&old_lflow->od_group, od);
+                if (!hmapx_contains(&old_lflow->od_group, od)) {
+                    if (use_parallel_build) {
+                        /* Upgrade lock to write.*/
+                        ovs_rwlock_unlock(&flowtable_lock);
+                        ovs_rwlock_wrlock(&flowtable_lock);
+                    }
+                    hmapx_add(&old_lflow->od_group, od);
+                }
             }
         }
+done_update_unlock:
+        if (use_parallel_build) {
+            ovs_rwlock_unlock(&flowtable_lock);
+        }
         if (old_lflow) {
             return old_lflow;
         }
     }
 
-    lflow = xmalloc(sizeof *lflow);
-    /* While adding new logical flows we're not setting single datapath, but
-     * collecting a group.  'od' will be updated later for all flows with only
-     * one datapath in a group, so it could be hashed correctly. */
-    ovn_lflow_init(lflow, NULL, stage, priority,
-                   xstrdup(match), xstrdup(actions),
-                   io_port ? xstrdup(io_port) : NULL,
-                   nullable_xstrdup(ctrl_meter),
-                   ovn_lflow_hint(stage_hint), where);
-    hmapx_add(&lflow->od_group, od);
-
-    /* Insert "fresh" lflows into single_od. */
+    /* Slow Path.
+     * We could not get away with minimal mostly ro amount of work,
+     * lock with rw and try to do an insert (may end up repeating
+     * some of what we do for ro). */
 
+    if (use_logical_dp_groups && use_parallel_build) {
+        ovs_rwlock_wrlock(&flowtable_lock);
+    }
     if (!use_parallel_build) {
+        lflow = xmalloc(sizeof *lflow);
+        /* While adding new logical flows we are not setting single datapath,
+         * but collecting a group.  'od' will be updated later for all flows
+         * with only one datapath in a group, so it could be hashed correctly.
+         */
+        ovn_lflow_init(lflow, NULL, stage, priority,
+                       xstrdup(match), xstrdup(actions),
+                       io_port ? xstrdup(io_port) : NULL,
+                       nullable_xstrdup(ctrl_meter),
+                       ovn_lflow_hint(stage_hint), where);
+        hmapx_add(&lflow->od_group, od);
         hmap_insert(&lflow_map->single_od, &lflow->hmap_node, hash);
     } else {
+        if (use_logical_dp_groups) {
+            /* Search again in case someone else got here first. */
+            lflow = do_ovn_lflow_find(&lflow_map->single_od,
+                                          NULL, stage, priority, match,
+                                          actions, ctrl_meter, hash);
+            if (lflow) {
+                if (!hmapx_contains(&lflow->od_group, od)) {
+                    ovn_make_multi_lflow(lflow, od, lflow_map, hash);
+                }
+                goto done_add_unlock;
+            }
+            /* Unlikely, but possible, more than one thread got here
+             * ahead of us while we were wating to acquire a write lock. */
+            lflow = do_ovn_lflow_find(&lflow_map->multiple_od, NULL,
+                                          stage, priority, match,
+                                          actions, ctrl_meter, hash);
+            if (lflow) {
+                hmapx_add(&lflow->od_group, od);
+                goto done_add_unlock;
+            }
+        }
+        lflow = xmalloc(sizeof *lflow);
+        /* While adding new logical flows we're not setting single datapath,
+         * but collecting a group.  'od' will be updated later for all
+         * flows with only * one datapath in a group, so it could be hashed
+         * correctly. */
+        ovn_lflow_init(lflow, NULL, stage, priority,
+                       xstrdup(match), xstrdup(actions),
+                       io_port ? xstrdup(io_port) : NULL,
+                       nullable_xstrdup(ctrl_meter),
+                       ovn_lflow_hint(stage_hint), where);
+        hmapx_add(&lflow->od_group, od);
         hmap_insert_fast(&lflow_map->single_od, &lflow->hmap_node, hash);
     }
+done_add_unlock:
+    if (use_logical_dp_groups && use_parallel_build) {
+        ovs_rwlock_unlock(&flowtable_lock);
+    }
     return lflow;
 }
 
@@ -4449,22 +4541,16 @@  ovn_lflow_add_at_with_hash(struct lflow_state *lflow_map,
                            const struct ovsdb_idl_row *stage_hint,
                            const char *where, uint32_t hash)
 {
-    struct ovn_lflow *lflow;
-
     ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
-    if (use_logical_dp_groups && use_parallel_build) {
-        lock_hash_row(&lflow_locks, hash);
-        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
+    return do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
                                  actions, io_port, stage_hint, where,
                                  ctrl_meter);
-        unlock_hash_row(&lflow_locks, hash);
-    } else {
-        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
-                         actions, io_port, stage_hint, where, ctrl_meter);
-    }
-    return lflow;
 }
 
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
 /* Adds a row with the specified contents to the Logical_Flow table. */
 static void
 ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
@@ -4483,19 +4569,29 @@  ovn_lflow_add_at(struct lflow_state *lflow_map, struct ovn_datapath *od,
                                io_port, ctrl_meter, stage_hint, where, hash);
 }
 
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wthread-safety"
+#endif
+
 static bool
 ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
-                                struct ovn_datapath *od,
-                                uint32_t hash)
+                                struct ovn_datapath *od)
 {
     if (!use_logical_dp_groups || !lflow_ref) {
         return false;
     }
 
     if (use_parallel_build) {
-        lock_hash_row(&lflow_locks, hash);
-        hmapx_add(&lflow_ref->od_group, od);
-        unlock_hash_row(&lflow_locks, hash);
+        ovs_rwlock_rdlock(&flowtable_lock);
+        if (!hmapx_contains(&lflow_ref->od_group, od)) {
+            /* Upgrade lock to write. */
+            ovs_rwlock_unlock(&flowtable_lock);
+            ovs_rwlock_wrlock(&flowtable_lock);
+            hmapx_add(&lflow_ref->od_group, od);
+        }
+        ovs_rwlock_unlock(&flowtable_lock);
     } else {
         hmapx_add(&lflow_ref->od_group, od);
     }
@@ -4503,6 +4599,11 @@  ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
     return true;
 }
 
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+
 /* Adds a row with the specified contents to the Logical_Flow table. */
 #define ovn_lflow_add_with_hint__(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                                   ACTIONS, IN_OUT_PORT, CTRL_METER, \
@@ -4594,15 +4695,9 @@  hmap_safe_remove(struct hmap *hmap, struct hmap_node *node)
 static void
 remove_lflow_from_lflows(struct lflow_state *lflows, struct ovn_lflow *lflow)
 {
-    if (use_logical_dp_groups && use_parallel_build) {
-        lock_hash_row(&lflow_locks, lflow->hmap_node.hash);
-    }
     if (!hmap_safe_remove(&lflows->multiple_od, &lflow->hmap_node)) {
         hmap_remove(&lflows->single_od, &lflow->hmap_node);
     }
-    if (use_logical_dp_groups && use_parallel_build) {
-        unlock_hash_row(&lflow_locks, lflow->hmap_node.hash);
-    }
 }
 
 static void
@@ -6511,7 +6606,7 @@  build_lb_rules(struct lflow_state *lflows, struct ovn_northd_lb *lb,
             if (reject) {
                 meter = copp_meter_get(COPP_REJECT, od->nbs->copp,
                                        meter_groups);
-            } else if (ovn_dp_group_add_with_reference(lflow_ref, od, hash)) {
+            } else if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
                 continue;
             }
             lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
@@ -9565,7 +9660,7 @@  build_lrouter_defrag_flows_for_lb(struct ovn_northd_lb *lb,
                 ds_cstr(match), ds_cstr(&defrag_actions));
         for (size_t j = 0; j < lb->n_nb_lr; j++) {
             struct ovn_datapath *od = lb->nb_lr[j];
-            if (ovn_dp_group_add_with_reference(lflow_ref, od, hash)) {
+            if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
                 continue;
             }
             lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
@@ -9629,7 +9724,7 @@  build_lflows_for_unreachable_vips(struct ovn_northd_lb *lb,
                 continue;
             }
 
-            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od, hash)) {
+            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od)) {
                 continue;
             }
             lflow_ref = ovn_lflow_add_at_with_hash(lflows, peer->od,
@@ -13081,7 +13176,7 @@  build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         }
     }
 
-    if (use_parallel_build && (!use_logical_dp_groups)) {
+    if (use_parallel_build) {
         struct lflow_state *lflow_segs;
         struct lswitch_flow_build_info *lsiv;
         int index;
@@ -13319,6 +13414,8 @@  reconcile_lflow(struct ovn_lflow *lflow, struct northd_context *ctx,
     ovn_lflow_destroy(lflows, lflow);
 }
 
+static bool needs_init = true;
+
 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
  * constructing their contents based on the OVN_NB database. */
 static void
@@ -13333,8 +13430,15 @@  build_lflows(struct northd_context *ctx, struct hmap *datapaths,
 
     fast_hmap_size_for(&lflows.single_od, max_seen_lflow_size);
     fast_hmap_size_for(&lflows.multiple_od, max_seen_lflow_size);
-    if (use_parallel_build && use_logical_dp_groups) {
-        update_hashrow_locks(&lflows.single_od, &lflow_locks);
+    if (use_parallel_build && use_logical_dp_groups && needs_init) {
+        ovs_rwlock_init(&flowtable_lock);
+        /* This happens on first run with parallel+dp_groups.
+         * db_run will re-read use_parallel_build from config and
+         * reset it. This way we get correct sizing for
+         * parallel + dp_groups by doing one single threaded run
+         * on the first iteration. */
+        use_parallel_build = false;
+        needs_init = false;
     }
 
 
@@ -15293,7 +15397,6 @@  main(int argc, char *argv[])
 
     daemonize_complete();
 
-    init_hash_row_locks(&lflow_locks);
     use_parallel_build = can_parallelize_hashes(false);
 
     /* We want to detect (almost) all changes to the ovn-nb db. */