diff mbox series

[ovs-dev,v4,5/5] controller: Improve ct zone handling.

Message ID 20210802204913.2548081-1-numans@ovn.org
State Accepted
Headers show
Series pflow_output and ct_zone engine improvements. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Numan Siddique Aug. 2, 2021, 8:49 p.m. UTC
From: Numan Siddique <numans@ovn.org>

Prior to this patch, ovn-controller generates a zone id for each
OVS interface which has external_ids:iface-id set even if there is
no corresponding logical port for it.  This patch now changes the
way we allocate the zone id.  A zone id is allocated only if
there is an OVS interface with external_ids:iface-id and the
corresponding logical port is claimed.  We use the runtime data
(rt_data->lbinding_data.lports) for this.

This patch also improves the ct_zones_runtime_data_handler()
by using the tracked datapath data to allocate the zone ids
for newly claimed lports or to free up the zone id for
the released lports instead of triggering a full recompute.

And finally this patch also adds a ct zone handler for pflow_output
engine.  This handler falls back to recompute if the ct zone engine
was recomputed.  Otherwise it returns true.

Acked-by: Han Zhou <hzhou@ovn.org>
Reviewed-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
---
 controller/ovn-controller.c | 135 +++++++++++++++++++++++++++---------
 lib/inc-proc-eng.h          |   4 ++
 tests/ovn.at                |   2 +-
 3 files changed, 108 insertions(+), 33 deletions(-)

Comments

Odintsov Vladislav Aug. 20, 2021, 11:12 p.m. UTC | #1
Hi Numan,

It looks like this patch introduces a regression.
In my scenario with this commit my quite complicated virtual topology is non-working and I have to call recompute to make things working.
Debugging my problem I’ve found next problem:

If we create LR and LRP, we can see that for this LRP a ct_zone id is allocated (ovn-appctl -t ovn-controller ct-zone-list).
If we call ovn-appctl -t ovn-controller recompute, this zone id is deleted from the list.

Recompute is also solves a problem in my setup.
I’ve tried to revert commit [1] and zone id for LRP was no longer allocated.
I tried to look through the code but couldn’t understand the reason for this behaviour.

Can you please take a look if there can be a possible bug?

[1] https://github.com/ovn-org/ovn/commit/6fb87aad8c353bda13910ecfccc0c8cfa5ac3fdf

Regards,
Vladislav Odintsov

> On 2 Aug 2021, at 23:49, numans@ovn.org wrote:
> 
> From: Numan Siddique <numans@ovn.org>
> 
> Prior to this patch, ovn-controller generates a zone id for each
> OVS interface which has external_ids:iface-id set even if there is
> no corresponding logical port for it.  This patch now changes the
> way we allocate the zone id.  A zone id is allocated only if
> there is an OVS interface with external_ids:iface-id and the
> corresponding logical port is claimed.  We use the runtime data
> (rt_data->lbinding_data.lports) for this.
> 
> This patch also improves the ct_zones_runtime_data_handler()
> by using the tracked datapath data to allocate the zone ids
> for newly claimed lports or to free up the zone id for
> the released lports instead of triggering a full recompute.
> 
> And finally this patch also adds a ct zone handler for pflow_output
> engine.  This handler falls back to recompute if the ct zone engine
> was recomputed.  Otherwise it returns true.
> 
> Acked-by: Han Zhou <hzhou@ovn.org>
> Reviewed-by: Mark Michelson <mmichels@redhat.com>
> Signed-off-by: Numan Siddique <numans@ovn.org>
> ---
> controller/ovn-controller.c | 135 +++++++++++++++++++++++++++---------
> lib/inc-proc-eng.h          |   4 ++
> tests/ovn.at                |   2 +-
> 3 files changed, 108 insertions(+), 33 deletions(-)
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index d69147c04..ee28f2612 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -624,8 +624,32 @@ add_pending_ct_zone_entry(struct shash *pending_ct_zones,
>     }
> }
> 
> +static bool
> +alloc_id_to_ct_zone(const char *zone_name, struct simap *ct_zones,
> +                    unsigned long *ct_zone_bitmap, int *scan_start,
> +                    struct shash *pending_ct_zones)
> +{
> +    /* We assume that there are 64K zones and that we own them all. */
> +    int zone = bitmap_scan(ct_zone_bitmap, 0, *scan_start, MAX_CT_ZONES + 1);
> +    if (zone == MAX_CT_ZONES + 1) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +        VLOG_WARN_RL(&rl, "exhausted all ct zones");
> +        return false;
> +    }
> +
> +    *scan_start = zone + 1;
> +
> +    add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> +                              zone, true, zone_name);
> +
> +    bitmap_set1(ct_zone_bitmap, zone);
> +    simap_put(ct_zones, zone_name, zone);
> +    return true;
> +}
> +
> static void
> -update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
> +update_ct_zones(const struct shash *binding_lports,
> +                const struct hmap *local_datapaths,
>                 struct simap *ct_zones, unsigned long *ct_zone_bitmap,
>                 struct shash *pending_ct_zones)
> {
> @@ -636,8 +660,9 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
>     struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
>     unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];
> 
> -    SSET_FOR_EACH(user, lports) {
> -        sset_add(&all_users, user);
> +    struct shash_node *shash_node;
> +    SHASH_FOR_EACH (shash_node, binding_lports) {
> +        sset_add(&all_users, shash_node->name);
>     }
> 
>     /* Local patched datapath (gateway routers) need zones assigned. */
> @@ -725,26 +750,12 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
>     /* Assign a unique zone id for each logical port and two zones
>      * to a gateway router. */
>     SSET_FOR_EACH(user, &all_users) {
> -        int zone;
> -
>         if (simap_contains(ct_zones, user)) {
>             continue;
>         }
> 
> -        /* We assume that there are 64K zones and that we own them all. */
> -        zone = bitmap_scan(ct_zone_bitmap, 0, scan_start, MAX_CT_ZONES + 1);
> -        if (zone == MAX_CT_ZONES + 1) {
> -            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> -            VLOG_WARN_RL(&rl, "exhausted all ct zones");
> -            break;
> -        }
> -        scan_start = zone + 1;
> -
> -        add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> -                                  zone, true, user);
> -
> -        bitmap_set1(ct_zone_bitmap, zone);
> -        simap_put(ct_zones, user, zone);
> +        alloc_id_to_ct_zone(user, ct_zones, ct_zone_bitmap, &scan_start,
> +                            pending_ct_zones);
>     }
> 
>     simap_destroy(&req_snat_zones);
> @@ -1769,6 +1780,9 @@ struct ed_type_ct_zones {
>     unsigned long bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
>     struct shash pending;
>     struct simap current;
> +
> +    /* Tracked data. */
> +    bool recomputed;
> };
> 
> static void *
> @@ -1791,6 +1805,13 @@ en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED)
>     return data;
> }
> 
> +static void
> +en_ct_zones_clear_tracked_data(void *data_)
> +{
> +    struct ed_type_ct_zones *data = data_;
> +    data->recomputed = false;
> +}
> +
> static void
> en_ct_zones_cleanup(void *data)
> {
> @@ -1807,11 +1828,12 @@ en_ct_zones_run(struct engine_node *node, void *data)
>     struct ed_type_runtime_data *rt_data =
>         engine_get_input_data("runtime_data", node);
> 
> -    update_ct_zones(&rt_data->local_lports, &rt_data->local_datapaths,
> +    update_ct_zones(&rt_data->lbinding_data.lports, &rt_data->local_datapaths,
>                     &ct_zones_data->current, ct_zones_data->bitmap,
>                     &ct_zones_data->pending);
> 
> 
> +    ct_zones_data->recomputed = true;
>     engine_set_node_state(node, EN_UPDATED);
> }
> 
> @@ -1869,7 +1891,7 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
> }
> 
> static bool
> -ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
> +ct_zones_runtime_data_handler(struct engine_node *node, void *data)
> {
>     struct ed_type_runtime_data *rt_data =
>         engine_get_input_data("runtime_data", node);
> @@ -1879,24 +1901,53 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
>         return false;
>     }
> 
> -    /* If local_lports have changed then fall back to full recompute. */
> -    if (rt_data->local_lports_changed) {
> -        return false;
> -    }
> +    struct ed_type_ct_zones *ct_zones_data = data;
> 
>     struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings;
>     struct tracked_datapath *tdp;
> +    int scan_start = 0;
> +
> +    bool updated = false;
> +
>     HMAP_FOR_EACH (tdp, node, tracked_dp_bindings) {
>         if (tdp->tracked_type == TRACKED_RESOURCE_NEW) {
>             /* A new datapath has been added. Fall back to full recompute. */
>             return false;
>         }
> 
> -        /* When an lport is claimed or released because of port binding,
> -         * changes we don't have to compute the ct zone entries for these.
> -         * That is because we generate the ct zone entries for each local
> -         * OVS interface which has external_ids:iface-id set.  For the local
> -         * OVS interface changes, rt_data->local_ports_changed will be true. */
> +        struct shash_node *shash_node;
> +        SHASH_FOR_EACH (shash_node, &tdp->lports) {
> +            struct tracked_lport *t_lport = shash_node->data;
> +            if (t_lport->tracked_type == TRACKED_RESOURCE_NEW) {
> +                if (!simap_contains(&ct_zones_data->current,
> +                                    t_lport->pb->logical_port)) {
> +                    alloc_id_to_ct_zone(t_lport->pb->logical_port,
> +                                        &ct_zones_data->current,
> +                                        ct_zones_data->bitmap, &scan_start,
> +                                        &ct_zones_data->pending);
> +                    updated = true;
> +                }
> +            } else if (t_lport->tracked_type == TRACKED_RESOURCE_REMOVED) {
> +                struct simap_node *ct_zone =
> +                    simap_find(&ct_zones_data->current,
> +                               t_lport->pb->logical_port);
> +                if (ct_zone) {
> +                    add_pending_ct_zone_entry(
> +                        &ct_zones_data->pending, CT_ZONE_DB_QUEUED,
> +                        ct_zone->data, false, ct_zone->name);
> +
> +                    bitmap_set0(ct_zones_data->bitmap, ct_zone->data);
> +                    simap_delete(&ct_zones_data->current, ct_zone);
> +                    updated = true;
> +                }
> +            } else {
> +                OVS_NOT_REACHED();
> +            }
> +        }
> +    }
> +
> +    if (updated) {
> +        engine_set_node_state(node, EN_UPDATED);
>     }
> 
>     return true;
> @@ -2868,6 +2919,25 @@ pflow_output_runtime_data_handler(struct engine_node *node, void *data)
>     return true;
> }
> 
> +static bool
> +pflow_output_ct_zones_handler(struct engine_node *node OVS_UNUSED,
> +                                    void *data OVS_UNUSED)
> +{
> +    struct ed_type_ct_zones *ct_zones_data =
> +        engine_get_input_data("ct_zones", node);
> +
> +    /* If ct_zones engine node was recomputed, then fall back to full
> +     * recompute of pflow_output.  Otherwise there is no need to do
> +     * anything for the following reasons:
> +     *   - When an lport is claimed, ct zone handler for the
> +     *     runtime_data handler allocates the zone id for the lport (and it is
> +     *     saved in the br-int external_ids).
> +     *   - pflow_output handler for the runtime data  adds
> +     *     the physical flows for the claimed lport.
> +     * */
> +    return !ct_zones_data->recomputed;
> +}
> +
> static void *
> en_flow_output_init(struct engine_node *node OVS_UNUSED,
>                     struct engine_arg *arg OVS_UNUSED)
> @@ -3123,7 +3193,7 @@ main(int argc, char *argv[])
>     stopwatch_create(BFD_RUN_STOPWATCH_NAME, SW_MS);
> 
>     /* Define inc-proc-engine nodes. */
> -    ENGINE_NODE_CUSTOM_DATA(ct_zones, "ct_zones");
> +    ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(ct_zones, "ct_zones");
>     ENGINE_NODE_WITH_CLEAR_TRACK_DATA(runtime_data, "runtime_data");
>     ENGINE_NODE(non_vif_data, "non_vif_data");
>     ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> @@ -3164,7 +3234,8 @@ main(int argc, char *argv[])
>      */
>     engine_add_input(&en_pflow_output, &en_non_vif_data,
>                      NULL);
> -    engine_add_input(&en_pflow_output, &en_ct_zones, NULL);
> +    engine_add_input(&en_pflow_output, &en_ct_zones,
> +                     pflow_output_ct_zones_handler);
>     engine_add_input(&en_pflow_output, &en_sb_chassis,
>                      pflow_lflow_output_sb_chassis_handler);
> 
> diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
> index 7e9f5bb70..859b30a71 100644
> --- a/lib/inc-proc-eng.h
> +++ b/lib/inc-proc-eng.h
> @@ -302,6 +302,10 @@ void engine_ovsdb_node_add_index(struct engine_node *, const char *name,
> #define ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
>     ENGINE_NODE_DEF(NAME, NAME_STR)
> 
> +#define ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(NAME, NAME_STR) \
> +    ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
> +    en_##NAME.clear_tracked_data = en_##NAME##_clear_tracked_data;
> +
> #define ENGINE_NODE(NAME, NAME_STR) \
>     static bool (*en_##NAME##_is_valid)(struct engine_node *node) = NULL; \
>     ENGINE_NODE_DEF(NAME, NAME_STR)
> diff --git a/tests/ovn.at b/tests/ovn.at
> index a207f5e12..7ae136ad9 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -23527,7 +23527,7 @@ OVS_WAIT_UNTIL([test $(ovs-ofctl dump-flows br-int table=38,metadata=${sw0_dpkey
> reg15=0x${p1_dpkey} | grep REG13 | wc -l) -eq 0])
> 
> p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p1 | sed 's/"//g')
> -AT_CHECK([test ! -z $p1_zoneid])
> +AT_CHECK([test -z $p1_zoneid])
> 
> OVN_CLEANUP([hv1])
> AT_CLEANUP
> -- 
> 2.31.1
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Numan Siddique Aug. 20, 2021, 11:28 p.m. UTC | #2
On Fri, Aug 20, 2021 at 7:19 PM Odintsov Vladislav <VlOdintsov@croc.ru> wrote:
>
> Hi Numan,
>
> It looks like this patch introduces a regression.
> In my scenario with this commit my quite complicated virtual topology is non-working and I have to call recompute to make things working.
> Debugging my problem I’ve found next problem:
>
> If we create LR and LRP, we can see that for this LRP a ct_zone id is allocated (ovn-appctl -t ovn-controller ct-zone-list).
> If we call ovn-appctl -t ovn-controller recompute, this zone id is deleted from the list.
>
> Recompute is also solves a problem in my setup.
> I’ve tried to revert commit [1] and zone id for LRP was no longer allocated.
> I tried to look through the code but couldn’t understand the reason for this behaviour.
>
> Can you please take a look if there can be a possible bug?
>
> [1] https://github.com/ovn-org/ovn/commit/6fb87aad8c353bda13910ecfccc0c8cfa5ac3fdf

Hi Vladislav,

Thanks for reporting this.  If recompute solves your problem then
definitely there is an I-P bug.

Can you give a bit more details ?  Is this LR a normal router ? or
gateway router ?
Is there a gateway router port associated with the LR ?

Is there a script to reproduce the issue ?  I'll try to reproduce this locally.

Are you using OVN 21.06 ?  or master ?  Is it possible to try it out
with the master ?

Thanks
Numan

>
> Regards,
> Vladislav Odintsov
>
> > On 2 Aug 2021, at 23:49, numans@ovn.org wrote:
> >
> > From: Numan Siddique <numans@ovn.org>
> >
> > Prior to this patch, ovn-controller generates a zone id for each
> > OVS interface which has external_ids:iface-id set even if there is
> > no corresponding logical port for it.  This patch now changes the
> > way we allocate the zone id.  A zone id is allocated only if
> > there is an OVS interface with external_ids:iface-id and the
> > corresponding logical port is claimed.  We use the runtime data
> > (rt_data->lbinding_data.lports) for this.
> >
> > This patch also improves the ct_zones_runtime_data_handler()
> > by using the tracked datapath data to allocate the zone ids
> > for newly claimed lports or to free up the zone id for
> > the released lports instead of triggering a full recompute.
> >
> > And finally this patch also adds a ct zone handler for pflow_output
> > engine.  This handler falls back to recompute if the ct zone engine
> > was recomputed.  Otherwise it returns true.
> >
> > Acked-by: Han Zhou <hzhou@ovn.org>
> > Reviewed-by: Mark Michelson <mmichels@redhat.com>
> > Signed-off-by: Numan Siddique <numans@ovn.org>
> > ---
> > controller/ovn-controller.c | 135 +++++++++++++++++++++++++++---------
> > lib/inc-proc-eng.h          |   4 ++
> > tests/ovn.at                |   2 +-
> > 3 files changed, 108 insertions(+), 33 deletions(-)
> >
> > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > index d69147c04..ee28f2612 100644
> > --- a/controller/ovn-controller.c
> > +++ b/controller/ovn-controller.c
> > @@ -624,8 +624,32 @@ add_pending_ct_zone_entry(struct shash *pending_ct_zones,
> >     }
> > }
> >
> > +static bool
> > +alloc_id_to_ct_zone(const char *zone_name, struct simap *ct_zones,
> > +                    unsigned long *ct_zone_bitmap, int *scan_start,
> > +                    struct shash *pending_ct_zones)
> > +{
> > +    /* We assume that there are 64K zones and that we own them all. */
> > +    int zone = bitmap_scan(ct_zone_bitmap, 0, *scan_start, MAX_CT_ZONES + 1);
> > +    if (zone == MAX_CT_ZONES + 1) {
> > +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> > +        VLOG_WARN_RL(&rl, "exhausted all ct zones");
> > +        return false;
> > +    }
> > +
> > +    *scan_start = zone + 1;
> > +
> > +    add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> > +                              zone, true, zone_name);
> > +
> > +    bitmap_set1(ct_zone_bitmap, zone);
> > +    simap_put(ct_zones, zone_name, zone);
> > +    return true;
> > +}
> > +
> > static void
> > -update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
> > +update_ct_zones(const struct shash *binding_lports,
> > +                const struct hmap *local_datapaths,
> >                 struct simap *ct_zones, unsigned long *ct_zone_bitmap,
> >                 struct shash *pending_ct_zones)
> > {
> > @@ -636,8 +660,9 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
> >     struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
> >     unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];
> >
> > -    SSET_FOR_EACH(user, lports) {
> > -        sset_add(&all_users, user);
> > +    struct shash_node *shash_node;
> > +    SHASH_FOR_EACH (shash_node, binding_lports) {
> > +        sset_add(&all_users, shash_node->name);
> >     }
> >
> >     /* Local patched datapath (gateway routers) need zones assigned. */
> > @@ -725,26 +750,12 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
> >     /* Assign a unique zone id for each logical port and two zones
> >      * to a gateway router. */
> >     SSET_FOR_EACH(user, &all_users) {
> > -        int zone;
> > -
> >         if (simap_contains(ct_zones, user)) {
> >             continue;
> >         }
> >
> > -        /* We assume that there are 64K zones and that we own them all. */
> > -        zone = bitmap_scan(ct_zone_bitmap, 0, scan_start, MAX_CT_ZONES + 1);
> > -        if (zone == MAX_CT_ZONES + 1) {
> > -            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> > -            VLOG_WARN_RL(&rl, "exhausted all ct zones");
> > -            break;
> > -        }
> > -        scan_start = zone + 1;
> > -
> > -        add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> > -                                  zone, true, user);
> > -
> > -        bitmap_set1(ct_zone_bitmap, zone);
> > -        simap_put(ct_zones, user, zone);
> > +        alloc_id_to_ct_zone(user, ct_zones, ct_zone_bitmap, &scan_start,
> > +                            pending_ct_zones);
> >     }
> >
> >     simap_destroy(&req_snat_zones);
> > @@ -1769,6 +1780,9 @@ struct ed_type_ct_zones {
> >     unsigned long bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
> >     struct shash pending;
> >     struct simap current;
> > +
> > +    /* Tracked data. */
> > +    bool recomputed;
> > };
> >
> > static void *
> > @@ -1791,6 +1805,13 @@ en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED)
> >     return data;
> > }
> >
> > +static void
> > +en_ct_zones_clear_tracked_data(void *data_)
> > +{
> > +    struct ed_type_ct_zones *data = data_;
> > +    data->recomputed = false;
> > +}
> > +
> > static void
> > en_ct_zones_cleanup(void *data)
> > {
> > @@ -1807,11 +1828,12 @@ en_ct_zones_run(struct engine_node *node, void *data)
> >     struct ed_type_runtime_data *rt_data =
> >         engine_get_input_data("runtime_data", node);
> >
> > -    update_ct_zones(&rt_data->local_lports, &rt_data->local_datapaths,
> > +    update_ct_zones(&rt_data->lbinding_data.lports, &rt_data->local_datapaths,
> >                     &ct_zones_data->current, ct_zones_data->bitmap,
> >                     &ct_zones_data->pending);
> >
> >
> > +    ct_zones_data->recomputed = true;
> >     engine_set_node_state(node, EN_UPDATED);
> > }
> >
> > @@ -1869,7 +1891,7 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
> > }
> >
> > static bool
> > -ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
> > +ct_zones_runtime_data_handler(struct engine_node *node, void *data)
> > {
> >     struct ed_type_runtime_data *rt_data =
> >         engine_get_input_data("runtime_data", node);
> > @@ -1879,24 +1901,53 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
> >         return false;
> >     }
> >
> > -    /* If local_lports have changed then fall back to full recompute. */
> > -    if (rt_data->local_lports_changed) {
> > -        return false;
> > -    }
> > +    struct ed_type_ct_zones *ct_zones_data = data;
> >
> >     struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings;
> >     struct tracked_datapath *tdp;
> > +    int scan_start = 0;
> > +
> > +    bool updated = false;
> > +
> >     HMAP_FOR_EACH (tdp, node, tracked_dp_bindings) {
> >         if (tdp->tracked_type == TRACKED_RESOURCE_NEW) {
> >             /* A new datapath has been added. Fall back to full recompute. */
> >             return false;
> >         }
> >
> > -        /* When an lport is claimed or released because of port binding,
> > -         * changes we don't have to compute the ct zone entries for these.
> > -         * That is because we generate the ct zone entries for each local
> > -         * OVS interface which has external_ids:iface-id set.  For the local
> > -         * OVS interface changes, rt_data->local_ports_changed will be true. */
> > +        struct shash_node *shash_node;
> > +        SHASH_FOR_EACH (shash_node, &tdp->lports) {
> > +            struct tracked_lport *t_lport = shash_node->data;
> > +            if (t_lport->tracked_type == TRACKED_RESOURCE_NEW) {
> > +                if (!simap_contains(&ct_zones_data->current,
> > +                                    t_lport->pb->logical_port)) {
> > +                    alloc_id_to_ct_zone(t_lport->pb->logical_port,
> > +                                        &ct_zones_data->current,
> > +                                        ct_zones_data->bitmap, &scan_start,
> > +                                        &ct_zones_data->pending);
> > +                    updated = true;
> > +                }
> > +            } else if (t_lport->tracked_type == TRACKED_RESOURCE_REMOVED) {
> > +                struct simap_node *ct_zone =
> > +                    simap_find(&ct_zones_data->current,
> > +                               t_lport->pb->logical_port);
> > +                if (ct_zone) {
> > +                    add_pending_ct_zone_entry(
> > +                        &ct_zones_data->pending, CT_ZONE_DB_QUEUED,
> > +                        ct_zone->data, false, ct_zone->name);
> > +
> > +                    bitmap_set0(ct_zones_data->bitmap, ct_zone->data);
> > +                    simap_delete(&ct_zones_data->current, ct_zone);
> > +                    updated = true;
> > +                }
> > +            } else {
> > +                OVS_NOT_REACHED();
> > +            }
> > +        }
> > +    }
> > +
> > +    if (updated) {
> > +        engine_set_node_state(node, EN_UPDATED);
> >     }
> >
> >     return true;
> > @@ -2868,6 +2919,25 @@ pflow_output_runtime_data_handler(struct engine_node *node, void *data)
> >     return true;
> > }
> >
> > +static bool
> > +pflow_output_ct_zones_handler(struct engine_node *node OVS_UNUSED,
> > +                                    void *data OVS_UNUSED)
> > +{
> > +    struct ed_type_ct_zones *ct_zones_data =
> > +        engine_get_input_data("ct_zones", node);
> > +
> > +    /* If ct_zones engine node was recomputed, then fall back to full
> > +     * recompute of pflow_output.  Otherwise there is no need to do
> > +     * anything for the following reasons:
> > +     *   - When an lport is claimed, ct zone handler for the
> > +     *     runtime_data handler allocates the zone id for the lport (and it is
> > +     *     saved in the br-int external_ids).
> > +     *   - pflow_output handler for the runtime data  adds
> > +     *     the physical flows for the claimed lport.
> > +     * */
> > +    return !ct_zones_data->recomputed;
> > +}
> > +
> > static void *
> > en_flow_output_init(struct engine_node *node OVS_UNUSED,
> >                     struct engine_arg *arg OVS_UNUSED)
> > @@ -3123,7 +3193,7 @@ main(int argc, char *argv[])
> >     stopwatch_create(BFD_RUN_STOPWATCH_NAME, SW_MS);
> >
> >     /* Define inc-proc-engine nodes. */
> > -    ENGINE_NODE_CUSTOM_DATA(ct_zones, "ct_zones");
> > +    ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(ct_zones, "ct_zones");
> >     ENGINE_NODE_WITH_CLEAR_TRACK_DATA(runtime_data, "runtime_data");
> >     ENGINE_NODE(non_vif_data, "non_vif_data");
> >     ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> > @@ -3164,7 +3234,8 @@ main(int argc, char *argv[])
> >      */
> >     engine_add_input(&en_pflow_output, &en_non_vif_data,
> >                      NULL);
> > -    engine_add_input(&en_pflow_output, &en_ct_zones, NULL);
> > +    engine_add_input(&en_pflow_output, &en_ct_zones,
> > +                     pflow_output_ct_zones_handler);
> >     engine_add_input(&en_pflow_output, &en_sb_chassis,
> >                      pflow_lflow_output_sb_chassis_handler);
> >
> > diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
> > index 7e9f5bb70..859b30a71 100644
> > --- a/lib/inc-proc-eng.h
> > +++ b/lib/inc-proc-eng.h
> > @@ -302,6 +302,10 @@ void engine_ovsdb_node_add_index(struct engine_node *, const char *name,
> > #define ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
> >     ENGINE_NODE_DEF(NAME, NAME_STR)
> >
> > +#define ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(NAME, NAME_STR) \
> > +    ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
> > +    en_##NAME.clear_tracked_data = en_##NAME##_clear_tracked_data;
> > +
> > #define ENGINE_NODE(NAME, NAME_STR) \
> >     static bool (*en_##NAME##_is_valid)(struct engine_node *node) = NULL; \
> >     ENGINE_NODE_DEF(NAME, NAME_STR)
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index a207f5e12..7ae136ad9 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -23527,7 +23527,7 @@ OVS_WAIT_UNTIL([test $(ovs-ofctl dump-flows br-int table=38,metadata=${sw0_dpkey
> > reg15=0x${p1_dpkey} | grep REG13 | wc -l) -eq 0])
> >
> > p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p1 | sed 's/"//g')
> > -AT_CHECK([test ! -z $p1_zoneid])
> > +AT_CHECK([test -z $p1_zoneid])
> >
> > OVN_CLEANUP([hv1])
> > AT_CLEANUP
> > --
> > 2.31.1
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Numan Siddique Aug. 20, 2021, 11:37 p.m. UTC | #3
On Fri, Aug 20, 2021 at 7:28 PM Numan Siddique <numans@ovn.org> wrote:
>
> On Fri, Aug 20, 2021 at 7:19 PM Odintsov Vladislav <VlOdintsov@croc.ru> wrote:
> >
> > Hi Numan,
> >
> > It looks like this patch introduces a regression.
> > In my scenario with this commit my quite complicated virtual topology is non-working and I have to call recompute to make things working.
> > Debugging my problem I’ve found next problem:
> >
> > If we create LR and LRP, we can see that for this LRP a ct_zone id is allocated (ovn-appctl -t ovn-controller ct-zone-list).
> > If we call ovn-appctl -t ovn-controller recompute, this zone id is deleted from the list.
> >
> > Recompute is also solves a problem in my setup.
> > I’ve tried to revert commit [1] and zone id for LRP was no longer allocated.
> > I tried to look through the code but couldn’t understand the reason for this behaviour.
> >
> > Can you please take a look if there can be a possible bug?
> >
> > [1] https://github.com/ovn-org/ovn/commit/6fb87aad8c353bda13910ecfccc0c8cfa5ac3fdf
>
> Hi Vladislav,
>
> Thanks for reporting this.  If recompute solves your problem then
> definitely there is an I-P bug.
>
> Can you give a bit more details ?  Is this LR a normal router ? or
> gateway router ?
> Is there a gateway router port associated with the LR ?
>
> Is there a script to reproduce the issue ?  I'll try to reproduce this locally.
>
> Are you using OVN 21.06 ?  or master ?  Is it possible to try it out
> with the master ?

I was able to reproduce the issue.  I'll submit the fix.

Thanks
Numan

>
> Thanks
> Numan
>
> >
> > Regards,
> > Vladislav Odintsov
> >
> > > On 2 Aug 2021, at 23:49, numans@ovn.org wrote:
> > >
> > > From: Numan Siddique <numans@ovn.org>
> > >
> > > Prior to this patch, ovn-controller generates a zone id for each
> > > OVS interface which has external_ids:iface-id set even if there is
> > > no corresponding logical port for it.  This patch now changes the
> > > way we allocate the zone id.  A zone id is allocated only if
> > > there is an OVS interface with external_ids:iface-id and the
> > > corresponding logical port is claimed.  We use the runtime data
> > > (rt_data->lbinding_data.lports) for this.
> > >
> > > This patch also improves the ct_zones_runtime_data_handler()
> > > by using the tracked datapath data to allocate the zone ids
> > > for newly claimed lports or to free up the zone id for
> > > the released lports instead of triggering a full recompute.
> > >
> > > And finally this patch also adds a ct zone handler for pflow_output
> > > engine.  This handler falls back to recompute if the ct zone engine
> > > was recomputed.  Otherwise it returns true.
> > >
> > > Acked-by: Han Zhou <hzhou@ovn.org>
> > > Reviewed-by: Mark Michelson <mmichels@redhat.com>
> > > Signed-off-by: Numan Siddique <numans@ovn.org>
> > > ---
> > > controller/ovn-controller.c | 135 +++++++++++++++++++++++++++---------
> > > lib/inc-proc-eng.h          |   4 ++
> > > tests/ovn.at                |   2 +-
> > > 3 files changed, 108 insertions(+), 33 deletions(-)
> > >
> > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> > > index d69147c04..ee28f2612 100644
> > > --- a/controller/ovn-controller.c
> > > +++ b/controller/ovn-controller.c
> > > @@ -624,8 +624,32 @@ add_pending_ct_zone_entry(struct shash *pending_ct_zones,
> > >     }
> > > }
> > >
> > > +static bool
> > > +alloc_id_to_ct_zone(const char *zone_name, struct simap *ct_zones,
> > > +                    unsigned long *ct_zone_bitmap, int *scan_start,
> > > +                    struct shash *pending_ct_zones)
> > > +{
> > > +    /* We assume that there are 64K zones and that we own them all. */
> > > +    int zone = bitmap_scan(ct_zone_bitmap, 0, *scan_start, MAX_CT_ZONES + 1);
> > > +    if (zone == MAX_CT_ZONES + 1) {
> > > +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> > > +        VLOG_WARN_RL(&rl, "exhausted all ct zones");
> > > +        return false;
> > > +    }
> > > +
> > > +    *scan_start = zone + 1;
> > > +
> > > +    add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> > > +                              zone, true, zone_name);
> > > +
> > > +    bitmap_set1(ct_zone_bitmap, zone);
> > > +    simap_put(ct_zones, zone_name, zone);
> > > +    return true;
> > > +}
> > > +
> > > static void
> > > -update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
> > > +update_ct_zones(const struct shash *binding_lports,
> > > +                const struct hmap *local_datapaths,
> > >                 struct simap *ct_zones, unsigned long *ct_zone_bitmap,
> > >                 struct shash *pending_ct_zones)
> > > {
> > > @@ -636,8 +660,9 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
> > >     struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
> > >     unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];
> > >
> > > -    SSET_FOR_EACH(user, lports) {
> > > -        sset_add(&all_users, user);
> > > +    struct shash_node *shash_node;
> > > +    SHASH_FOR_EACH (shash_node, binding_lports) {
> > > +        sset_add(&all_users, shash_node->name);
> > >     }
> > >
> > >     /* Local patched datapath (gateway routers) need zones assigned. */
> > > @@ -725,26 +750,12 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
> > >     /* Assign a unique zone id for each logical port and two zones
> > >      * to a gateway router. */
> > >     SSET_FOR_EACH(user, &all_users) {
> > > -        int zone;
> > > -
> > >         if (simap_contains(ct_zones, user)) {
> > >             continue;
> > >         }
> > >
> > > -        /* We assume that there are 64K zones and that we own them all. */
> > > -        zone = bitmap_scan(ct_zone_bitmap, 0, scan_start, MAX_CT_ZONES + 1);
> > > -        if (zone == MAX_CT_ZONES + 1) {
> > > -            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> > > -            VLOG_WARN_RL(&rl, "exhausted all ct zones");
> > > -            break;
> > > -        }
> > > -        scan_start = zone + 1;
> > > -
> > > -        add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> > > -                                  zone, true, user);
> > > -
> > > -        bitmap_set1(ct_zone_bitmap, zone);
> > > -        simap_put(ct_zones, user, zone);
> > > +        alloc_id_to_ct_zone(user, ct_zones, ct_zone_bitmap, &scan_start,
> > > +                            pending_ct_zones);
> > >     }
> > >
> > >     simap_destroy(&req_snat_zones);
> > > @@ -1769,6 +1780,9 @@ struct ed_type_ct_zones {
> > >     unsigned long bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
> > >     struct shash pending;
> > >     struct simap current;
> > > +
> > > +    /* Tracked data. */
> > > +    bool recomputed;
> > > };
> > >
> > > static void *
> > > @@ -1791,6 +1805,13 @@ en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED)
> > >     return data;
> > > }
> > >
> > > +static void
> > > +en_ct_zones_clear_tracked_data(void *data_)
> > > +{
> > > +    struct ed_type_ct_zones *data = data_;
> > > +    data->recomputed = false;
> > > +}
> > > +
> > > static void
> > > en_ct_zones_cleanup(void *data)
> > > {
> > > @@ -1807,11 +1828,12 @@ en_ct_zones_run(struct engine_node *node, void *data)
> > >     struct ed_type_runtime_data *rt_data =
> > >         engine_get_input_data("runtime_data", node);
> > >
> > > -    update_ct_zones(&rt_data->local_lports, &rt_data->local_datapaths,
> > > +    update_ct_zones(&rt_data->lbinding_data.lports, &rt_data->local_datapaths,
> > >                     &ct_zones_data->current, ct_zones_data->bitmap,
> > >                     &ct_zones_data->pending);
> > >
> > >
> > > +    ct_zones_data->recomputed = true;
> > >     engine_set_node_state(node, EN_UPDATED);
> > > }
> > >
> > > @@ -1869,7 +1891,7 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
> > > }
> > >
> > > static bool
> > > -ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
> > > +ct_zones_runtime_data_handler(struct engine_node *node, void *data)
> > > {
> > >     struct ed_type_runtime_data *rt_data =
> > >         engine_get_input_data("runtime_data", node);
> > > @@ -1879,24 +1901,53 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
> > >         return false;
> > >     }
> > >
> > > -    /* If local_lports have changed then fall back to full recompute. */
> > > -    if (rt_data->local_lports_changed) {
> > > -        return false;
> > > -    }
> > > +    struct ed_type_ct_zones *ct_zones_data = data;
> > >
> > >     struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings;
> > >     struct tracked_datapath *tdp;
> > > +    int scan_start = 0;
> > > +
> > > +    bool updated = false;
> > > +
> > >     HMAP_FOR_EACH (tdp, node, tracked_dp_bindings) {
> > >         if (tdp->tracked_type == TRACKED_RESOURCE_NEW) {
> > >             /* A new datapath has been added. Fall back to full recompute. */
> > >             return false;
> > >         }
> > >
> > > -        /* When an lport is claimed or released because of port binding,
> > > -         * changes we don't have to compute the ct zone entries for these.
> > > -         * That is because we generate the ct zone entries for each local
> > > -         * OVS interface which has external_ids:iface-id set.  For the local
> > > -         * OVS interface changes, rt_data->local_ports_changed will be true. */
> > > +        struct shash_node *shash_node;
> > > +        SHASH_FOR_EACH (shash_node, &tdp->lports) {
> > > +            struct tracked_lport *t_lport = shash_node->data;
> > > +            if (t_lport->tracked_type == TRACKED_RESOURCE_NEW) {
> > > +                if (!simap_contains(&ct_zones_data->current,
> > > +                                    t_lport->pb->logical_port)) {
> > > +                    alloc_id_to_ct_zone(t_lport->pb->logical_port,
> > > +                                        &ct_zones_data->current,
> > > +                                        ct_zones_data->bitmap, &scan_start,
> > > +                                        &ct_zones_data->pending);
> > > +                    updated = true;
> > > +                }
> > > +            } else if (t_lport->tracked_type == TRACKED_RESOURCE_REMOVED) {
> > > +                struct simap_node *ct_zone =
> > > +                    simap_find(&ct_zones_data->current,
> > > +                               t_lport->pb->logical_port);
> > > +                if (ct_zone) {
> > > +                    add_pending_ct_zone_entry(
> > > +                        &ct_zones_data->pending, CT_ZONE_DB_QUEUED,
> > > +                        ct_zone->data, false, ct_zone->name);
> > > +
> > > +                    bitmap_set0(ct_zones_data->bitmap, ct_zone->data);
> > > +                    simap_delete(&ct_zones_data->current, ct_zone);
> > > +                    updated = true;
> > > +                }
> > > +            } else {
> > > +                OVS_NOT_REACHED();
> > > +            }
> > > +        }
> > > +    }
> > > +
> > > +    if (updated) {
> > > +        engine_set_node_state(node, EN_UPDATED);
> > >     }
> > >
> > >     return true;
> > > @@ -2868,6 +2919,25 @@ pflow_output_runtime_data_handler(struct engine_node *node, void *data)
> > >     return true;
> > > }
> > >
> > > +static bool
> > > +pflow_output_ct_zones_handler(struct engine_node *node OVS_UNUSED,
> > > +                                    void *data OVS_UNUSED)
> > > +{
> > > +    struct ed_type_ct_zones *ct_zones_data =
> > > +        engine_get_input_data("ct_zones", node);
> > > +
> > > +    /* If ct_zones engine node was recomputed, then fall back to full
> > > +     * recompute of pflow_output.  Otherwise there is no need to do
> > > +     * anything for the following reasons:
> > > +     *   - When an lport is claimed, ct zone handler for the
> > > +     *     runtime_data handler allocates the zone id for the lport (and it is
> > > +     *     saved in the br-int external_ids).
> > > +     *   - pflow_output handler for the runtime data  adds
> > > +     *     the physical flows for the claimed lport.
> > > +     * */
> > > +    return !ct_zones_data->recomputed;
> > > +}
> > > +
> > > static void *
> > > en_flow_output_init(struct engine_node *node OVS_UNUSED,
> > >                     struct engine_arg *arg OVS_UNUSED)
> > > @@ -3123,7 +3193,7 @@ main(int argc, char *argv[])
> > >     stopwatch_create(BFD_RUN_STOPWATCH_NAME, SW_MS);
> > >
> > >     /* Define inc-proc-engine nodes. */
> > > -    ENGINE_NODE_CUSTOM_DATA(ct_zones, "ct_zones");
> > > +    ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(ct_zones, "ct_zones");
> > >     ENGINE_NODE_WITH_CLEAR_TRACK_DATA(runtime_data, "runtime_data");
> > >     ENGINE_NODE(non_vif_data, "non_vif_data");
> > >     ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> > > @@ -3164,7 +3234,8 @@ main(int argc, char *argv[])
> > >      */
> > >     engine_add_input(&en_pflow_output, &en_non_vif_data,
> > >                      NULL);
> > > -    engine_add_input(&en_pflow_output, &en_ct_zones, NULL);
> > > +    engine_add_input(&en_pflow_output, &en_ct_zones,
> > > +                     pflow_output_ct_zones_handler);
> > >     engine_add_input(&en_pflow_output, &en_sb_chassis,
> > >                      pflow_lflow_output_sb_chassis_handler);
> > >
> > > diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
> > > index 7e9f5bb70..859b30a71 100644
> > > --- a/lib/inc-proc-eng.h
> > > +++ b/lib/inc-proc-eng.h
> > > @@ -302,6 +302,10 @@ void engine_ovsdb_node_add_index(struct engine_node *, const char *name,
> > > #define ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
> > >     ENGINE_NODE_DEF(NAME, NAME_STR)
> > >
> > > +#define ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(NAME, NAME_STR) \
> > > +    ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
> > > +    en_##NAME.clear_tracked_data = en_##NAME##_clear_tracked_data;
> > > +
> > > #define ENGINE_NODE(NAME, NAME_STR) \
> > >     static bool (*en_##NAME##_is_valid)(struct engine_node *node) = NULL; \
> > >     ENGINE_NODE_DEF(NAME, NAME_STR)
> > > diff --git a/tests/ovn.at b/tests/ovn.at
> > > index a207f5e12..7ae136ad9 100644
> > > --- a/tests/ovn.at
> > > +++ b/tests/ovn.at
> > > @@ -23527,7 +23527,7 @@ OVS_WAIT_UNTIL([test $(ovs-ofctl dump-flows br-int table=38,metadata=${sw0_dpkey
> > > reg15=0x${p1_dpkey} | grep REG13 | wc -l) -eq 0])
> > >
> > > p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p1 | sed 's/"//g')
> > > -AT_CHECK([test ! -z $p1_zoneid])
> > > +AT_CHECK([test -z $p1_zoneid])
> > >
> > > OVN_CLEANUP([hv1])
> > > AT_CLEANUP
> > > --
> > > 2.31.1
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Odintsov Vladislav Aug. 21, 2021, 8:10 a.m. UTC | #4
Thanks Numan for the quick fix.

Though you’ve already sent a patch, just in case info:
I was using a normal distributed router, LRP with no gateway chassis.
Codebase - master branch.

Regards,
Vladislav Odintsov

On 21 Aug 2021, at 02:37, Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>> wrote:

On Fri, Aug 20, 2021 at 7:28 PM Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>> wrote:

On Fri, Aug 20, 2021 at 7:19 PM Odintsov Vladislav <VlOdintsov@croc.ru<mailto:VlOdintsov@croc.ru>> wrote:

Hi Numan,

It looks like this patch introduces a regression.
In my scenario with this commit my quite complicated virtual topology is non-working and I have to call recompute to make things working.
Debugging my problem I’ve found next problem:

If we create LR and LRP, we can see that for this LRP a ct_zone id is allocated (ovn-appctl -t ovn-controller ct-zone-list).
If we call ovn-appctl -t ovn-controller recompute, this zone id is deleted from the list.

Recompute is also solves a problem in my setup.
I’ve tried to revert commit [1] and zone id for LRP was no longer allocated.
I tried to look through the code but couldn’t understand the reason for this behaviour.

Can you please take a look if there can be a possible bug?

[1] https://github.com/ovn-org/ovn/commit/6fb87aad8c353bda13910ecfccc0c8cfa5ac3fdf

Hi Vladislav,

Thanks for reporting this.  If recompute solves your problem then
definitely there is an I-P bug.

Can you give a bit more details ?  Is this LR a normal router ? or
gateway router ?
Is there a gateway router port associated with the LR ?

Is there a script to reproduce the issue ?  I'll try to reproduce this locally.

Are you using OVN 21.06 ?  or master ?  Is it possible to try it out
with the master ?

I was able to reproduce the issue.  I'll submit the fix.

Thanks
Numan


Thanks
Numan


Regards,
Vladislav Odintsov

On 2 Aug 2021, at 23:49, numans@ovn.org<mailto:numans@ovn.org> wrote:

From: Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>>

Prior to this patch, ovn-controller generates a zone id for each
OVS interface which has external_ids:iface-id set even if there is
no corresponding logical port for it.  This patch now changes the
way we allocate the zone id.  A zone id is allocated only if
there is an OVS interface with external_ids:iface-id and the
corresponding logical port is claimed.  We use the runtime data
(rt_data->lbinding_data.lports) for this.

This patch also improves the ct_zones_runtime_data_handler()
by using the tracked datapath data to allocate the zone ids
for newly claimed lports or to free up the zone id for
the released lports instead of triggering a full recompute.

And finally this patch also adds a ct zone handler for pflow_output
engine.  This handler falls back to recompute if the ct zone engine
was recomputed.  Otherwise it returns true.

Acked-by: Han Zhou <hzhou@ovn.org<mailto:hzhou@ovn.org>>
Reviewed-by: Mark Michelson <mmichels@redhat.com<mailto:mmichels@redhat.com>>
Signed-off-by: Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>>
---
controller/ovn-controller.c | 135 +++++++++++++++++++++++++++---------
lib/inc-proc-eng.h          |   4 ++
tests/ovn.at<http://ovn.at>                |   2 +-
3 files changed, 108 insertions(+), 33 deletions(-)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index d69147c04..ee28f2612 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -624,8 +624,32 @@ add_pending_ct_zone_entry(struct shash *pending_ct_zones,
   }
}

+static bool
+alloc_id_to_ct_zone(const char *zone_name, struct simap *ct_zones,
+                    unsigned long *ct_zone_bitmap, int *scan_start,
+                    struct shash *pending_ct_zones)
+{
+    /* We assume that there are 64K zones and that we own them all. */
+    int zone = bitmap_scan(ct_zone_bitmap, 0, *scan_start, MAX_CT_ZONES + 1);
+    if (zone == MAX_CT_ZONES + 1) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_WARN_RL(&rl, "exhausted all ct zones");
+        return false;
+    }
+
+    *scan_start = zone + 1;
+
+    add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
+                              zone, true, zone_name);
+
+    bitmap_set1(ct_zone_bitmap, zone);
+    simap_put(ct_zones, zone_name, zone);
+    return true;
+}
+
static void
-update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
+update_ct_zones(const struct shash *binding_lports,
+                const struct hmap *local_datapaths,
               struct simap *ct_zones, unsigned long *ct_zone_bitmap,
               struct shash *pending_ct_zones)
{
@@ -636,8 +660,9 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
   struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
   unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];

-    SSET_FOR_EACH(user, lports) {
-        sset_add(&all_users, user);
+    struct shash_node *shash_node;
+    SHASH_FOR_EACH (shash_node, binding_lports) {
+        sset_add(&all_users, shash_node->name);
   }

   /* Local patched datapath (gateway routers) need zones assigned. */
@@ -725,26 +750,12 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
   /* Assign a unique zone id for each logical port and two zones
    * to a gateway router. */
   SSET_FOR_EACH(user, &all_users) {
-        int zone;
-
       if (simap_contains(ct_zones, user)) {
           continue;
       }

-        /* We assume that there are 64K zones and that we own them all. */
-        zone = bitmap_scan(ct_zone_bitmap, 0, scan_start, MAX_CT_ZONES + 1);
-        if (zone == MAX_CT_ZONES + 1) {
-            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-            VLOG_WARN_RL(&rl, "exhausted all ct zones");
-            break;
-        }
-        scan_start = zone + 1;
-
-        add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
-                                  zone, true, user);
-
-        bitmap_set1(ct_zone_bitmap, zone);
-        simap_put(ct_zones, user, zone);
+        alloc_id_to_ct_zone(user, ct_zones, ct_zone_bitmap, &scan_start,
+                            pending_ct_zones);
   }

   simap_destroy(&req_snat_zones);
@@ -1769,6 +1780,9 @@ struct ed_type_ct_zones {
   unsigned long bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
   struct shash pending;
   struct simap current;
+
+    /* Tracked data. */
+    bool recomputed;
};

static void *
@@ -1791,6 +1805,13 @@ en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED)
   return data;
}

+static void
+en_ct_zones_clear_tracked_data(void *data_)
+{
+    struct ed_type_ct_zones *data = data_;
+    data->recomputed = false;
+}
+
static void
en_ct_zones_cleanup(void *data)
{
@@ -1807,11 +1828,12 @@ en_ct_zones_run(struct engine_node *node, void *data)
   struct ed_type_runtime_data *rt_data =
       engine_get_input_data("runtime_data", node);

-    update_ct_zones(&rt_data->local_lports, &rt_data->local_datapaths,
+    update_ct_zones(&rt_data->lbinding_data.lports, &rt_data->local_datapaths,
                   &ct_zones_data->current, ct_zones_data->bitmap,
                   &ct_zones_data->pending);


+    ct_zones_data->recomputed = true;
   engine_set_node_state(node, EN_UPDATED);
}

@@ -1869,7 +1891,7 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
}

static bool
-ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
+ct_zones_runtime_data_handler(struct engine_node *node, void *data)
{
   struct ed_type_runtime_data *rt_data =
       engine_get_input_data("runtime_data", node);
@@ -1879,24 +1901,53 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
       return false;
   }

-    /* If local_lports have changed then fall back to full recompute. */
-    if (rt_data->local_lports_changed) {
-        return false;
-    }
+    struct ed_type_ct_zones *ct_zones_data = data;

   struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings;
   struct tracked_datapath *tdp;
+    int scan_start = 0;
+
+    bool updated = false;
+
   HMAP_FOR_EACH (tdp, node, tracked_dp_bindings) {
       if (tdp->tracked_type == TRACKED_RESOURCE_NEW) {
           /* A new datapath has been added. Fall back to full recompute. */
           return false;
       }

-        /* When an lport is claimed or released because of port binding,
-         * changes we don't have to compute the ct zone entries for these.
-         * That is because we generate the ct zone entries for each local
-         * OVS interface which has external_ids:iface-id set.  For the local
-         * OVS interface changes, rt_data->local_ports_changed will be true. */
+        struct shash_node *shash_node;
+        SHASH_FOR_EACH (shash_node, &tdp->lports) {
+            struct tracked_lport *t_lport = shash_node->data;
+            if (t_lport->tracked_type == TRACKED_RESOURCE_NEW) {
+                if (!simap_contains(&ct_zones_data->current,
+                                    t_lport->pb->logical_port)) {
+                    alloc_id_to_ct_zone(t_lport->pb->logical_port,
+                                        &ct_zones_data->current,
+                                        ct_zones_data->bitmap, &scan_start,
+                                        &ct_zones_data->pending);
+                    updated = true;
+                }
+            } else if (t_lport->tracked_type == TRACKED_RESOURCE_REMOVED) {
+                struct simap_node *ct_zone =
+                    simap_find(&ct_zones_data->current,
+                               t_lport->pb->logical_port);
+                if (ct_zone) {
+                    add_pending_ct_zone_entry(
+                        &ct_zones_data->pending, CT_ZONE_DB_QUEUED,
+                        ct_zone->data, false, ct_zone->name);
+
+                    bitmap_set0(ct_zones_data->bitmap, ct_zone->data);
+                    simap_delete(&ct_zones_data->current, ct_zone);
+                    updated = true;
+                }
+            } else {
+                OVS_NOT_REACHED();
+            }
+        }
+    }
+
+    if (updated) {
+        engine_set_node_state(node, EN_UPDATED);
   }

   return true;
@@ -2868,6 +2919,25 @@ pflow_output_runtime_data_handler(struct engine_node *node, void *data)
   return true;
}

+static bool
+pflow_output_ct_zones_handler(struct engine_node *node OVS_UNUSED,
+                                    void *data OVS_UNUSED)
+{
+    struct ed_type_ct_zones *ct_zones_data =
+        engine_get_input_data("ct_zones", node);
+
+    /* If ct_zones engine node was recomputed, then fall back to full
+     * recompute of pflow_output.  Otherwise there is no need to do
+     * anything for the following reasons:
+     *   - When an lport is claimed, ct zone handler for the
+     *     runtime_data handler allocates the zone id for the lport (and it is
+     *     saved in the br-int external_ids).
+     *   - pflow_output handler for the runtime data  adds
+     *     the physical flows for the claimed lport.
+     * */
+    return !ct_zones_data->recomputed;
+}
+
static void *
en_flow_output_init(struct engine_node *node OVS_UNUSED,
                   struct engine_arg *arg OVS_UNUSED)
@@ -3123,7 +3193,7 @@ main(int argc, char *argv[])
   stopwatch_create(BFD_RUN_STOPWATCH_NAME, SW_MS);

   /* Define inc-proc-engine nodes. */
-    ENGINE_NODE_CUSTOM_DATA(ct_zones, "ct_zones");
+    ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(ct_zones, "ct_zones");
   ENGINE_NODE_WITH_CLEAR_TRACK_DATA(runtime_data, "runtime_data");
   ENGINE_NODE(non_vif_data, "non_vif_data");
   ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
@@ -3164,7 +3234,8 @@ main(int argc, char *argv[])
    */
   engine_add_input(&en_pflow_output, &en_non_vif_data,
                    NULL);
-    engine_add_input(&en_pflow_output, &en_ct_zones, NULL);
+    engine_add_input(&en_pflow_output, &en_ct_zones,
+                     pflow_output_ct_zones_handler);
   engine_add_input(&en_pflow_output, &en_sb_chassis,
                    pflow_lflow_output_sb_chassis_handler);

diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
index 7e9f5bb70..859b30a71 100644
--- a/lib/inc-proc-eng.h
+++ b/lib/inc-proc-eng.h
@@ -302,6 +302,10 @@ void engine_ovsdb_node_add_index(struct engine_node *, const char *name,
#define ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
   ENGINE_NODE_DEF(NAME, NAME_STR)

+#define ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(NAME, NAME_STR) \
+    ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
+    en_##NAME.clear_tracked_data = en_##NAME##_clear_tracked_data;
+
#define ENGINE_NODE(NAME, NAME_STR) \
   static bool (*en_##NAME##_is_valid)(struct engine_node *node) = NULL; \
   ENGINE_NODE_DEF(NAME, NAME_STR)
diff --git a/tests/ovn.at<http://ovn.at> b/tests/ovn.at<http://ovn.at>
index a207f5e12..7ae136ad9 100644
--- a/tests/ovn.at<http://ovn.at>
+++ b/tests/ovn.at<http://ovn.at>
@@ -23527,7 +23527,7 @@ OVS_WAIT_UNTIL([test $(ovs-ofctl dump-flows br-int table=38,metadata=${sw0_dpkey
reg15=0x${p1_dpkey} | grep REG13 | wc -l) -eq 0])

p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p1 | sed 's/"//g')
-AT_CHECK([test ! -z $p1_zoneid])
+AT_CHECK([test -z $p1_zoneid])

OVN_CLEANUP([hv1])
AT_CLEANUP
--
2.31.1
Numan Siddique Aug. 23, 2021, 3:04 p.m. UTC | #5
On Sat, Aug 21, 2021 at 4:11 AM Odintsov Vladislav <VlOdintsov@croc.ru> wrote:
>
> Thanks Numan for the quick fix.
>
> Though you’ve already sent a patch, just in case info:
> I was using a normal distributed router, LRP with no gateway chassis.
> Codebase - master branch.

Thanks.  It would be great if you can test out the patch I submitted for review.

Thanks
Numan

>
> Regards,
> Vladislav Odintsov
>
> On 21 Aug 2021, at 02:37, Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>> wrote:
>
> On Fri, Aug 20, 2021 at 7:28 PM Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>> wrote:
>
> On Fri, Aug 20, 2021 at 7:19 PM Odintsov Vladislav <VlOdintsov@croc.ru<mailto:VlOdintsov@croc.ru>> wrote:
>
> Hi Numan,
>
> It looks like this patch introduces a regression.
> In my scenario with this commit my quite complicated virtual topology is non-working and I have to call recompute to make things working.
> Debugging my problem I’ve found next problem:
>
> If we create LR and LRP, we can see that for this LRP a ct_zone id is allocated (ovn-appctl -t ovn-controller ct-zone-list).
> If we call ovn-appctl -t ovn-controller recompute, this zone id is deleted from the list.
>
> Recompute is also solves a problem in my setup.
> I’ve tried to revert commit [1] and zone id for LRP was no longer allocated.
> I tried to look through the code but couldn’t understand the reason for this behaviour.
>
> Can you please take a look if there can be a possible bug?
>
> [1] https://github.com/ovn-org/ovn/commit/6fb87aad8c353bda13910ecfccc0c8cfa5ac3fdf
>
> Hi Vladislav,
>
> Thanks for reporting this.  If recompute solves your problem then
> definitely there is an I-P bug.
>
> Can you give a bit more details ?  Is this LR a normal router ? or
> gateway router ?
> Is there a gateway router port associated with the LR ?
>
> Is there a script to reproduce the issue ?  I'll try to reproduce this locally.
>
> Are you using OVN 21.06 ?  or master ?  Is it possible to try it out
> with the master ?
>
> I was able to reproduce the issue.  I'll submit the fix.
>
> Thanks
> Numan
>
>
> Thanks
> Numan
>
>
> Regards,
> Vladislav Odintsov
>
> On 2 Aug 2021, at 23:49, numans@ovn.org<mailto:numans@ovn.org> wrote:
>
> From: Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>>
>
> Prior to this patch, ovn-controller generates a zone id for each
> OVS interface which has external_ids:iface-id set even if there is
> no corresponding logical port for it.  This patch now changes the
> way we allocate the zone id.  A zone id is allocated only if
> there is an OVS interface with external_ids:iface-id and the
> corresponding logical port is claimed.  We use the runtime data
> (rt_data->lbinding_data.lports) for this.
>
> This patch also improves the ct_zones_runtime_data_handler()
> by using the tracked datapath data to allocate the zone ids
> for newly claimed lports or to free up the zone id for
> the released lports instead of triggering a full recompute.
>
> And finally this patch also adds a ct zone handler for pflow_output
> engine.  This handler falls back to recompute if the ct zone engine
> was recomputed.  Otherwise it returns true.
>
> Acked-by: Han Zhou <hzhou@ovn.org<mailto:hzhou@ovn.org>>
> Reviewed-by: Mark Michelson <mmichels@redhat.com<mailto:mmichels@redhat.com>>
> Signed-off-by: Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>>
> ---
> controller/ovn-controller.c | 135 +++++++++++++++++++++++++++---------
> lib/inc-proc-eng.h          |   4 ++
> tests/ovn.at<http://ovn.at>                |   2 +-
> 3 files changed, 108 insertions(+), 33 deletions(-)
>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index d69147c04..ee28f2612 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -624,8 +624,32 @@ add_pending_ct_zone_entry(struct shash *pending_ct_zones,
>    }
> }
>
> +static bool
> +alloc_id_to_ct_zone(const char *zone_name, struct simap *ct_zones,
> +                    unsigned long *ct_zone_bitmap, int *scan_start,
> +                    struct shash *pending_ct_zones)
> +{
> +    /* We assume that there are 64K zones and that we own them all. */
> +    int zone = bitmap_scan(ct_zone_bitmap, 0, *scan_start, MAX_CT_ZONES + 1);
> +    if (zone == MAX_CT_ZONES + 1) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +        VLOG_WARN_RL(&rl, "exhausted all ct zones");
> +        return false;
> +    }
> +
> +    *scan_start = zone + 1;
> +
> +    add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> +                              zone, true, zone_name);
> +
> +    bitmap_set1(ct_zone_bitmap, zone);
> +    simap_put(ct_zones, zone_name, zone);
> +    return true;
> +}
> +
> static void
> -update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
> +update_ct_zones(const struct shash *binding_lports,
> +                const struct hmap *local_datapaths,
>                struct simap *ct_zones, unsigned long *ct_zone_bitmap,
>                struct shash *pending_ct_zones)
> {
> @@ -636,8 +660,9 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
>    struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
>    unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];
>
> -    SSET_FOR_EACH(user, lports) {
> -        sset_add(&all_users, user);
> +    struct shash_node *shash_node;
> +    SHASH_FOR_EACH (shash_node, binding_lports) {
> +        sset_add(&all_users, shash_node->name);
>    }
>
>    /* Local patched datapath (gateway routers) need zones assigned. */
> @@ -725,26 +750,12 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
>    /* Assign a unique zone id for each logical port and two zones
>     * to a gateway router. */
>    SSET_FOR_EACH(user, &all_users) {
> -        int zone;
> -
>        if (simap_contains(ct_zones, user)) {
>            continue;
>        }
>
> -        /* We assume that there are 64K zones and that we own them all. */
> -        zone = bitmap_scan(ct_zone_bitmap, 0, scan_start, MAX_CT_ZONES + 1);
> -        if (zone == MAX_CT_ZONES + 1) {
> -            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> -            VLOG_WARN_RL(&rl, "exhausted all ct zones");
> -            break;
> -        }
> -        scan_start = zone + 1;
> -
> -        add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> -                                  zone, true, user);
> -
> -        bitmap_set1(ct_zone_bitmap, zone);
> -        simap_put(ct_zones, user, zone);
> +        alloc_id_to_ct_zone(user, ct_zones, ct_zone_bitmap, &scan_start,
> +                            pending_ct_zones);
>    }
>
>    simap_destroy(&req_snat_zones);
> @@ -1769,6 +1780,9 @@ struct ed_type_ct_zones {
>    unsigned long bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
>    struct shash pending;
>    struct simap current;
> +
> +    /* Tracked data. */
> +    bool recomputed;
> };
>
> static void *
> @@ -1791,6 +1805,13 @@ en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED)
>    return data;
> }
>
> +static void
> +en_ct_zones_clear_tracked_data(void *data_)
> +{
> +    struct ed_type_ct_zones *data = data_;
> +    data->recomputed = false;
> +}
> +
> static void
> en_ct_zones_cleanup(void *data)
> {
> @@ -1807,11 +1828,12 @@ en_ct_zones_run(struct engine_node *node, void *data)
>    struct ed_type_runtime_data *rt_data =
>        engine_get_input_data("runtime_data", node);
>
> -    update_ct_zones(&rt_data->local_lports, &rt_data->local_datapaths,
> +    update_ct_zones(&rt_data->lbinding_data.lports, &rt_data->local_datapaths,
>                    &ct_zones_data->current, ct_zones_data->bitmap,
>                    &ct_zones_data->pending);
>
>
> +    ct_zones_data->recomputed = true;
>    engine_set_node_state(node, EN_UPDATED);
> }
>
> @@ -1869,7 +1891,7 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
> }
>
> static bool
> -ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
> +ct_zones_runtime_data_handler(struct engine_node *node, void *data)
> {
>    struct ed_type_runtime_data *rt_data =
>        engine_get_input_data("runtime_data", node);
> @@ -1879,24 +1901,53 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
>        return false;
>    }
>
> -    /* If local_lports have changed then fall back to full recompute. */
> -    if (rt_data->local_lports_changed) {
> -        return false;
> -    }
> +    struct ed_type_ct_zones *ct_zones_data = data;
>
>    struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings;
>    struct tracked_datapath *tdp;
> +    int scan_start = 0;
> +
> +    bool updated = false;
> +
>    HMAP_FOR_EACH (tdp, node, tracked_dp_bindings) {
>        if (tdp->tracked_type == TRACKED_RESOURCE_NEW) {
>            /* A new datapath has been added. Fall back to full recompute. */
>            return false;
>        }
>
> -        /* When an lport is claimed or released because of port binding,
> -         * changes we don't have to compute the ct zone entries for these.
> -         * That is because we generate the ct zone entries for each local
> -         * OVS interface which has external_ids:iface-id set.  For the local
> -         * OVS interface changes, rt_data->local_ports_changed will be true. */
> +        struct shash_node *shash_node;
> +        SHASH_FOR_EACH (shash_node, &tdp->lports) {
> +            struct tracked_lport *t_lport = shash_node->data;
> +            if (t_lport->tracked_type == TRACKED_RESOURCE_NEW) {
> +                if (!simap_contains(&ct_zones_data->current,
> +                                    t_lport->pb->logical_port)) {
> +                    alloc_id_to_ct_zone(t_lport->pb->logical_port,
> +                                        &ct_zones_data->current,
> +                                        ct_zones_data->bitmap, &scan_start,
> +                                        &ct_zones_data->pending);
> +                    updated = true;
> +                }
> +            } else if (t_lport->tracked_type == TRACKED_RESOURCE_REMOVED) {
> +                struct simap_node *ct_zone =
> +                    simap_find(&ct_zones_data->current,
> +                               t_lport->pb->logical_port);
> +                if (ct_zone) {
> +                    add_pending_ct_zone_entry(
> +                        &ct_zones_data->pending, CT_ZONE_DB_QUEUED,
> +                        ct_zone->data, false, ct_zone->name);
> +
> +                    bitmap_set0(ct_zones_data->bitmap, ct_zone->data);
> +                    simap_delete(&ct_zones_data->current, ct_zone);
> +                    updated = true;
> +                }
> +            } else {
> +                OVS_NOT_REACHED();
> +            }
> +        }
> +    }
> +
> +    if (updated) {
> +        engine_set_node_state(node, EN_UPDATED);
>    }
>
>    return true;
> @@ -2868,6 +2919,25 @@ pflow_output_runtime_data_handler(struct engine_node *node, void *data)
>    return true;
> }
>
> +static bool
> +pflow_output_ct_zones_handler(struct engine_node *node OVS_UNUSED,
> +                                    void *data OVS_UNUSED)
> +{
> +    struct ed_type_ct_zones *ct_zones_data =
> +        engine_get_input_data("ct_zones", node);
> +
> +    /* If ct_zones engine node was recomputed, then fall back to full
> +     * recompute of pflow_output.  Otherwise there is no need to do
> +     * anything for the following reasons:
> +     *   - When an lport is claimed, ct zone handler for the
> +     *     runtime_data handler allocates the zone id for the lport (and it is
> +     *     saved in the br-int external_ids).
> +     *   - pflow_output handler for the runtime data  adds
> +     *     the physical flows for the claimed lport.
> +     * */
> +    return !ct_zones_data->recomputed;
> +}
> +
> static void *
> en_flow_output_init(struct engine_node *node OVS_UNUSED,
>                    struct engine_arg *arg OVS_UNUSED)
> @@ -3123,7 +3193,7 @@ main(int argc, char *argv[])
>    stopwatch_create(BFD_RUN_STOPWATCH_NAME, SW_MS);
>
>    /* Define inc-proc-engine nodes. */
> -    ENGINE_NODE_CUSTOM_DATA(ct_zones, "ct_zones");
> +    ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(ct_zones, "ct_zones");
>    ENGINE_NODE_WITH_CLEAR_TRACK_DATA(runtime_data, "runtime_data");
>    ENGINE_NODE(non_vif_data, "non_vif_data");
>    ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
> @@ -3164,7 +3234,8 @@ main(int argc, char *argv[])
>     */
>    engine_add_input(&en_pflow_output, &en_non_vif_data,
>                     NULL);
> -    engine_add_input(&en_pflow_output, &en_ct_zones, NULL);
> +    engine_add_input(&en_pflow_output, &en_ct_zones,
> +                     pflow_output_ct_zones_handler);
>    engine_add_input(&en_pflow_output, &en_sb_chassis,
>                     pflow_lflow_output_sb_chassis_handler);
>
> diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
> index 7e9f5bb70..859b30a71 100644
> --- a/lib/inc-proc-eng.h
> +++ b/lib/inc-proc-eng.h
> @@ -302,6 +302,10 @@ void engine_ovsdb_node_add_index(struct engine_node *, const char *name,
> #define ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
>    ENGINE_NODE_DEF(NAME, NAME_STR)
>
> +#define ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(NAME, NAME_STR) \
> +    ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
> +    en_##NAME.clear_tracked_data = en_##NAME##_clear_tracked_data;
> +
> #define ENGINE_NODE(NAME, NAME_STR) \
>    static bool (*en_##NAME##_is_valid)(struct engine_node *node) = NULL; \
>    ENGINE_NODE_DEF(NAME, NAME_STR)
> diff --git a/tests/ovn.at<http://ovn.at> b/tests/ovn.at<http://ovn.at>
> index a207f5e12..7ae136ad9 100644
> --- a/tests/ovn.at<http://ovn.at>
> +++ b/tests/ovn.at<http://ovn.at>
> @@ -23527,7 +23527,7 @@ OVS_WAIT_UNTIL([test $(ovs-ofctl dump-flows br-int table=38,metadata=${sw0_dpkey
> reg15=0x${p1_dpkey} | grep REG13 | wc -l) -eq 0])
>
> p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p1 | sed 's/"//g')
> -AT_CHECK([test ! -z $p1_zoneid])
> +AT_CHECK([test -z $p1_zoneid])
>
> OVN_CLEANUP([hv1])
> AT_CLEANUP
> --
> 2.31.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org<mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org<mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Odintsov Vladislav Aug. 23, 2021, 8:31 p.m. UTC | #6
Numan, I think I found one more problem with incremental processing, but I’m not sure in which commit the problem was introduced.

I met this problem between LR and Transi Switch using ovn-ic, but I’ve managed to emulate behaviour without need to setup ovn-ic infrastructure.
Problem comes when tunnel_key is changed for LSP’s port binding, which has chassis redirect config. As I understand northd can change port_binding tunnel_key if "requested-tnl-id" != tunnel_id.

Steps to reproduce (without ovn-ic):

1. Create LR and LS
ovn-nbctl lr-add lr1
ovn-nbctl ls-add ls1

2. Connect them both:
ovn-nbctl lrp-add lr1 lrp1 00:00:00:00:00:01 10.0.0.1/24
ovn-nbctl lsp-add ls1 lsp1 -- lsp-set-type lsp1 router -- lsp-set-addresses lsp1 router -- lsp-set-options lsp1 router-port=lrp1

3. Set gateway-chassis for LRP:
ovn-nbctl lrp-set-gateway-chassis lrp1 $(hostname -s)

4. Emulate change of tunnel_key by ovn-ic request
ovn-appctl -t ovn-northd pause
ovn-sbctl get port_binding lsp1 tunnel_key  # check existing value
ovn-sbctl set port_binding lsp1 tunnel_key=<some another value>

5. Check OF in table 65
dp=$(ovn-sbctl get datapath $(ovn-sbctl get port_binding lsp1 datapath) tunnel_key)
dp=$(printf "0x%x" $dp)
ovs-ofctl dump-flows br-int table=65 | grep -i "metadata=${dp} " > flow_before_recompute # with empty space in the end

6. Call recompute and compare flow
ovn-appctl -t ovn-controller recompute
ovs-ofctl dump-flows br-int table=65 | grep -i "metadata=${dp} " > flow_after_recompute # with empty space in the end

There should be different value in load:<value>->NSX_NX_REG14[].


I’m not good in incremental processing codebase, but it seems like change in lsp port_binding, which has peer (and CR in nat addresses?), should also trigger physical_run function for its peer. In this case lrp1.

Please let me know your thoughts.


Regards,
Vladislav Odintsov

On 23 Aug 2021, at 18:04, Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>> wrote:

On Sat, Aug 21, 2021 at 4:11 AM Odintsov Vladislav <VlOdintsov@croc.ru<mailto:VlOdintsov@croc.ru>> wrote:

Thanks Numan for the quick fix.

Though you’ve already sent a patch, just in case info:
I was using a normal distributed router, LRP with no gateway chassis.
Codebase - master branch.

Thanks.  It would be great if you can test out the patch I submitted for review.

Thanks
Numan


Regards,
Vladislav Odintsov

On 21 Aug 2021, at 02:37, Numan Siddique <numans@ovn.org<mailto:numans@ovn.org><mailto:numans@ovn.org>> wrote:

On Fri, Aug 20, 2021 at 7:28 PM Numan Siddique <numans@ovn.org<mailto:numans@ovn.org><mailto:numans@ovn.org>> wrote:

On Fri, Aug 20, 2021 at 7:19 PM Odintsov Vladislav <VlOdintsov@croc.ru<mailto:VlOdintsov@croc.ru><mailto:VlOdintsov@croc.ru>> wrote:

Hi Numan,

It looks like this patch introduces a regression.
In my scenario with this commit my quite complicated virtual topology is non-working and I have to call recompute to make things working.
Debugging my problem I’ve found next problem:

If we create LR and LRP, we can see that for this LRP a ct_zone id is allocated (ovn-appctl -t ovn-controller ct-zone-list).
If we call ovn-appctl -t ovn-controller recompute, this zone id is deleted from the list.

Recompute is also solves a problem in my setup.
I’ve tried to revert commit [1] and zone id for LRP was no longer allocated.
I tried to look through the code but couldn’t understand the reason for this behaviour.

Can you please take a look if there can be a possible bug?

[1] https://github.com/ovn-org/ovn/commit/6fb87aad8c353bda13910ecfccc0c8cfa5ac3fdf

Hi Vladislav,

Thanks for reporting this.  If recompute solves your problem then
definitely there is an I-P bug.

Can you give a bit more details ?  Is this LR a normal router ? or
gateway router ?
Is there a gateway router port associated with the LR ?

Is there a script to reproduce the issue ?  I'll try to reproduce this locally.

Are you using OVN 21.06 ?  or master ?  Is it possible to try it out
with the master ?

I was able to reproduce the issue.  I'll submit the fix.

Thanks
Numan


Thanks
Numan


Regards,
Vladislav Odintsov

On 2 Aug 2021, at 23:49, numans@ovn.org<mailto:numans@ovn.org><mailto:numans@ovn.org> wrote:

From: Numan Siddique <numans@ovn.org<mailto:numans@ovn.org><mailto:numans@ovn.org>>

Prior to this patch, ovn-controller generates a zone id for each
OVS interface which has external_ids:iface-id set even if there is
no corresponding logical port for it.  This patch now changes the
way we allocate the zone id.  A zone id is allocated only if
there is an OVS interface with external_ids:iface-id and the
corresponding logical port is claimed.  We use the runtime data
(rt_data->lbinding_data.lports) for this.

This patch also improves the ct_zones_runtime_data_handler()
by using the tracked datapath data to allocate the zone ids
for newly claimed lports or to free up the zone id for
the released lports instead of triggering a full recompute.

And finally this patch also adds a ct zone handler for pflow_output
engine.  This handler falls back to recompute if the ct zone engine
was recomputed.  Otherwise it returns true.

Acked-by: Han Zhou <hzhou@ovn.org<mailto:hzhou@ovn.org><mailto:hzhou@ovn.org>>
Reviewed-by: Mark Michelson <mmichels@redhat.com<mailto:mmichels@redhat.com><mailto:mmichels@redhat.com>>
Signed-off-by: Numan Siddique <numans@ovn.org<mailto:numans@ovn.org><mailto:numans@ovn.org>>
---
controller/ovn-controller.c | 135 +++++++++++++++++++++++++++---------
lib/inc-proc-eng.h          |   4 ++
tests/ovn.at<http://ovn.at><http://ovn.at>                |   2 +-
3 files changed, 108 insertions(+), 33 deletions(-)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index d69147c04..ee28f2612 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -624,8 +624,32 @@ add_pending_ct_zone_entry(struct shash *pending_ct_zones,
  }
}

+static bool
+alloc_id_to_ct_zone(const char *zone_name, struct simap *ct_zones,
+                    unsigned long *ct_zone_bitmap, int *scan_start,
+                    struct shash *pending_ct_zones)
+{
+    /* We assume that there are 64K zones and that we own them all. */
+    int zone = bitmap_scan(ct_zone_bitmap, 0, *scan_start, MAX_CT_ZONES + 1);
+    if (zone == MAX_CT_ZONES + 1) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_WARN_RL(&rl, "exhausted all ct zones");
+        return false;
+    }
+
+    *scan_start = zone + 1;
+
+    add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
+                              zone, true, zone_name);
+
+    bitmap_set1(ct_zone_bitmap, zone);
+    simap_put(ct_zones, zone_name, zone);
+    return true;
+}
+
static void
-update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
+update_ct_zones(const struct shash *binding_lports,
+                const struct hmap *local_datapaths,
              struct simap *ct_zones, unsigned long *ct_zone_bitmap,
              struct shash *pending_ct_zones)
{
@@ -636,8 +660,9 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
  struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
  unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];

-    SSET_FOR_EACH(user, lports) {
-        sset_add(&all_users, user);
+    struct shash_node *shash_node;
+    SHASH_FOR_EACH (shash_node, binding_lports) {
+        sset_add(&all_users, shash_node->name);
  }

  /* Local patched datapath (gateway routers) need zones assigned. */
@@ -725,26 +750,12 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
  /* Assign a unique zone id for each logical port and two zones
   * to a gateway router. */
  SSET_FOR_EACH(user, &all_users) {
-        int zone;
-
      if (simap_contains(ct_zones, user)) {
          continue;
      }

-        /* We assume that there are 64K zones and that we own them all. */
-        zone = bitmap_scan(ct_zone_bitmap, 0, scan_start, MAX_CT_ZONES + 1);
-        if (zone == MAX_CT_ZONES + 1) {
-            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-            VLOG_WARN_RL(&rl, "exhausted all ct zones");
-            break;
-        }
-        scan_start = zone + 1;
-
-        add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
-                                  zone, true, user);
-
-        bitmap_set1(ct_zone_bitmap, zone);
-        simap_put(ct_zones, user, zone);
+        alloc_id_to_ct_zone(user, ct_zones, ct_zone_bitmap, &scan_start,
+                            pending_ct_zones);
  }

  simap_destroy(&req_snat_zones);
@@ -1769,6 +1780,9 @@ struct ed_type_ct_zones {
  unsigned long bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
  struct shash pending;
  struct simap current;
+
+    /* Tracked data. */
+    bool recomputed;
};

static void *
@@ -1791,6 +1805,13 @@ en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED)
  return data;
}

+static void
+en_ct_zones_clear_tracked_data(void *data_)
+{
+    struct ed_type_ct_zones *data = data_;
+    data->recomputed = false;
+}
+
static void
en_ct_zones_cleanup(void *data)
{
@@ -1807,11 +1828,12 @@ en_ct_zones_run(struct engine_node *node, void *data)
  struct ed_type_runtime_data *rt_data =
      engine_get_input_data("runtime_data", node);

-    update_ct_zones(&rt_data->local_lports, &rt_data->local_datapaths,
+    update_ct_zones(&rt_data->lbinding_data.lports, &rt_data->local_datapaths,
                  &ct_zones_data->current, ct_zones_data->bitmap,
                  &ct_zones_data->pending);


+    ct_zones_data->recomputed = true;
  engine_set_node_state(node, EN_UPDATED);
}

@@ -1869,7 +1891,7 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
}

static bool
-ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
+ct_zones_runtime_data_handler(struct engine_node *node, void *data)
{
  struct ed_type_runtime_data *rt_data =
      engine_get_input_data("runtime_data", node);
@@ -1879,24 +1901,53 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
      return false;
  }

-    /* If local_lports have changed then fall back to full recompute. */
-    if (rt_data->local_lports_changed) {
-        return false;
-    }
+    struct ed_type_ct_zones *ct_zones_data = data;

  struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings;
  struct tracked_datapath *tdp;
+    int scan_start = 0;
+
+    bool updated = false;
+
  HMAP_FOR_EACH (tdp, node, tracked_dp_bindings) {
      if (tdp->tracked_type == TRACKED_RESOURCE_NEW) {
          /* A new datapath has been added. Fall back to full recompute. */
          return false;
      }

-        /* When an lport is claimed or released because of port binding,
-         * changes we don't have to compute the ct zone entries for these.
-         * That is because we generate the ct zone entries for each local
-         * OVS interface which has external_ids:iface-id set.  For the local
-         * OVS interface changes, rt_data->local_ports_changed will be true. */
+        struct shash_node *shash_node;
+        SHASH_FOR_EACH (shash_node, &tdp->lports) {
+            struct tracked_lport *t_lport = shash_node->data;
+            if (t_lport->tracked_type == TRACKED_RESOURCE_NEW) {
+                if (!simap_contains(&ct_zones_data->current,
+                                    t_lport->pb->logical_port)) {
+                    alloc_id_to_ct_zone(t_lport->pb->logical_port,
+                                        &ct_zones_data->current,
+                                        ct_zones_data->bitmap, &scan_start,
+                                        &ct_zones_data->pending);
+                    updated = true;
+                }
+            } else if (t_lport->tracked_type == TRACKED_RESOURCE_REMOVED) {
+                struct simap_node *ct_zone =
+                    simap_find(&ct_zones_data->current,
+                               t_lport->pb->logical_port);
+                if (ct_zone) {
+                    add_pending_ct_zone_entry(
+                        &ct_zones_data->pending, CT_ZONE_DB_QUEUED,
+                        ct_zone->data, false, ct_zone->name);
+
+                    bitmap_set0(ct_zones_data->bitmap, ct_zone->data);
+                    simap_delete(&ct_zones_data->current, ct_zone);
+                    updated = true;
+                }
+            } else {
+                OVS_NOT_REACHED();
+            }
+        }
+    }
+
+    if (updated) {
+        engine_set_node_state(node, EN_UPDATED);
  }

  return true;
@@ -2868,6 +2919,25 @@ pflow_output_runtime_data_handler(struct engine_node *node, void *data)
  return true;
}

+static bool
+pflow_output_ct_zones_handler(struct engine_node *node OVS_UNUSED,
+                                    void *data OVS_UNUSED)
+{
+    struct ed_type_ct_zones *ct_zones_data =
+        engine_get_input_data("ct_zones", node);
+
+    /* If ct_zones engine node was recomputed, then fall back to full
+     * recompute of pflow_output.  Otherwise there is no need to do
+     * anything for the following reasons:
+     *   - When an lport is claimed, ct zone handler for the
+     *     runtime_data handler allocates the zone id for the lport (and it is
+     *     saved in the br-int external_ids).
+     *   - pflow_output handler for the runtime data  adds
+     *     the physical flows for the claimed lport.
+     * */
+    return !ct_zones_data->recomputed;
+}
+
static void *
en_flow_output_init(struct engine_node *node OVS_UNUSED,
                  struct engine_arg *arg OVS_UNUSED)
@@ -3123,7 +3193,7 @@ main(int argc, char *argv[])
  stopwatch_create(BFD_RUN_STOPWATCH_NAME, SW_MS);

  /* Define inc-proc-engine nodes. */
-    ENGINE_NODE_CUSTOM_DATA(ct_zones, "ct_zones");
+    ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(ct_zones, "ct_zones");
  ENGINE_NODE_WITH_CLEAR_TRACK_DATA(runtime_data, "runtime_data");
  ENGINE_NODE(non_vif_data, "non_vif_data");
  ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
@@ -3164,7 +3234,8 @@ main(int argc, char *argv[])
   */
  engine_add_input(&en_pflow_output, &en_non_vif_data,
                   NULL);
-    engine_add_input(&en_pflow_output, &en_ct_zones, NULL);
+    engine_add_input(&en_pflow_output, &en_ct_zones,
+                     pflow_output_ct_zones_handler);
  engine_add_input(&en_pflow_output, &en_sb_chassis,
                   pflow_lflow_output_sb_chassis_handler);

diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
index 7e9f5bb70..859b30a71 100644
--- a/lib/inc-proc-eng.h
+++ b/lib/inc-proc-eng.h
@@ -302,6 +302,10 @@ void engine_ovsdb_node_add_index(struct engine_node *, const char *name,
#define ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
  ENGINE_NODE_DEF(NAME, NAME_STR)

+#define ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(NAME, NAME_STR) \
+    ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
+    en_##NAME.clear_tracked_data = en_##NAME##_clear_tracked_data;
+
#define ENGINE_NODE(NAME, NAME_STR) \
  static bool (*en_##NAME##_is_valid)(struct engine_node *node) = NULL; \
  ENGINE_NODE_DEF(NAME, NAME_STR)
diff --git a/tests/ovn.at<http://ovn.at><http://ovn.at> b/tests/ovn.at<http://ovn.at><http://ovn.at>
index a207f5e12..7ae136ad9 100644
--- a/tests/ovn.at<http://ovn.at><http://ovn.at>
+++ b/tests/ovn.at<http://ovn.at><http://ovn.at>
@@ -23527,7 +23527,7 @@ OVS_WAIT_UNTIL([test $(ovs-ofctl dump-flows br-int table=38,metadata=${sw0_dpkey
reg15=0x${p1_dpkey} | grep REG13 | wc -l) -eq 0])

p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p1 | sed 's/"//g')
-AT_CHECK([test ! -z $p1_zoneid])
+AT_CHECK([test -z $p1_zoneid])

OVN_CLEANUP([hv1])
AT_CLEANUP
--
2.31.1
diff mbox series

Patch

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index d69147c04..ee28f2612 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -624,8 +624,32 @@  add_pending_ct_zone_entry(struct shash *pending_ct_zones,
     }
 }
 
+static bool
+alloc_id_to_ct_zone(const char *zone_name, struct simap *ct_zones,
+                    unsigned long *ct_zone_bitmap, int *scan_start,
+                    struct shash *pending_ct_zones)
+{
+    /* We assume that there are 64K zones and that we own them all. */
+    int zone = bitmap_scan(ct_zone_bitmap, 0, *scan_start, MAX_CT_ZONES + 1);
+    if (zone == MAX_CT_ZONES + 1) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+        VLOG_WARN_RL(&rl, "exhausted all ct zones");
+        return false;
+    }
+
+    *scan_start = zone + 1;
+
+    add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
+                              zone, true, zone_name);
+
+    bitmap_set1(ct_zone_bitmap, zone);
+    simap_put(ct_zones, zone_name, zone);
+    return true;
+}
+
 static void
-update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
+update_ct_zones(const struct shash *binding_lports,
+                const struct hmap *local_datapaths,
                 struct simap *ct_zones, unsigned long *ct_zone_bitmap,
                 struct shash *pending_ct_zones)
 {
@@ -636,8 +660,9 @@  update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
     struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
     unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];
 
-    SSET_FOR_EACH(user, lports) {
-        sset_add(&all_users, user);
+    struct shash_node *shash_node;
+    SHASH_FOR_EACH (shash_node, binding_lports) {
+        sset_add(&all_users, shash_node->name);
     }
 
     /* Local patched datapath (gateway routers) need zones assigned. */
@@ -725,26 +750,12 @@  update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths,
     /* Assign a unique zone id for each logical port and two zones
      * to a gateway router. */
     SSET_FOR_EACH(user, &all_users) {
-        int zone;
-
         if (simap_contains(ct_zones, user)) {
             continue;
         }
 
-        /* We assume that there are 64K zones and that we own them all. */
-        zone = bitmap_scan(ct_zone_bitmap, 0, scan_start, MAX_CT_ZONES + 1);
-        if (zone == MAX_CT_ZONES + 1) {
-            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-            VLOG_WARN_RL(&rl, "exhausted all ct zones");
-            break;
-        }
-        scan_start = zone + 1;
-
-        add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
-                                  zone, true, user);
-
-        bitmap_set1(ct_zone_bitmap, zone);
-        simap_put(ct_zones, user, zone);
+        alloc_id_to_ct_zone(user, ct_zones, ct_zone_bitmap, &scan_start,
+                            pending_ct_zones);
     }
 
     simap_destroy(&req_snat_zones);
@@ -1769,6 +1780,9 @@  struct ed_type_ct_zones {
     unsigned long bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
     struct shash pending;
     struct simap current;
+
+    /* Tracked data. */
+    bool recomputed;
 };
 
 static void *
@@ -1791,6 +1805,13 @@  en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED)
     return data;
 }
 
+static void
+en_ct_zones_clear_tracked_data(void *data_)
+{
+    struct ed_type_ct_zones *data = data_;
+    data->recomputed = false;
+}
+
 static void
 en_ct_zones_cleanup(void *data)
 {
@@ -1807,11 +1828,12 @@  en_ct_zones_run(struct engine_node *node, void *data)
     struct ed_type_runtime_data *rt_data =
         engine_get_input_data("runtime_data", node);
 
-    update_ct_zones(&rt_data->local_lports, &rt_data->local_datapaths,
+    update_ct_zones(&rt_data->lbinding_data.lports, &rt_data->local_datapaths,
                     &ct_zones_data->current, ct_zones_data->bitmap,
                     &ct_zones_data->pending);
 
 
+    ct_zones_data->recomputed = true;
     engine_set_node_state(node, EN_UPDATED);
 }
 
@@ -1869,7 +1891,7 @@  ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
 }
 
 static bool
-ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
+ct_zones_runtime_data_handler(struct engine_node *node, void *data)
 {
     struct ed_type_runtime_data *rt_data =
         engine_get_input_data("runtime_data", node);
@@ -1879,24 +1901,53 @@  ct_zones_runtime_data_handler(struct engine_node *node, void *data OVS_UNUSED)
         return false;
     }
 
-    /* If local_lports have changed then fall back to full recompute. */
-    if (rt_data->local_lports_changed) {
-        return false;
-    }
+    struct ed_type_ct_zones *ct_zones_data = data;
 
     struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings;
     struct tracked_datapath *tdp;
+    int scan_start = 0;
+
+    bool updated = false;
+
     HMAP_FOR_EACH (tdp, node, tracked_dp_bindings) {
         if (tdp->tracked_type == TRACKED_RESOURCE_NEW) {
             /* A new datapath has been added. Fall back to full recompute. */
             return false;
         }
 
-        /* When an lport is claimed or released because of port binding,
-         * changes we don't have to compute the ct zone entries for these.
-         * That is because we generate the ct zone entries for each local
-         * OVS interface which has external_ids:iface-id set.  For the local
-         * OVS interface changes, rt_data->local_ports_changed will be true. */
+        struct shash_node *shash_node;
+        SHASH_FOR_EACH (shash_node, &tdp->lports) {
+            struct tracked_lport *t_lport = shash_node->data;
+            if (t_lport->tracked_type == TRACKED_RESOURCE_NEW) {
+                if (!simap_contains(&ct_zones_data->current,
+                                    t_lport->pb->logical_port)) {
+                    alloc_id_to_ct_zone(t_lport->pb->logical_port,
+                                        &ct_zones_data->current,
+                                        ct_zones_data->bitmap, &scan_start,
+                                        &ct_zones_data->pending);
+                    updated = true;
+                }
+            } else if (t_lport->tracked_type == TRACKED_RESOURCE_REMOVED) {
+                struct simap_node *ct_zone =
+                    simap_find(&ct_zones_data->current,
+                               t_lport->pb->logical_port);
+                if (ct_zone) {
+                    add_pending_ct_zone_entry(
+                        &ct_zones_data->pending, CT_ZONE_DB_QUEUED,
+                        ct_zone->data, false, ct_zone->name);
+
+                    bitmap_set0(ct_zones_data->bitmap, ct_zone->data);
+                    simap_delete(&ct_zones_data->current, ct_zone);
+                    updated = true;
+                }
+            } else {
+                OVS_NOT_REACHED();
+            }
+        }
+    }
+
+    if (updated) {
+        engine_set_node_state(node, EN_UPDATED);
     }
 
     return true;
@@ -2868,6 +2919,25 @@  pflow_output_runtime_data_handler(struct engine_node *node, void *data)
     return true;
 }
 
+static bool
+pflow_output_ct_zones_handler(struct engine_node *node OVS_UNUSED,
+                                    void *data OVS_UNUSED)
+{
+    struct ed_type_ct_zones *ct_zones_data =
+        engine_get_input_data("ct_zones", node);
+
+    /* If ct_zones engine node was recomputed, then fall back to full
+     * recompute of pflow_output.  Otherwise there is no need to do
+     * anything for the following reasons:
+     *   - When an lport is claimed, ct zone handler for the
+     *     runtime_data handler allocates the zone id for the lport (and it is
+     *     saved in the br-int external_ids).
+     *   - pflow_output handler for the runtime data  adds
+     *     the physical flows for the claimed lport.
+     * */
+    return !ct_zones_data->recomputed;
+}
+
 static void *
 en_flow_output_init(struct engine_node *node OVS_UNUSED,
                     struct engine_arg *arg OVS_UNUSED)
@@ -3123,7 +3193,7 @@  main(int argc, char *argv[])
     stopwatch_create(BFD_RUN_STOPWATCH_NAME, SW_MS);
 
     /* Define inc-proc-engine nodes. */
-    ENGINE_NODE_CUSTOM_DATA(ct_zones, "ct_zones");
+    ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(ct_zones, "ct_zones");
     ENGINE_NODE_WITH_CLEAR_TRACK_DATA(runtime_data, "runtime_data");
     ENGINE_NODE(non_vif_data, "non_vif_data");
     ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
@@ -3164,7 +3234,8 @@  main(int argc, char *argv[])
      */
     engine_add_input(&en_pflow_output, &en_non_vif_data,
                      NULL);
-    engine_add_input(&en_pflow_output, &en_ct_zones, NULL);
+    engine_add_input(&en_pflow_output, &en_ct_zones,
+                     pflow_output_ct_zones_handler);
     engine_add_input(&en_pflow_output, &en_sb_chassis,
                      pflow_lflow_output_sb_chassis_handler);
 
diff --git a/lib/inc-proc-eng.h b/lib/inc-proc-eng.h
index 7e9f5bb70..859b30a71 100644
--- a/lib/inc-proc-eng.h
+++ b/lib/inc-proc-eng.h
@@ -302,6 +302,10 @@  void engine_ovsdb_node_add_index(struct engine_node *, const char *name,
 #define ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
     ENGINE_NODE_DEF(NAME, NAME_STR)
 
+#define ENGINE_NODE_CUSTOM_WITH_CLEAR_TRACK_DATA(NAME, NAME_STR) \
+    ENGINE_NODE_CUSTOM_DATA(NAME, NAME_STR) \
+    en_##NAME.clear_tracked_data = en_##NAME##_clear_tracked_data;
+
 #define ENGINE_NODE(NAME, NAME_STR) \
     static bool (*en_##NAME##_is_valid)(struct engine_node *node) = NULL; \
     ENGINE_NODE_DEF(NAME, NAME_STR)
diff --git a/tests/ovn.at b/tests/ovn.at
index a207f5e12..7ae136ad9 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -23527,7 +23527,7 @@  OVS_WAIT_UNTIL([test $(ovs-ofctl dump-flows br-int table=38,metadata=${sw0_dpkey
 reg15=0x${p1_dpkey} | grep REG13 | wc -l) -eq 0])
 
 p1_zoneid=$(as hv1 ovs-vsctl get bridge br-int external_ids:ct-zone-sw0-p1 | sed 's/"//g')
-AT_CHECK([test ! -z $p1_zoneid])
+AT_CHECK([test -z $p1_zoneid])
 
 OVN_CLEANUP([hv1])
 AT_CLEANUP