Message ID | 20240111153248.2790435-1-numans@ovn.org |
---|---|
State | Changes Requested |
Headers | show |
Series | northd lflow incremental processing | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_ovn-kubernetes | success | github build: passed |
ovsrobot/github-robot-_Build_and_Test | fail | github build: failed |
On 1/11/24 16:32, numans@ovn.org wrote: > From: Numan Siddique <numans@ovn.org> > > Since northd tracked data has the changed lb data, northd > engine handler for lflow engine now handles the lb changes > incrementally. All the lflows generated for each lb is The first sentence is a bit confusing. What about re-phrasing it to: Since northd tracked data has the changed lb information, the lflow change handler for northd inputs can now handle lb updates incrementally. > stored in the ovn_lb_datapaths->lflow_ref and this is used > similar to how we handle ovn_port changes. > > Signed-off-by: Numan Siddique <numans@ovn.org> > --- > northd/en-lflow.c | 11 ++--- > northd/lb.c | 3 ++ > northd/lb.h | 26 ++++++++++++ > northd/lflow-mgr.c | 47 +++++++++++++++++----- > northd/northd.c | 98 +++++++++++++++++++++++++++++++++++++++------ > northd/northd.h | 4 ++ > tests/ovn-northd.at | 30 ++++++++++---- > 7 files changed, 184 insertions(+), 35 deletions(-) > > diff --git a/northd/en-lflow.c b/northd/en-lflow.c > index fef9a1352d..205605578f 100644 > --- a/northd/en-lflow.c > +++ b/northd/en-lflow.c > @@ -123,11 +123,6 @@ lflow_northd_handler(struct engine_node *node, > return false; > } > > - /* Fall back to recompute if load balancers have changed. 
*/ > - if (northd_has_lbs_in_tracked_data(&northd_data->trk_data)) { > - return false; > - } > - > const struct engine_context *eng_ctx = engine_get_context(); > struct lflow_data *lflow_data = data; > > @@ -140,6 +135,12 @@ lflow_northd_handler(struct engine_node *node, > return false; > } > > + if (!lflow_handle_northd_lb_changes(eng_ctx->ovnsb_idl_txn, > + &northd_data->trk_data.trk_lbs, > + &lflow_input, lflow_data->lflow_table)) { Nit: indentation > + return false; > + } > + > engine_set_node_state(node, EN_UPDATED); > return true; > } > diff --git a/northd/lb.c b/northd/lb.c > index e6c8a51911..5fca41e049 100644 > --- a/northd/lb.c > +++ b/northd/lb.c > @@ -21,6 +21,7 @@ > > /* OVN includes */ > #include "lb.h" > +#include "lflow-mgr.h" > #include "lib/lb.h" > #include "northd.h" > > @@ -33,6 +34,7 @@ ovn_lb_datapaths_create(const struct ovn_northd_lb *lb, size_t n_ls_datapaths, > lb_dps->lb = lb; > lb_dps->nb_ls_map = bitmap_allocate(n_ls_datapaths); > lb_dps->nb_lr_map = bitmap_allocate(n_lr_datapaths); > + lb_dps->lflow_ref = lflow_ref_create(); > > return lb_dps; > } > @@ -42,6 +44,7 @@ ovn_lb_datapaths_destroy(struct ovn_lb_datapaths *lb_dps) > { > bitmap_free(lb_dps->nb_lr_map); > bitmap_free(lb_dps->nb_ls_map); > + lflow_ref_destroy(lb_dps->lflow_ref); > free(lb_dps); > } > > diff --git a/northd/lb.h b/northd/lb.h > index eeef2ea260..de677ca4ef 100644 > --- a/northd/lb.h > +++ b/northd/lb.h > @@ -20,6 +20,8 @@ > #include "openvswitch/hmap.h" > #include "uuid.h" > > +struct lflow_ref; > + > struct ovn_lb_datapaths { > struct hmap_node hmap_node; > > @@ -29,6 +31,30 @@ struct ovn_lb_datapaths { > > size_t n_nb_lr; > unsigned long *nb_lr_map; > + > + /* Reference of lflows generated for this load balancer. > + * > + * This data is initialized and destroyed by the en_northd node, but > + * populated and used only by the en_lflow node. 
Ideally this data should > + * be maintained as part of en_lflow's data (struct lflow_data): a hash > + * index from ovn_port key to lflows. However, it would be less efficient > + * and more complex: > + * > + * 1. It would require an extra search (using the index) to find the > + * lflows. > + * > + * 2. Building the index needs to be thread-safe, using either a global > + * lock which is obviously less efficient, or hash-based lock array which > + * is more complex. > + * > + * Maintaining the lflow_ref here is more straightforward. The drawback is > + * that we need to keep in mind that this data belongs to en_lflow node, > + * so never access it from any other nodes. > + * > + * 'lflow_ref' is used to reference logical flows generated for this > + * load balancer. > + */ > + struct lflow_ref *lflow_ref; > }; > > struct ovn_lb_datapaths *ovn_lb_datapaths_create(const struct ovn_northd_lb *, > diff --git a/northd/lflow-mgr.c b/northd/lflow-mgr.c > index 3cf9696f6e..6cb2a367fe 100644 > --- a/northd/lflow-mgr.c > +++ b/northd/lflow-mgr.c > @@ -375,7 +375,15 @@ struct lflow_ref_node { > /* The lflow. */ > struct ovn_lflow *lflow; > > - /* Index id of the datapath this lflow_ref_node belongs to. */ > + /* Indicates of the lflow was added with dp_group or not using > + * ovn_lflow_add_with_dp_group() macro. */ > + bool dpgrp_lflow; > + /* dpgrp bitmap and bitmap length. Valid only of dpgrp_lflow is true. */ > + unsigned long *dpgrp_bitmap; > + size_t dpgrp_bitmap_len; Instead of adding this new 'bool dgprp_lflow' and checking it everywhere where we need to update the lflow's dp_refcnts_map can't we just always use dpgrp_bitmap? For the case when flows are not added with ovn_lflow_add_with_dp_group() we can just set a single bit in dpgrp_bitmap, the one corresponding to od->index. Wdyt? > + > + /* Index id of the datapath this lflow_ref_node belongs to. > + * Valid only if dpgrp_lflow is false. 
*/ > size_t dp_index; > > /* Indicates if the lflow_ref_node for an lflow - L(M, A) is linked > @@ -429,9 +437,19 @@ lflow_ref_unlink_lflows(struct lflow_ref *lflow_ref) > struct lflow_ref_node *lrn; > > LIST_FOR_EACH (lrn, lflow_list_node, &lflow_ref->lflows_ref_list) { > - if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, > - lrn->dp_index)) { > - bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > + if (lrn->dpgrp_lflow) { > + size_t index; > + BITMAP_FOR_EACH_1 (index, lrn->dpgrp_bitmap_len, > + lrn->dpgrp_bitmap) { > + if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, index)) { > + bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > + } > + } > + } else { > + if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, > + lrn->dp_index)) { > + bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > + } > } > > lrn->linked = false; > @@ -502,18 +520,26 @@ lflow_table_add_lflow(struct lflow_table *lflow_table, > io_port, ctrl_meter, stage_hint, where); > > if (lflow_ref) { > - /* lflow referencing is only supported if 'od' is not NULL. 
*/ > - ovs_assert(od); > - > struct lflow_ref_node *lrn = > lflow_ref_node_find(&lflow_ref->lflow_ref_nodes, lflow, hash); > if (!lrn) { > lrn = xzalloc(sizeof *lrn); > lrn->lflow = lflow; > - lrn->dp_index = od->index; > + lrn->dpgrp_lflow = !od; > + if (lrn->dpgrp_lflow) { > + lrn->dpgrp_bitmap = bitmap_clone(dp_bitmap, dp_bitmap_len); > + lrn->dpgrp_bitmap_len = dp_bitmap_len; > + > + size_t index; > + BITMAP_FOR_EACH_1 (index, dp_bitmap_len, dp_bitmap) { > + inc_dp_refcnt(&lflow->dp_refcnts_map, index); > + } > + } else { > + lrn->dp_index = od->index; > + inc_dp_refcnt(&lflow->dp_refcnts_map, lrn->dp_index); > + } > ovs_list_insert(&lflow_ref->lflows_ref_list, > &lrn->lflow_list_node); > - inc_dp_refcnt(&lflow->dp_refcnts_map, lrn->dp_index); > ovs_list_insert(&lflow->referenced_by, &lrn->ref_list_node); > > hmap_insert(&lflow_ref->lflow_ref_nodes, &lrn->ref_node, hash); > @@ -1257,5 +1283,8 @@ lflow_ref_node_destroy(struct lflow_ref_node *lrn, > } > ovs_list_remove(&lrn->lflow_list_node); > ovs_list_remove(&lrn->ref_list_node); > + if (lrn->dpgrp_lflow) { > + bitmap_free(lrn->dpgrp_bitmap); > + } > free(lrn); > } Thanks, Dumitru
On Thu, Jan 11, 2024 at 7:33 AM <numans@ovn.org> wrote: > > From: Numan Siddique <numans@ovn.org> > > Since northd tracked data has the changed lb data, northd > engine handler for lflow engine now handles the lb changes > incrementally. All the lflows generated for each lb is > stored in the ovn_lb_datapaths->lflow_ref and this is used > similar to how we handle ovn_port changes. > > Signed-off-by: Numan Siddique <numans@ovn.org> > --- > northd/en-lflow.c | 11 ++--- > northd/lb.c | 3 ++ > northd/lb.h | 26 ++++++++++++ > northd/lflow-mgr.c | 47 +++++++++++++++++----- > northd/northd.c | 98 +++++++++++++++++++++++++++++++++++++++------ > northd/northd.h | 4 ++ > tests/ovn-northd.at | 30 ++++++++++---- > 7 files changed, 184 insertions(+), 35 deletions(-) > > diff --git a/northd/en-lflow.c b/northd/en-lflow.c > index fef9a1352d..205605578f 100644 > --- a/northd/en-lflow.c > +++ b/northd/en-lflow.c > @@ -123,11 +123,6 @@ lflow_northd_handler(struct engine_node *node, > return false; > } > > - /* Fall back to recompute if load balancers have changed. 
*/ > - if (northd_has_lbs_in_tracked_data(&northd_data->trk_data)) { > - return false; > - } > - > const struct engine_context *eng_ctx = engine_get_context(); > struct lflow_data *lflow_data = data; > > @@ -140,6 +135,12 @@ lflow_northd_handler(struct engine_node *node, > return false; > } > > + if (!lflow_handle_northd_lb_changes(eng_ctx->ovnsb_idl_txn, > + &northd_data->trk_data.trk_lbs, > + &lflow_input, lflow_data->lflow_table)) { > + return false; > + } > + > engine_set_node_state(node, EN_UPDATED); > return true; > } > diff --git a/northd/lb.c b/northd/lb.c > index e6c8a51911..5fca41e049 100644 > --- a/northd/lb.c > +++ b/northd/lb.c > @@ -21,6 +21,7 @@ > > /* OVN includes */ > #include "lb.h" > +#include "lflow-mgr.h" > #include "lib/lb.h" > #include "northd.h" > > @@ -33,6 +34,7 @@ ovn_lb_datapaths_create(const struct ovn_northd_lb *lb, size_t n_ls_datapaths, > lb_dps->lb = lb; > lb_dps->nb_ls_map = bitmap_allocate(n_ls_datapaths); > lb_dps->nb_lr_map = bitmap_allocate(n_lr_datapaths); > + lb_dps->lflow_ref = lflow_ref_create(); > > return lb_dps; > } > @@ -42,6 +44,7 @@ ovn_lb_datapaths_destroy(struct ovn_lb_datapaths *lb_dps) > { > bitmap_free(lb_dps->nb_lr_map); > bitmap_free(lb_dps->nb_ls_map); > + lflow_ref_destroy(lb_dps->lflow_ref); > free(lb_dps); > } > > diff --git a/northd/lb.h b/northd/lb.h > index eeef2ea260..de677ca4ef 100644 > --- a/northd/lb.h > +++ b/northd/lb.h > @@ -20,6 +20,8 @@ > #include "openvswitch/hmap.h" > #include "uuid.h" > > +struct lflow_ref; > + > struct ovn_lb_datapaths { > struct hmap_node hmap_node; > > @@ -29,6 +31,30 @@ struct ovn_lb_datapaths { > > size_t n_nb_lr; > unsigned long *nb_lr_map; > + > + /* Reference of lflows generated for this load balancer. > + * > + * This data is initialized and destroyed by the en_northd node, but > + * populated and used only by the en_lflow node. Ideally this data should > + * be maintained as part of en_lflow's data (struct lflow_data): a hash > + * index from ovn_port key to lflows. 
However, it would be less efficient > + * and more complex: > + * > + * 1. It would require an extra search (using the index) to find the > + * lflows. > + * > + * 2. Building the index needs to be thread-safe, using either a global > + * lock which is obviously less efficient, or hash-based lock array which > + * is more complex. > + * > + * Maintaining the lflow_ref here is more straightforward. The drawback is > + * that we need to keep in mind that this data belongs to en_lflow node, > + * so never access it from any other nodes. > + * > + * 'lflow_ref' is used to reference logical flows generated for this > + * load balancer. > + */ > + struct lflow_ref *lflow_ref; > }; > > struct ovn_lb_datapaths *ovn_lb_datapaths_create(const struct ovn_northd_lb *, > diff --git a/northd/lflow-mgr.c b/northd/lflow-mgr.c > index 3cf9696f6e..6cb2a367fe 100644 > --- a/northd/lflow-mgr.c > +++ b/northd/lflow-mgr.c > @@ -375,7 +375,15 @@ struct lflow_ref_node { > /* The lflow. */ > struct ovn_lflow *lflow; > > - /* Index id of the datapath this lflow_ref_node belongs to. */ > + /* Indicates of the lflow was added with dp_group or not using > + * ovn_lflow_add_with_dp_group() macro. */ nit: the sentence is a little confusing. Probably more clear to say: indicates whether the lflow was added with a dp_group using the ovn_lflow_add_with_dp_group() macro. > + bool dpgrp_lflow; > + /* dpgrp bitmap and bitmap length. Valid only of dpgrp_lflow is true. */ > + unsigned long *dpgrp_bitmap; > + size_t dpgrp_bitmap_len; > + > + /* Index id of the datapath this lflow_ref_node belongs to. > + * Valid only if dpgrp_lflow is false. */ > size_t dp_index; > > /* Indicates if the lflow_ref_node for an lflow - L(M, A) is linked > @@ -429,9 +437,19 @@ lflow_ref_unlink_lflows(struct lflow_ref *lflow_ref) The comment of this function needs to be updated for the below change: it clears all DP bits if it is generated from DPG directly. 
> struct lflow_ref_node *lrn; > > LIST_FOR_EACH (lrn, lflow_list_node, &lflow_ref->lflows_ref_list) { > - if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, > - lrn->dp_index)) { > - bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > + if (lrn->dpgrp_lflow) { > + size_t index; > + BITMAP_FOR_EACH_1 (index, lrn->dpgrp_bitmap_len, > + lrn->dpgrp_bitmap) { > + if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, index)) { > + bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); This is wrong. Should use "index" instead of lrn->dp_index here. It is fixed in a future patch but should actually be fixed in this patch. With these addressed: Acked-by: Han Zhou <hzhou@ovn.org> Thanks, Han > + } > + } > + } else { > + if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, > + lrn->dp_index)) { > + bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > + } > } > > lrn->linked = false; > @@ -502,18 +520,26 @@ lflow_table_add_lflow(struct lflow_table *lflow_table, > io_port, ctrl_meter, stage_hint, where); > > if (lflow_ref) { > - /* lflow referencing is only supported if 'od' is not NULL. 
*/ > - ovs_assert(od); > - > struct lflow_ref_node *lrn = > lflow_ref_node_find(&lflow_ref->lflow_ref_nodes, lflow, hash); > if (!lrn) { > lrn = xzalloc(sizeof *lrn); > lrn->lflow = lflow; > - lrn->dp_index = od->index; > + lrn->dpgrp_lflow = !od; > + if (lrn->dpgrp_lflow) { > + lrn->dpgrp_bitmap = bitmap_clone(dp_bitmap, dp_bitmap_len); > + lrn->dpgrp_bitmap_len = dp_bitmap_len; > + > + size_t index; > + BITMAP_FOR_EACH_1 (index, dp_bitmap_len, dp_bitmap) { > + inc_dp_refcnt(&lflow->dp_refcnts_map, index); > + } > + } else { > + lrn->dp_index = od->index; > + inc_dp_refcnt(&lflow->dp_refcnts_map, lrn->dp_index); > + } > ovs_list_insert(&lflow_ref->lflows_ref_list, > &lrn->lflow_list_node); > - inc_dp_refcnt(&lflow->dp_refcnts_map, lrn->dp_index); > ovs_list_insert(&lflow->referenced_by, &lrn->ref_list_node); > > hmap_insert(&lflow_ref->lflow_ref_nodes, &lrn->ref_node, hash); > @@ -1257,5 +1283,8 @@ lflow_ref_node_destroy(struct lflow_ref_node *lrn, > } > ovs_list_remove(&lrn->lflow_list_node); > ovs_list_remove(&lrn->ref_list_node); > + if (lrn->dpgrp_lflow) { > + bitmap_free(lrn->dpgrp_bitmap); > + } > free(lrn); > } > diff --git a/northd/northd.c b/northd/northd.c > index 08732abbfa..6225dfe541 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -7477,7 +7477,7 @@ build_lb_rules_pre_stateful(struct lflow_table *lflows, > ovn_lflow_add_with_dp_group( > lflows, lb_dps->nb_ls_map, ods_size(ls_datapaths), > S_SWITCH_IN_PRE_STATEFUL, 120, ds_cstr(match), ds_cstr(action), > - &lb->nlb->header_, NULL); > + &lb->nlb->header_, lb_dps->lflow_ref); > } > } > > @@ -7922,7 +7922,7 @@ build_lb_rules(struct lflow_table *lflows, struct ovn_lb_datapaths *lb_dps, > } > > build_lb_affinity_ls_flows(lflows, lb_dps, lb_vip, ls_datapaths, > - NULL); > + lb_dps->lflow_ref); > > unsigned long *dp_non_meter = NULL; > bool build_non_meter = false; > @@ -7946,7 +7946,7 @@ build_lb_rules(struct lflow_table *lflows, struct ovn_lb_datapaths *lb_dps, > lflows, od, S_SWITCH_IN_LB, 
priority, > ds_cstr(match), ds_cstr(action), > NULL, meter, &lb->nlb->header_, > - NULL); > + lb_dps->lflow_ref); > } > } > if (!reject || build_non_meter) { > @@ -7954,7 +7954,7 @@ build_lb_rules(struct lflow_table *lflows, struct ovn_lb_datapaths *lb_dps, > lflows, dp_non_meter ? dp_non_meter : lb_dps->nb_ls_map, > ods_size(ls_datapaths), S_SWITCH_IN_LB, priority, > ds_cstr(match), ds_cstr(action), &lb->nlb->header_, > - NULL); > + lb_dps->lflow_ref); > } > bitmap_free(dp_non_meter); > } > @@ -9375,7 +9375,7 @@ build_lswitch_arp_nd_service_monitor(const struct ovn_lb_datapaths *lb_dps, > S_SWITCH_IN_ARP_ND_RSP, 110, > ds_cstr(match), ds_cstr(actions), > &lb->nlb->header_, > - NULL); > + lb_dps->lflow_ref); > } > } > } > @@ -11397,7 +11397,8 @@ build_lrouter_nat_flows_for_lb( > if (!od->n_l3dgw_ports) { > bitmap_set1(gw_dp_bitmap[type], index); > } else { > - build_distr_lrouter_nat_flows_for_lb(&ctx, type, od, NULL); > + build_distr_lrouter_nat_flows_for_lb(&ctx, type, od, > + lb_dps->lflow_ref); > } > > if (lb->affinity_timeout) { > @@ -11418,17 +11419,17 @@ build_lrouter_nat_flows_for_lb( > * S_ROUTER_IN_DNAT stage. */ > ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, 120, > ds_cstr(&unsnat_match), "next;", > - &lb->nlb->header_, NULL); > + &lb->nlb->header_, lb_dps->lflow_ref); > } > } > > for (size_t type = 0; type < LROUTER_NAT_LB_FLOW_MAX; type++) { > build_gw_lrouter_nat_flows_for_lb(&ctx, type, lr_datapaths, > gw_dp_bitmap[type], > - NULL); > + lb_dps->lflow_ref); > build_lb_affinity_lr_flows(lflows, lb, lb_vip, ds_cstr(match), > aff_action[type], aff_dp_bitmap[type], > - lr_datapaths, NULL); > + lr_datapaths, lb_dps->lflow_ref); > } > > ds_destroy(&unsnat_match); > @@ -11477,7 +11478,7 @@ build_lswitch_flows_for_lb(struct ovn_lb_datapaths *lb_dps, > od->nbs->copp, > meter_groups), > &lb->nlb->header_, > - NULL); > + lb_dps->lflow_ref); > } > /* Ignore L4 port information in the key because fragmented packets > * may not have L4 information. 
The pre-stateful table will send > @@ -11527,7 +11528,7 @@ build_lrouter_defrag_flows_for_lb(struct ovn_lb_datapaths *lb_dps, > ovn_lflow_add_with_dp_group( > lflows, lb_dps->nb_lr_map, ods_size(lr_datapaths), > S_ROUTER_IN_DEFRAG, prio, ds_cstr(match), "ct_dnat;", > - &lb_dps->lb->nlb->header_, NULL); > + &lb_dps->lb->nlb->header_, lb_dps->lflow_ref); > } > } > > @@ -11569,7 +11570,7 @@ build_lrouter_flows_for_lb(struct ovn_lb_datapaths *lb_dps, > copp_meter_get(COPP_EVENT_ELB, > od->nbr->copp, > meter_groups), > - &lb->nlb->header_, NULL); > + &lb->nlb->header_, lb_dps->lflow_ref); > } > } > > @@ -11579,7 +11580,7 @@ build_lrouter_flows_for_lb(struct ovn_lb_datapaths *lb_dps, > > ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, > "flags.skip_snat_for_lb == 1 && ip", "next;", > - NULL); > + lb_dps->lflow_ref); > } > } > } > @@ -16388,6 +16389,7 @@ void build_lflows(struct ovsdb_idl_txn *ovnsb_txn, > void > lflow_reset_northd_refs(struct lflow_input *lflow_input) > { > + struct ovn_lb_datapaths *lb_dps; > struct ovn_port *op; > > HMAP_FOR_EACH (op, key_node, lflow_input->ls_ports) { > @@ -16399,6 +16401,10 @@ lflow_reset_northd_refs(struct lflow_input *lflow_input) > lflow_ref_clear(op->lflow_ref); > lflow_ref_clear(op->stateful_lflow_ref); > } > + > + HMAP_FOR_EACH (lb_dps, hmap_node, lflow_input->lb_datapaths_map) { > + lflow_ref_clear(lb_dps->lflow_ref); > + } > } > > bool > @@ -16575,6 +16581,72 @@ lflow_handle_northd_port_changes(struct ovsdb_idl_txn *ovnsb_txn, > return true; > } > > +bool > +lflow_handle_northd_lb_changes(struct ovsdb_idl_txn *ovnsb_txn, > + struct tracked_lbs *trk_lbs, > + struct lflow_input *lflow_input, > + struct lflow_table *lflows) > +{ > + struct ovn_lb_datapaths *lb_dps; > + struct hmapx_node *hmapx_node; > + HMAPX_FOR_EACH (hmapx_node, &trk_lbs->deleted) { > + lb_dps = hmapx_node->data; > + > + lflow_ref_resync_flows( > + lb_dps->lflow_ref, lflows, ovnsb_txn, lflow_input->ls_datapaths, > + lflow_input->lr_datapaths, > + 
lflow_input->ovn_internal_version_changed, > + lflow_input->sbrec_logical_flow_table, > + lflow_input->sbrec_logical_dp_group_table); > + } > + > + HMAPX_FOR_EACH (hmapx_node, &trk_lbs->crupdated) { > + lb_dps = hmapx_node->data; > + > + /* unlink old lflows. */ > + lflow_ref_unlink_lflows(lb_dps->lflow_ref); > + > + /* Generate new lflows. */ > + struct ds match = DS_EMPTY_INITIALIZER; > + struct ds actions = DS_EMPTY_INITIALIZER; > + > + build_lswitch_arp_nd_service_monitor(lb_dps, lflow_input->ls_ports, > + lflows, &actions, > + &match); > + build_lrouter_defrag_flows_for_lb(lb_dps, lflows, > + lflow_input->lr_datapaths, &match); > + build_lrouter_flows_for_lb(lb_dps, lflows, > + lflow_input->meter_groups, > + lflow_input->lr_datapaths, > + lflow_input->lr_stateful_table, > + lflow_input->features, > + lflow_input->svc_monitor_map, > + &match, &actions); > + build_lswitch_flows_for_lb(lb_dps, lflows, > + lflow_input->meter_groups, > + lflow_input->ls_datapaths, > + lflow_input->features, > + lflow_input->svc_monitor_map, > + &match, &actions); > + > + ds_destroy(&match); > + ds_destroy(&actions); > + > + /* Sync the new flows to SB. 
*/ > + bool handled = lflow_ref_sync_lflows( > + lb_dps->lflow_ref, lflows, ovnsb_txn, lflow_input->ls_datapaths, > + lflow_input->lr_datapaths, > + lflow_input->ovn_internal_version_changed, > + lflow_input->sbrec_logical_flow_table, > + lflow_input->sbrec_logical_dp_group_table); > + if (!handled) { > + return false; > + } > + } > + > + return true; > +} > + > static bool > mirror_needs_update(const struct nbrec_mirror *nb_mirror, > const struct sbrec_mirror *sb_mirror) > diff --git a/northd/northd.h b/northd/northd.h > index 42b4eee607..d2640f38d7 100644 > --- a/northd/northd.h > +++ b/northd/northd.h > @@ -675,6 +675,10 @@ bool lflow_handle_northd_port_changes(struct ovsdb_idl_txn *ovnsb_txn, > struct tracked_ovn_ports *, > struct lflow_input *, > struct lflow_table *lflows); > +bool lflow_handle_northd_lb_changes(struct ovsdb_idl_txn *ovnsb_txn, > + struct tracked_lbs *, > + struct lflow_input *, > + struct lflow_table *lflows); > bool northd_handle_sb_port_binding_changes( > const struct sbrec_port_binding_table *, struct hmap *ls_ports, > struct hmap *lr_ports); > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index f7d47fc7e4..80374444fd 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -10519,7 +10519,7 @@ check ovn-nbctl --wait=sb lb-add lb1 10.0.0.10:80 10.0.0.3:80 > > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > -check_engine_stats lflow recompute nocompute > +check_engine_stats lflow norecompute compute > check_engine_stats sync_to_sb_lb recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE(1) > @@ -10529,21 +10529,26 @@ check ovn-nbctl --wait=sb set load_balancer . 
ip_port_mappings:10.0.0.3=sw0-p1:1 > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > check_engine_stats lr_stateful norecompute compute > -check_engine_stats lflow recompute nocompute > +check_engine_stats lflow norecompute compute > check_engine_stats sync_to_sb_lb recompute nocompute > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > check ovn-nbctl --wait=sb set load_balancer . options:foo=bar > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > check_engine_stats lr_stateful norecompute compute > -check_engine_stats lflow recompute nocompute > +check_engine_stats lflow norecompute compute > check_engine_stats sync_to_sb_lb recompute nocompute > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > check ovn-nbctl --wait=sb -- lb-add lb2 20.0.0.10:80 20.0.0.20:80 -- lb-add lb3 30.0.0.10:80 30.0.0.20:80 > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > check_engine_stats lr_stateful norecompute compute > -check_engine_stats lflow recompute nocompute > +check_engine_stats lflow norecompute compute > check_engine_stats sync_to_sb_lb recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE(1) > @@ -10553,7 +10558,7 @@ check ovn-nbctl --wait=sb -- lb-del lb2 -- lb-del lb3 > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > check_engine_stats lr_stateful norecompute compute > -check_engine_stats lflow recompute nocompute > +check_engine_stats lflow norecompute compute > check_engine_stats sync_to_sb_lb recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE(1) > @@ -10764,8 +10769,9 @@ check ovn-nbctl --wait=sb add load_balancer_group . 
load_Balancer $lb1_uuid > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > check_engine_stats lr_stateful norecompute compute > -check_engine_stats lflow recompute nocompute > +check_engine_stats lflow norecompute compute > check_engine_stats sync_to_sb_lb recompute nocompute > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > check ovn-nbctl --wait=sb clear load_balancer_group . load_Balancer > @@ -10780,7 +10786,7 @@ check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > check ovn-nbctl --wait=sb add load_balancer_group . load_Balancer $lb1_uuid > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > -check_engine_stats lflow recompute nocompute > +check_engine_stats lflow norecompute compute > check_engine_stats sync_to_sb_lb recompute nocompute > CHECK_NO_CHANGE_AFTER_RECOMPUTE > > @@ -10789,6 +10795,7 @@ check ovn-nbctl --wait=sb add logical_switch sw0 load_balancer_group $lbg1_uuid > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > check_engine_stats lr_stateful norecompute compute > +check_engine_stats ls_stateful norecompute compute > check_engine_stats lflow recompute nocompute > check_engine_stats sync_to_sb_lb recompute compute > > @@ -10797,6 +10804,7 @@ check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > check ovn-nbctl --wait=sb set load_balancer . 
options:bar=foo > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > +check_engine_stats ls_stateful norecompute compute > check_engine_stats lflow recompute nocompute > check_engine_stats sync_to_sb_lb recompute compute > > @@ -10806,6 +10814,7 @@ check ovn-nbctl --wait=sb set load_balancer lb1 vips:'"10.0.0.10:80"'='"10.0.0.1 > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > check_engine_stats lr_stateful norecompute compute > +check_engine_stats ls_stateful norecompute compute > check_engine_stats lflow recompute nocompute > check_engine_stats sync_to_sb_lb recompute compute > CHECK_NO_CHANGE_AFTER_RECOMPUTE > @@ -10903,6 +10912,7 @@ check_engine_stats northd recompute nocompute > check_engine_stats lr_stateful recompute nocompute > check_engine_stats lflow recompute nocompute > check_engine_stats sync_to_sb_lb recompute compute > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > # Add back lb group to logical switch and then delete it. > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > @@ -10912,6 +10922,7 @@ check_engine_stats northd norecompute compute > check_engine_stats lr_stateful norecompute compute > check_engine_stats lflow recompute nocompute > check_engine_stats sync_to_sb_lb recompute compute > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > check ovn-nbctl --wait=sb clear logical_switch sw0 load_balancer_group -- \ > @@ -10945,14 +10956,17 @@ check_engine_stats northd norecompute compute > check_engine_stats lr_stateful norecompute compute > check_engine_stats lflow norecompute nocompute > check_engine_stats sync_to_sb_lb norecompute nocompute > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > check ovn-nbctl --wait=sb set load_balancer_group . 
load_balancer="$lb2_uuid,$lb3_uuid,$lb4_uuid" > check_engine_stats lb_data norecompute compute > check_engine_stats northd norecompute compute > +check_engine_stats ls_stateful norecompute compute > check_engine_stats lr_stateful norecompute compute > -check_engine_stats lflow recompute nocompute > +check_engine_stats lflow norecompute compute > check_engine_stats sync_to_sb_lb recompute nocompute > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > check ovn-nbctl --wait=sb set logical_switch sw0 load_balancer_group=$lbg1_uuid > -- > 2.43.0 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On Thu, Jan 25, 2024 at 1:25 AM Han Zhou <hzhou@ovn.org> wrote: > > On Thu, Jan 11, 2024 at 7:33 AM <numans@ovn.org> wrote: > > > > From: Numan Siddique <numans@ovn.org> > > > > Since northd tracked data has the changed lb data, northd > > engine handler for lflow engine now handles the lb changes > > incrementally. All the lflows generated for each lb is > > stored in the ovn_lb_datapaths->lflow_ref and this is used > > similar to how we handle ovn_port changes. > > > > Signed-off-by: Numan Siddique <numans@ovn.org> > > --- > > northd/en-lflow.c | 11 ++--- > > northd/lb.c | 3 ++ > > northd/lb.h | 26 ++++++++++++ > > northd/lflow-mgr.c | 47 +++++++++++++++++----- > > northd/northd.c | 98 +++++++++++++++++++++++++++++++++++++++------ > > northd/northd.h | 4 ++ > > tests/ovn-northd.at | 30 ++++++++++---- > > 7 files changed, 184 insertions(+), 35 deletions(-) > > > > diff --git a/northd/en-lflow.c b/northd/en-lflow.c > > index fef9a1352d..205605578f 100644 > > --- a/northd/en-lflow.c > > +++ b/northd/en-lflow.c > > @@ -123,11 +123,6 @@ lflow_northd_handler(struct engine_node *node, > > return false; > > } > > > > - /* Fall back to recompute if load balancers have changed. 
*/ > > - if (northd_has_lbs_in_tracked_data(&northd_data->trk_data)) { > > - return false; > > - } > > - > > const struct engine_context *eng_ctx = engine_get_context(); > > struct lflow_data *lflow_data = data; > > > > @@ -140,6 +135,12 @@ lflow_northd_handler(struct engine_node *node, > > return false; > > } > > > > + if (!lflow_handle_northd_lb_changes(eng_ctx->ovnsb_idl_txn, > > + &northd_data->trk_data.trk_lbs, > > + &lflow_input, lflow_data->lflow_table)) { > > + return false; > > + } > > + > > engine_set_node_state(node, EN_UPDATED); > > return true; > > } > > diff --git a/northd/lb.c b/northd/lb.c > > index e6c8a51911..5fca41e049 100644 > > --- a/northd/lb.c > > +++ b/northd/lb.c > > @@ -21,6 +21,7 @@ > > > > /* OVN includes */ > > #include "lb.h" > > +#include "lflow-mgr.h" > > #include "lib/lb.h" > > #include "northd.h" > > > > @@ -33,6 +34,7 @@ ovn_lb_datapaths_create(const struct ovn_northd_lb *lb, > size_t n_ls_datapaths, > > lb_dps->lb = lb; > > lb_dps->nb_ls_map = bitmap_allocate(n_ls_datapaths); > > lb_dps->nb_lr_map = bitmap_allocate(n_lr_datapaths); > > + lb_dps->lflow_ref = lflow_ref_create(); > > > > return lb_dps; > > } > > @@ -42,6 +44,7 @@ ovn_lb_datapaths_destroy(struct ovn_lb_datapaths > *lb_dps) > > { > > bitmap_free(lb_dps->nb_lr_map); > > bitmap_free(lb_dps->nb_ls_map); > > + lflow_ref_destroy(lb_dps->lflow_ref); > > free(lb_dps); > > } > > > > diff --git a/northd/lb.h b/northd/lb.h > > index eeef2ea260..de677ca4ef 100644 > > --- a/northd/lb.h > > +++ b/northd/lb.h > > @@ -20,6 +20,8 @@ > > #include "openvswitch/hmap.h" > > #include "uuid.h" > > > > +struct lflow_ref; > > + > > struct ovn_lb_datapaths { > > struct hmap_node hmap_node; > > > > @@ -29,6 +31,30 @@ struct ovn_lb_datapaths { > > > > size_t n_nb_lr; > > unsigned long *nb_lr_map; > > + > > + /* Reference of lflows generated for this load balancer. 
> > + * > > + * This data is initialized and destroyed by the en_northd node, but > > + * populated and used only by the en_lflow node. Ideally this data > should > > + * be maintained as part of en_lflow's data (struct lflow_data): a > hash > > + * index from ovn_port key to lflows. However, it would be less > efficient > > + * and more complex: > > + * > > + * 1. It would require an extra search (using the index) to find the > > + * lflows. > > + * > > + * 2. Building the index needs to be thread-safe, using either a > global > > + * lock which is obviously less efficient, or hash-based lock array > which > > + * is more complex. > > + * > > + * Maintaining the lflow_ref here is more straightforward. The > drawback is > > + * that we need to keep in mind that this data belongs to en_lflow > node, > > + * so never access it from any other nodes. > > + * > > + * 'lflow_ref' is used to reference logical flows generated for this > > + * load balancer. > > + */ > > + struct lflow_ref *lflow_ref; > > }; > > > > struct ovn_lb_datapaths *ovn_lb_datapaths_create(const struct > ovn_northd_lb *, > > diff --git a/northd/lflow-mgr.c b/northd/lflow-mgr.c > > index 3cf9696f6e..6cb2a367fe 100644 > > --- a/northd/lflow-mgr.c > > +++ b/northd/lflow-mgr.c > > @@ -375,7 +375,15 @@ struct lflow_ref_node { > > /* The lflow. */ > > struct ovn_lflow *lflow; > > > > - /* Index id of the datapath this lflow_ref_node belongs to. */ > > + /* Indicates of the lflow was added with dp_group or not using > > + * ovn_lflow_add_with_dp_group() macro. */ > > nit: the sentence is a little confusing. Probably more clear to say: > indicates whether the lflow was added with a dp_group using the > ovn_lflow_add_with_dp_group() macro. Ack. > > > + bool dpgrp_lflow; > > + /* dpgrp bitmap and bitmap length. Valid only of dpgrp_lflow is > true. */ > > + unsigned long *dpgrp_bitmap; > > + size_t dpgrp_bitmap_len; > > + > > + /* Index id of the datapath this lflow_ref_node belongs to. 
> > + * Valid only if dpgrp_lflow is false. */ > > size_t dp_index; > > > > /* Indicates if the lflow_ref_node for an lflow - L(M, A) is linked > > @@ -429,9 +437,19 @@ lflow_ref_unlink_lflows(struct lflow_ref *lflow_ref) > > The comment of this function needs to be updated for the below change: it > clears all DP bits if it is generated from DPG directly. Ack. > > > struct lflow_ref_node *lrn; > > > > LIST_FOR_EACH (lrn, lflow_list_node, &lflow_ref->lflows_ref_list) { > > - if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, > > - lrn->dp_index)) { > > - bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > > + if (lrn->dpgrp_lflow) { > > + size_t index; > > + BITMAP_FOR_EACH_1 (index, lrn->dpgrp_bitmap_len, > > + lrn->dpgrp_bitmap) { > > + if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, index)) { > > + bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > > This is wrong. Should use "index" instead of lrn->dp_index here. It is > fixed in a future patch but should actually be fixed in this patch. > > With these addressed: > Acked-by: Han Zhou <hzhou@ovn.org> Thanks. I'll address the comments. > > Thanks, > Han > > > + } > > + } > > + } else { > > + if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, > > + lrn->dp_index)) { > > + bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > > + } > > } > > > > lrn->linked = false; > > @@ -502,18 +520,26 @@ lflow_table_add_lflow(struct lflow_table > *lflow_table, > > io_port, ctrl_meter, stage_hint, where); > > > > if (lflow_ref) { > > - /* lflow referencing is only supported if 'od' is not NULL. 
*/ > > - ovs_assert(od); > > - > > struct lflow_ref_node *lrn = > > lflow_ref_node_find(&lflow_ref->lflow_ref_nodes, lflow, > hash); > > if (!lrn) { > > lrn = xzalloc(sizeof *lrn); > > lrn->lflow = lflow; > > - lrn->dp_index = od->index; > > + lrn->dpgrp_lflow = !od; > > + if (lrn->dpgrp_lflow) { > > + lrn->dpgrp_bitmap = bitmap_clone(dp_bitmap, > dp_bitmap_len); > > + lrn->dpgrp_bitmap_len = dp_bitmap_len; > > + > > + size_t index; > > + BITMAP_FOR_EACH_1 (index, dp_bitmap_len, dp_bitmap) { > > + inc_dp_refcnt(&lflow->dp_refcnts_map, index); > > + } > > + } else { > > + lrn->dp_index = od->index; > > + inc_dp_refcnt(&lflow->dp_refcnts_map, lrn->dp_index); > > + } > > ovs_list_insert(&lflow_ref->lflows_ref_list, > > &lrn->lflow_list_node); > > - inc_dp_refcnt(&lflow->dp_refcnts_map, lrn->dp_index); > > ovs_list_insert(&lflow->referenced_by, &lrn->ref_list_node); > > > > hmap_insert(&lflow_ref->lflow_ref_nodes, &lrn->ref_node, > hash); > > @@ -1257,5 +1283,8 @@ lflow_ref_node_destroy(struct lflow_ref_node *lrn, > > } > > ovs_list_remove(&lrn->lflow_list_node); > > ovs_list_remove(&lrn->ref_list_node); > > + if (lrn->dpgrp_lflow) { > > + bitmap_free(lrn->dpgrp_bitmap); > > + } > > free(lrn); > > } > > diff --git a/northd/northd.c b/northd/northd.c > > index 08732abbfa..6225dfe541 100644 > > --- a/northd/northd.c > > +++ b/northd/northd.c > > @@ -7477,7 +7477,7 @@ build_lb_rules_pre_stateful(struct lflow_table > *lflows, > > ovn_lflow_add_with_dp_group( > > lflows, lb_dps->nb_ls_map, ods_size(ls_datapaths), > > S_SWITCH_IN_PRE_STATEFUL, 120, ds_cstr(match), > ds_cstr(action), > > - &lb->nlb->header_, NULL); > > + &lb->nlb->header_, lb_dps->lflow_ref); > > } > > } > > > > @@ -7922,7 +7922,7 @@ build_lb_rules(struct lflow_table *lflows, struct > ovn_lb_datapaths *lb_dps, > > } > > > > build_lb_affinity_ls_flows(lflows, lb_dps, lb_vip, ls_datapaths, > > - NULL); > > + lb_dps->lflow_ref); > > > > unsigned long *dp_non_meter = NULL; > > bool build_non_meter = false; > > @@ 
-7946,7 +7946,7 @@ build_lb_rules(struct lflow_table *lflows, struct > ovn_lb_datapaths *lb_dps, > > lflows, od, S_SWITCH_IN_LB, priority, > > ds_cstr(match), ds_cstr(action), > > NULL, meter, &lb->nlb->header_, > > - NULL); > > + lb_dps->lflow_ref); > > } > > } > > if (!reject || build_non_meter) { > > @@ -7954,7 +7954,7 @@ build_lb_rules(struct lflow_table *lflows, struct > ovn_lb_datapaths *lb_dps, > > lflows, dp_non_meter ? dp_non_meter : lb_dps->nb_ls_map, > > ods_size(ls_datapaths), S_SWITCH_IN_LB, priority, > > ds_cstr(match), ds_cstr(action), &lb->nlb->header_, > > - NULL); > > + lb_dps->lflow_ref); > > } > > bitmap_free(dp_non_meter); > > } > > @@ -9375,7 +9375,7 @@ build_lswitch_arp_nd_service_monitor(const struct > ovn_lb_datapaths *lb_dps, > > S_SWITCH_IN_ARP_ND_RSP, 110, > > ds_cstr(match), ds_cstr(actions), > > &lb->nlb->header_, > > - NULL); > > + lb_dps->lflow_ref); > > } > > } > > } > > @@ -11397,7 +11397,8 @@ build_lrouter_nat_flows_for_lb( > > if (!od->n_l3dgw_ports) { > > bitmap_set1(gw_dp_bitmap[type], index); > > } else { > > - build_distr_lrouter_nat_flows_for_lb(&ctx, type, od, NULL); > > + build_distr_lrouter_nat_flows_for_lb(&ctx, type, od, > > + lb_dps->lflow_ref); > > } > > > > if (lb->affinity_timeout) { > > @@ -11418,17 +11419,17 @@ build_lrouter_nat_flows_for_lb( > > * S_ROUTER_IN_DNAT stage. 
*/ > > ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, 120, > > ds_cstr(&unsnat_match), "next;", > > - &lb->nlb->header_, NULL); > > + &lb->nlb->header_, > lb_dps->lflow_ref); > > } > > } > > > > for (size_t type = 0; type < LROUTER_NAT_LB_FLOW_MAX; type++) { > > build_gw_lrouter_nat_flows_for_lb(&ctx, type, lr_datapaths, > > gw_dp_bitmap[type], > > - NULL); > > + lb_dps->lflow_ref); > > build_lb_affinity_lr_flows(lflows, lb, lb_vip, ds_cstr(match), > > aff_action[type], aff_dp_bitmap[type], > > - lr_datapaths, NULL); > > + lr_datapaths, lb_dps->lflow_ref); > > } > > > > ds_destroy(&unsnat_match); > > @@ -11477,7 +11478,7 @@ build_lswitch_flows_for_lb(struct > ovn_lb_datapaths *lb_dps, > > od->nbs->copp, > > meter_groups), > > &lb->nlb->header_, > > - NULL); > > + lb_dps->lflow_ref); > > } > > /* Ignore L4 port information in the key because fragmented > packets > > * may not have L4 information. The pre-stateful table will send > > @@ -11527,7 +11528,7 @@ build_lrouter_defrag_flows_for_lb(struct > ovn_lb_datapaths *lb_dps, > > ovn_lflow_add_with_dp_group( > > lflows, lb_dps->nb_lr_map, ods_size(lr_datapaths), > > S_ROUTER_IN_DEFRAG, prio, ds_cstr(match), "ct_dnat;", > > - &lb_dps->lb->nlb->header_, NULL); > > + &lb_dps->lb->nlb->header_, lb_dps->lflow_ref); > > } > > } > > > > @@ -11569,7 +11570,7 @@ build_lrouter_flows_for_lb(struct > ovn_lb_datapaths *lb_dps, > > copp_meter_get(COPP_EVENT_ELB, > > od->nbr->copp, > > meter_groups), > > - &lb->nlb->header_, NULL); > > + &lb->nlb->header_, > lb_dps->lflow_ref); > > } > > } > > > > @@ -11579,7 +11580,7 @@ build_lrouter_flows_for_lb(struct > ovn_lb_datapaths *lb_dps, > > > > ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, > > "flags.skip_snat_for_lb == 1 && ip", "next;", > > - NULL); > > + lb_dps->lflow_ref); > > } > > } > > } > > @@ -16388,6 +16389,7 @@ void build_lflows(struct ovsdb_idl_txn *ovnsb_txn, > > void > > lflow_reset_northd_refs(struct lflow_input *lflow_input) > > { > > + struct 
ovn_lb_datapaths *lb_dps; > > struct ovn_port *op; > > > > HMAP_FOR_EACH (op, key_node, lflow_input->ls_ports) { > > @@ -16399,6 +16401,10 @@ lflow_reset_northd_refs(struct lflow_input > *lflow_input) > > lflow_ref_clear(op->lflow_ref); > > lflow_ref_clear(op->stateful_lflow_ref); > > } > > + > > + HMAP_FOR_EACH (lb_dps, hmap_node, lflow_input->lb_datapaths_map) { > > + lflow_ref_clear(lb_dps->lflow_ref); > > + } > > } > > > > bool > > @@ -16575,6 +16581,72 @@ lflow_handle_northd_port_changes(struct > ovsdb_idl_txn *ovnsb_txn, > > return true; > > } > > > > +bool > > +lflow_handle_northd_lb_changes(struct ovsdb_idl_txn *ovnsb_txn, > > + struct tracked_lbs *trk_lbs, > > + struct lflow_input *lflow_input, > > + struct lflow_table *lflows) > > +{ > > + struct ovn_lb_datapaths *lb_dps; > > + struct hmapx_node *hmapx_node; > > + HMAPX_FOR_EACH (hmapx_node, &trk_lbs->deleted) { > > + lb_dps = hmapx_node->data; > > + > > + lflow_ref_resync_flows( > > + lb_dps->lflow_ref, lflows, ovnsb_txn, > lflow_input->ls_datapaths, > > + lflow_input->lr_datapaths, > > + lflow_input->ovn_internal_version_changed, > > + lflow_input->sbrec_logical_flow_table, > > + lflow_input->sbrec_logical_dp_group_table); > > + } > > + > > + HMAPX_FOR_EACH (hmapx_node, &trk_lbs->crupdated) { > > + lb_dps = hmapx_node->data; > > + > > + /* unlink old lflows. */ > > + lflow_ref_unlink_lflows(lb_dps->lflow_ref); > > + > > + /* Generate new lflows. 
*/ > > + struct ds match = DS_EMPTY_INITIALIZER; > > + struct ds actions = DS_EMPTY_INITIALIZER; > > + > > + build_lswitch_arp_nd_service_monitor(lb_dps, > lflow_input->ls_ports, > > + lflows, &actions, > > + &match); > > + build_lrouter_defrag_flows_for_lb(lb_dps, lflows, > > + lflow_input->lr_datapaths, > &match); > > + build_lrouter_flows_for_lb(lb_dps, lflows, > > + lflow_input->meter_groups, > > + lflow_input->lr_datapaths, > > + lflow_input->lr_stateful_table, > > + lflow_input->features, > > + lflow_input->svc_monitor_map, > > + &match, &actions); > > + build_lswitch_flows_for_lb(lb_dps, lflows, > > + lflow_input->meter_groups, > > + lflow_input->ls_datapaths, > > + lflow_input->features, > > + lflow_input->svc_monitor_map, > > + &match, &actions); > > + > > + ds_destroy(&match); > > + ds_destroy(&actions); > > + > > + /* Sync the new flows to SB. */ > > + bool handled = lflow_ref_sync_lflows( > > + lb_dps->lflow_ref, lflows, ovnsb_txn, > lflow_input->ls_datapaths, > > + lflow_input->lr_datapaths, > > + lflow_input->ovn_internal_version_changed, > > + lflow_input->sbrec_logical_flow_table, > > + lflow_input->sbrec_logical_dp_group_table); > > + if (!handled) { > > + return false; > > + } > > + } > > + > > + return true; > > +} > > + > > static bool > > mirror_needs_update(const struct nbrec_mirror *nb_mirror, > > const struct sbrec_mirror *sb_mirror) > > diff --git a/northd/northd.h b/northd/northd.h > > index 42b4eee607..d2640f38d7 100644 > > --- a/northd/northd.h > > +++ b/northd/northd.h > > @@ -675,6 +675,10 @@ bool lflow_handle_northd_port_changes(struct > ovsdb_idl_txn *ovnsb_txn, > > struct tracked_ovn_ports *, > > struct lflow_input *, > > struct lflow_table *lflows); > > +bool lflow_handle_northd_lb_changes(struct ovsdb_idl_txn *ovnsb_txn, > > + struct tracked_lbs *, > > + struct lflow_input *, > > + struct lflow_table *lflows); > > bool northd_handle_sb_port_binding_changes( > > const struct sbrec_port_binding_table *, struct hmap *ls_ports, > > 
struct hmap *lr_ports); > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > > index f7d47fc7e4..80374444fd 100644 > > --- a/tests/ovn-northd.at > > +++ b/tests/ovn-northd.at > > @@ -10519,7 +10519,7 @@ check ovn-nbctl --wait=sb lb-add lb1 10.0.0.10:80 > 10.0.0.3:80 > > > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > -check_engine_stats lflow recompute nocompute > > +check_engine_stats lflow norecompute compute > > check_engine_stats sync_to_sb_lb recompute nocompute > > > > CHECK_NO_CHANGE_AFTER_RECOMPUTE(1) > > @@ -10529,21 +10529,26 @@ check ovn-nbctl --wait=sb set load_balancer . > ip_port_mappings:10.0.0.3=sw0-p1:1 > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > check_engine_stats lr_stateful norecompute compute > > -check_engine_stats lflow recompute nocompute > > +check_engine_stats lflow norecompute compute > > check_engine_stats sync_to_sb_lb recompute nocompute > > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > > > +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > check ovn-nbctl --wait=sb set load_balancer . 
options:foo=bar > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > check_engine_stats lr_stateful norecompute compute > > -check_engine_stats lflow recompute nocompute > > +check_engine_stats lflow norecompute compute > > check_engine_stats sync_to_sb_lb recompute nocompute > > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > > > check ovn-nbctl --wait=sb -- lb-add lb2 20.0.0.10:80 20.0.0.20:80 -- > lb-add lb3 30.0.0.10:80 30.0.0.20:80 > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > check_engine_stats lr_stateful norecompute compute > > -check_engine_stats lflow recompute nocompute > > +check_engine_stats lflow norecompute compute > > check_engine_stats sync_to_sb_lb recompute nocompute > > > > CHECK_NO_CHANGE_AFTER_RECOMPUTE(1) > > @@ -10553,7 +10558,7 @@ check ovn-nbctl --wait=sb -- lb-del lb2 -- lb-del > lb3 > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > check_engine_stats lr_stateful norecompute compute > > -check_engine_stats lflow recompute nocompute > > +check_engine_stats lflow norecompute compute > > check_engine_stats sync_to_sb_lb recompute nocompute > > > > CHECK_NO_CHANGE_AFTER_RECOMPUTE(1) > > @@ -10764,8 +10769,9 @@ check ovn-nbctl --wait=sb add load_balancer_group > . load_Balancer $lb1_uuid > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > check_engine_stats lr_stateful norecompute compute > > -check_engine_stats lflow recompute nocompute > > +check_engine_stats lflow norecompute compute > > check_engine_stats sync_to_sb_lb recompute nocompute > > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > check ovn-nbctl --wait=sb clear load_balancer_group . 
load_Balancer > > @@ -10780,7 +10786,7 @@ check as northd ovn-appctl -t ovn-northd > inc-engine/clear-stats > > check ovn-nbctl --wait=sb add load_balancer_group . load_Balancer > $lb1_uuid > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > -check_engine_stats lflow recompute nocompute > > +check_engine_stats lflow norecompute compute > > check_engine_stats sync_to_sb_lb recompute nocompute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE > > > > @@ -10789,6 +10795,7 @@ check ovn-nbctl --wait=sb add logical_switch sw0 > load_balancer_group $lbg1_uuid > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > check_engine_stats lr_stateful norecompute compute > > +check_engine_stats ls_stateful norecompute compute > > check_engine_stats lflow recompute nocompute > > check_engine_stats sync_to_sb_lb recompute compute > > > > @@ -10797,6 +10804,7 @@ check as northd ovn-appctl -t ovn-northd > inc-engine/clear-stats > > check ovn-nbctl --wait=sb set load_balancer . 
options:bar=foo > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > +check_engine_stats ls_stateful norecompute compute > > check_engine_stats lflow recompute nocompute > > check_engine_stats sync_to_sb_lb recompute compute > > > > @@ -10806,6 +10814,7 @@ check ovn-nbctl --wait=sb set load_balancer lb1 > vips:'"10.0.0.10:80"'='"10.0.0.1 > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > check_engine_stats lr_stateful norecompute compute > > +check_engine_stats ls_stateful norecompute compute > > check_engine_stats lflow recompute nocompute > > check_engine_stats sync_to_sb_lb recompute compute > > CHECK_NO_CHANGE_AFTER_RECOMPUTE > > @@ -10903,6 +10912,7 @@ check_engine_stats northd recompute nocompute > > check_engine_stats lr_stateful recompute nocompute > > check_engine_stats lflow recompute nocompute > > check_engine_stats sync_to_sb_lb recompute compute > > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > > > # Add back lb group to logical switch and then delete it. > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > @@ -10912,6 +10922,7 @@ check_engine_stats northd norecompute compute > > check_engine_stats lr_stateful norecompute compute > > check_engine_stats lflow recompute nocompute > > check_engine_stats sync_to_sb_lb recompute compute > > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > check ovn-nbctl --wait=sb clear logical_switch sw0 load_balancer_group > -- \ > > @@ -10945,14 +10956,17 @@ check_engine_stats northd norecompute compute > > check_engine_stats lr_stateful norecompute compute > > check_engine_stats lflow norecompute nocompute > > check_engine_stats sync_to_sb_lb norecompute nocompute > > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > check ovn-nbctl --wait=sb set load_balancer_group . 
> load_balancer="$lb2_uuid,$lb3_uuid,$lb4_uuid" > > check_engine_stats lb_data norecompute compute > > check_engine_stats northd norecompute compute > > +check_engine_stats ls_stateful norecompute compute > > check_engine_stats lr_stateful norecompute compute > > -check_engine_stats lflow recompute nocompute > > +check_engine_stats lflow norecompute compute > > check_engine_stats sync_to_sb_lb recompute nocompute > > +CHECK_NO_CHANGE_AFTER_RECOMPUTE > > > > check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats > > check ovn-nbctl --wait=sb set logical_switch sw0 > load_balancer_group=$lbg1_uuid > > -- > > 2.43.0 > > > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On Fri, Jan 19, 2024 at 6:13 AM Dumitru Ceara <dceara@redhat.com> wrote: > > On 1/11/24 16:32, numans@ovn.org wrote: > > From: Numan Siddique <numans@ovn.org> > > > > Since northd tracked data has the changed lb data, northd > > engine handler for lflow engine now handles the lb changes > > incrementally. All the lflows generated for each lb is > > The first sentence is a bit confusing. What about re-phrasing it to: > > Since northd tracked data has the changed lb information, the lflow > change handler for northd inputs can now handle lb updates incrementally. Ack. > > > stored in the ovn_lb_datapaths->lflow_ref and this is used > > similar to how we handle ovn_port changes. > > > > Signed-off-by: Numan Siddique <numans@ovn.org> > > --- > > northd/en-lflow.c | 11 ++--- > > northd/lb.c | 3 ++ > > northd/lb.h | 26 ++++++++++++ > > northd/lflow-mgr.c | 47 +++++++++++++++++----- > > northd/northd.c | 98 +++++++++++++++++++++++++++++++++++++++------ > > northd/northd.h | 4 ++ > > tests/ovn-northd.at | 30 ++++++++++---- > > 7 files changed, 184 insertions(+), 35 deletions(-) > > > > diff --git a/northd/en-lflow.c b/northd/en-lflow.c > > index fef9a1352d..205605578f 100644 > > --- a/northd/en-lflow.c > > +++ b/northd/en-lflow.c > > @@ -123,11 +123,6 @@ lflow_northd_handler(struct engine_node *node, > > return false; > > } > > > > - /* Fall back to recompute if load balancers have changed. */ > > - if (northd_has_lbs_in_tracked_data(&northd_data->trk_data)) { > > - return false; > > - } > > - > > const struct engine_context *eng_ctx = engine_get_context(); > > struct lflow_data *lflow_data = data; > > > > @@ -140,6 +135,12 @@ lflow_northd_handler(struct engine_node *node, > > return false; > > } > > > > + if (!lflow_handle_northd_lb_changes(eng_ctx->ovnsb_idl_txn, > > + &northd_data->trk_data.trk_lbs, > > + &lflow_input, lflow_data->lflow_table)) { > > Nit: indentation Ack. 
> > > + return false; > > + } > > + > > engine_set_node_state(node, EN_UPDATED); > > return true; > > } > > diff --git a/northd/lb.c b/northd/lb.c > > index e6c8a51911..5fca41e049 100644 > > --- a/northd/lb.c > > +++ b/northd/lb.c > > @@ -21,6 +21,7 @@ > > > > /* OVN includes */ > > #include "lb.h" > > +#include "lflow-mgr.h" > > #include "lib/lb.h" > > #include "northd.h" > > > > @@ -33,6 +34,7 @@ ovn_lb_datapaths_create(const struct ovn_northd_lb *lb, size_t n_ls_datapaths, > > lb_dps->lb = lb; > > lb_dps->nb_ls_map = bitmap_allocate(n_ls_datapaths); > > lb_dps->nb_lr_map = bitmap_allocate(n_lr_datapaths); > > + lb_dps->lflow_ref = lflow_ref_create(); > > > > return lb_dps; > > } > > @@ -42,6 +44,7 @@ ovn_lb_datapaths_destroy(struct ovn_lb_datapaths *lb_dps) > > { > > bitmap_free(lb_dps->nb_lr_map); > > bitmap_free(lb_dps->nb_ls_map); > > + lflow_ref_destroy(lb_dps->lflow_ref); > > free(lb_dps); > > } > > > > diff --git a/northd/lb.h b/northd/lb.h > > index eeef2ea260..de677ca4ef 100644 > > --- a/northd/lb.h > > +++ b/northd/lb.h > > @@ -20,6 +20,8 @@ > > #include "openvswitch/hmap.h" > > #include "uuid.h" > > > > +struct lflow_ref; > > + > > struct ovn_lb_datapaths { > > struct hmap_node hmap_node; > > > > @@ -29,6 +31,30 @@ struct ovn_lb_datapaths { > > > > size_t n_nb_lr; > > unsigned long *nb_lr_map; > > + > > + /* Reference of lflows generated for this load balancer. > > + * > > + * This data is initialized and destroyed by the en_northd node, but > > + * populated and used only by the en_lflow node. Ideally this data should > > + * be maintained as part of en_lflow's data (struct lflow_data): a hash > > + * index from ovn_port key to lflows. However, it would be less efficient > > + * and more complex: > > + * > > + * 1. It would require an extra search (using the index) to find the > > + * lflows. > > + * > > + * 2. 
Building the index needs to be thread-safe, using either a global > > + * lock which is obviously less efficient, or hash-based lock array which > > + * is more complex. > > + * > > + * Maintaining the lflow_ref here is more straightforward. The drawback is > > + * that we need to keep in mind that this data belongs to en_lflow node, > > + * so never access it from any other nodes. > > + * > > + * 'lflow_ref' is used to reference logical flows generated for this > > + * load balancer. > > + */ > > + struct lflow_ref *lflow_ref; > > }; > > > > struct ovn_lb_datapaths *ovn_lb_datapaths_create(const struct ovn_northd_lb *, > > diff --git a/northd/lflow-mgr.c b/northd/lflow-mgr.c > > index 3cf9696f6e..6cb2a367fe 100644 > > --- a/northd/lflow-mgr.c > > +++ b/northd/lflow-mgr.c > > @@ -375,7 +375,15 @@ struct lflow_ref_node { > > /* The lflow. */ > > struct ovn_lflow *lflow; > > > > - /* Index id of the datapath this lflow_ref_node belongs to. */ > > + /* Indicates of the lflow was added with dp_group or not using > > + * ovn_lflow_add_with_dp_group() macro. */ > > + bool dpgrp_lflow; > > + /* dpgrp bitmap and bitmap length. Valid only of dpgrp_lflow is true. */ > > + unsigned long *dpgrp_bitmap; > > + size_t dpgrp_bitmap_len; > > Instead of adding this new 'bool dgprp_lflow' and checking it everywhere > where we need to update the lflow's dp_refcnts_map can't we just always > use dpgrp_bitmap? > > For the case when flows are not added with ovn_lflow_add_with_dp_group() > we can just set a single bit in dpgrp_bitmap, the one corresponding to > od->index. > > Wdyt? It is a good suggestion. But unfortunately when ovn_lflow_add_with_dp_group() is not used, we don't know the bitmap size. Although this can be passed, it requires a lot of changes to pass the bitmap length. Thanks Numan > > > + > > + /* Index id of the datapath this lflow_ref_node belongs to. > > + * Valid only if dpgrp_lflow is false. 
*/ > > size_t dp_index; > > > > /* Indicates if the lflow_ref_node for an lflow - L(M, A) is linked > > @@ -429,9 +437,19 @@ lflow_ref_unlink_lflows(struct lflow_ref *lflow_ref) > > struct lflow_ref_node *lrn; > > > > LIST_FOR_EACH (lrn, lflow_list_node, &lflow_ref->lflows_ref_list) { > > - if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, > > - lrn->dp_index)) { > > - bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > > + if (lrn->dpgrp_lflow) { > > + size_t index; > > + BITMAP_FOR_EACH_1 (index, lrn->dpgrp_bitmap_len, > > + lrn->dpgrp_bitmap) { > > + if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, index)) { > > + bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > > + } > > + } > > + } else { > > + if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, > > + lrn->dp_index)) { > > + bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); > > + } > > } > > > > lrn->linked = false; > > @@ -502,18 +520,26 @@ lflow_table_add_lflow(struct lflow_table *lflow_table, > > io_port, ctrl_meter, stage_hint, where); > > > > if (lflow_ref) { > > - /* lflow referencing is only supported if 'od' is not NULL. 
*/ > > - ovs_assert(od); > > - > > struct lflow_ref_node *lrn = > > lflow_ref_node_find(&lflow_ref->lflow_ref_nodes, lflow, hash); > > if (!lrn) { > > lrn = xzalloc(sizeof *lrn); > > lrn->lflow = lflow; > > - lrn->dp_index = od->index; > > + lrn->dpgrp_lflow = !od; > > + if (lrn->dpgrp_lflow) { > > + lrn->dpgrp_bitmap = bitmap_clone(dp_bitmap, dp_bitmap_len); > > + lrn->dpgrp_bitmap_len = dp_bitmap_len; > > + > > + size_t index; > > + BITMAP_FOR_EACH_1 (index, dp_bitmap_len, dp_bitmap) { > > + inc_dp_refcnt(&lflow->dp_refcnts_map, index); > > + } > > + } else { > > + lrn->dp_index = od->index; > > + inc_dp_refcnt(&lflow->dp_refcnts_map, lrn->dp_index); > > + } > > ovs_list_insert(&lflow_ref->lflows_ref_list, > > &lrn->lflow_list_node); > > - inc_dp_refcnt(&lflow->dp_refcnts_map, lrn->dp_index); > > ovs_list_insert(&lflow->referenced_by, &lrn->ref_list_node); > > > > hmap_insert(&lflow_ref->lflow_ref_nodes, &lrn->ref_node, hash); > > @@ -1257,5 +1283,8 @@ lflow_ref_node_destroy(struct lflow_ref_node *lrn, > > } > > ovs_list_remove(&lrn->lflow_list_node); > > ovs_list_remove(&lrn->ref_list_node); > > + if (lrn->dpgrp_lflow) { > > + bitmap_free(lrn->dpgrp_bitmap); > > + } > > free(lrn); > > } > > Thanks, > Dumitru > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
diff --git a/northd/en-lflow.c b/northd/en-lflow.c index fef9a1352d..205605578f 100644 --- a/northd/en-lflow.c +++ b/northd/en-lflow.c @@ -123,11 +123,6 @@ lflow_northd_handler(struct engine_node *node, return false; } - /* Fall back to recompute if load balancers have changed. */ - if (northd_has_lbs_in_tracked_data(&northd_data->trk_data)) { - return false; - } - const struct engine_context *eng_ctx = engine_get_context(); struct lflow_data *lflow_data = data; @@ -140,6 +135,12 @@ lflow_northd_handler(struct engine_node *node, return false; } + if (!lflow_handle_northd_lb_changes(eng_ctx->ovnsb_idl_txn, + &northd_data->trk_data.trk_lbs, + &lflow_input, lflow_data->lflow_table)) { + return false; + } + engine_set_node_state(node, EN_UPDATED); return true; } diff --git a/northd/lb.c b/northd/lb.c index e6c8a51911..5fca41e049 100644 --- a/northd/lb.c +++ b/northd/lb.c @@ -21,6 +21,7 @@ /* OVN includes */ #include "lb.h" +#include "lflow-mgr.h" #include "lib/lb.h" #include "northd.h" @@ -33,6 +34,7 @@ ovn_lb_datapaths_create(const struct ovn_northd_lb *lb, size_t n_ls_datapaths, lb_dps->lb = lb; lb_dps->nb_ls_map = bitmap_allocate(n_ls_datapaths); lb_dps->nb_lr_map = bitmap_allocate(n_lr_datapaths); + lb_dps->lflow_ref = lflow_ref_create(); return lb_dps; } @@ -42,6 +44,7 @@ ovn_lb_datapaths_destroy(struct ovn_lb_datapaths *lb_dps) { bitmap_free(lb_dps->nb_lr_map); bitmap_free(lb_dps->nb_ls_map); + lflow_ref_destroy(lb_dps->lflow_ref); free(lb_dps); } diff --git a/northd/lb.h b/northd/lb.h index eeef2ea260..de677ca4ef 100644 --- a/northd/lb.h +++ b/northd/lb.h @@ -20,6 +20,8 @@ #include "openvswitch/hmap.h" #include "uuid.h" +struct lflow_ref; + struct ovn_lb_datapaths { struct hmap_node hmap_node; @@ -29,6 +31,30 @@ struct ovn_lb_datapaths { size_t n_nb_lr; unsigned long *nb_lr_map; + + /* Reference of lflows generated for this load balancer. + * + * This data is initialized and destroyed by the en_northd node, but + * populated and used only by the en_lflow node. 
Ideally this data should + * be maintained as part of en_lflow's data (struct lflow_data): a hash + * index from ovn_port key to lflows. However, it would be less efficient + * and more complex: + * + * 1. It would require an extra search (using the index) to find the + * lflows. + * + * 2. Building the index needs to be thread-safe, using either a global + * lock which is obviously less efficient, or hash-based lock array which + * is more complex. + * + * Maintaining the lflow_ref here is more straightforward. The drawback is + * that we need to keep in mind that this data belongs to en_lflow node, + * so never access it from any other nodes. + * + * 'lflow_ref' is used to reference logical flows generated for this + * load balancer. + */ + struct lflow_ref *lflow_ref; }; struct ovn_lb_datapaths *ovn_lb_datapaths_create(const struct ovn_northd_lb *, diff --git a/northd/lflow-mgr.c b/northd/lflow-mgr.c index 3cf9696f6e..6cb2a367fe 100644 --- a/northd/lflow-mgr.c +++ b/northd/lflow-mgr.c @@ -375,7 +375,15 @@ struct lflow_ref_node { /* The lflow. */ struct ovn_lflow *lflow; - /* Index id of the datapath this lflow_ref_node belongs to. */ + /* Indicates whether the lflow was added with a dp_group using the + * ovn_lflow_add_with_dp_group() macro. */ + bool dpgrp_lflow; + /* dpgrp bitmap and bitmap length. Valid only if dpgrp_lflow is true. */ + unsigned long *dpgrp_bitmap; + size_t dpgrp_bitmap_len; + + /* Index id of the datapath this lflow_ref_node belongs to. + * Valid only if dpgrp_lflow is false. 
*/ size_t dp_index; /* Indicates if the lflow_ref_node for an lflow - L(M, A) is linked @@ -429,9 +437,19 @@ lflow_ref_unlink_lflows(struct lflow_ref *lflow_ref) struct lflow_ref_node *lrn; LIST_FOR_EACH (lrn, lflow_list_node, &lflow_ref->lflows_ref_list) { - if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, - lrn->dp_index)) { - bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); + if (lrn->dpgrp_lflow) { + size_t index; + BITMAP_FOR_EACH_1 (index, lrn->dpgrp_bitmap_len, + lrn->dpgrp_bitmap) { + if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, index)) { + bitmap_set0(lrn->lflow->dpg_bitmap, index); + } + } + } else { + if (dec_dp_refcnt(&lrn->lflow->dp_refcnts_map, + lrn->dp_index)) { + bitmap_set0(lrn->lflow->dpg_bitmap, lrn->dp_index); + } } lrn->linked = false; @@ -502,18 +520,26 @@ lflow_table_add_lflow(struct lflow_table *lflow_table, io_port, ctrl_meter, stage_hint, where); if (lflow_ref) { - /* lflow referencing is only supported if 'od' is not NULL. */ - ovs_assert(od); - struct lflow_ref_node *lrn = lflow_ref_node_find(&lflow_ref->lflow_ref_nodes, lflow, hash); if (!lrn) { lrn = xzalloc(sizeof *lrn); lrn->lflow = lflow; - lrn->dp_index = od->index; + lrn->dpgrp_lflow = !od; + if (lrn->dpgrp_lflow) { + lrn->dpgrp_bitmap = bitmap_clone(dp_bitmap, dp_bitmap_len); + lrn->dpgrp_bitmap_len = dp_bitmap_len; + + size_t index; + BITMAP_FOR_EACH_1 (index, dp_bitmap_len, dp_bitmap) { + inc_dp_refcnt(&lflow->dp_refcnts_map, index); + } + } else { + lrn->dp_index = od->index; + inc_dp_refcnt(&lflow->dp_refcnts_map, lrn->dp_index); + } ovs_list_insert(&lflow_ref->lflows_ref_list, &lrn->lflow_list_node); - inc_dp_refcnt(&lflow->dp_refcnts_map, lrn->dp_index); ovs_list_insert(&lflow->referenced_by, &lrn->ref_list_node); hmap_insert(&lflow_ref->lflow_ref_nodes, &lrn->ref_node, hash); @@ -1257,5 +1283,8 @@ lflow_ref_node_destroy(struct lflow_ref_node *lrn, } ovs_list_remove(&lrn->lflow_list_node); ovs_list_remove(&lrn->ref_list_node); + if (lrn->dpgrp_lflow) { + 
bitmap_free(lrn->dpgrp_bitmap); + } free(lrn); } diff --git a/northd/northd.c b/northd/northd.c index 08732abbfa..6225dfe541 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -7477,7 +7477,7 @@ build_lb_rules_pre_stateful(struct lflow_table *lflows, ovn_lflow_add_with_dp_group( lflows, lb_dps->nb_ls_map, ods_size(ls_datapaths), S_SWITCH_IN_PRE_STATEFUL, 120, ds_cstr(match), ds_cstr(action), - &lb->nlb->header_, NULL); + &lb->nlb->header_, lb_dps->lflow_ref); } } @@ -7922,7 +7922,7 @@ build_lb_rules(struct lflow_table *lflows, struct ovn_lb_datapaths *lb_dps, } build_lb_affinity_ls_flows(lflows, lb_dps, lb_vip, ls_datapaths, - NULL); + lb_dps->lflow_ref); unsigned long *dp_non_meter = NULL; bool build_non_meter = false; @@ -7946,7 +7946,7 @@ build_lb_rules(struct lflow_table *lflows, struct ovn_lb_datapaths *lb_dps, lflows, od, S_SWITCH_IN_LB, priority, ds_cstr(match), ds_cstr(action), NULL, meter, &lb->nlb->header_, - NULL); + lb_dps->lflow_ref); } } if (!reject || build_non_meter) { @@ -7954,7 +7954,7 @@ build_lb_rules(struct lflow_table *lflows, struct ovn_lb_datapaths *lb_dps, lflows, dp_non_meter ? dp_non_meter : lb_dps->nb_ls_map, ods_size(ls_datapaths), S_SWITCH_IN_LB, priority, ds_cstr(match), ds_cstr(action), &lb->nlb->header_, - NULL); + lb_dps->lflow_ref); } bitmap_free(dp_non_meter); } @@ -9375,7 +9375,7 @@ build_lswitch_arp_nd_service_monitor(const struct ovn_lb_datapaths *lb_dps, S_SWITCH_IN_ARP_ND_RSP, 110, ds_cstr(match), ds_cstr(actions), &lb->nlb->header_, - NULL); + lb_dps->lflow_ref); } } } @@ -11397,7 +11397,8 @@ build_lrouter_nat_flows_for_lb( if (!od->n_l3dgw_ports) { bitmap_set1(gw_dp_bitmap[type], index); } else { - build_distr_lrouter_nat_flows_for_lb(&ctx, type, od, NULL); + build_distr_lrouter_nat_flows_for_lb(&ctx, type, od, + lb_dps->lflow_ref); } if (lb->affinity_timeout) { @@ -11418,17 +11419,17 @@ build_lrouter_nat_flows_for_lb( * S_ROUTER_IN_DNAT stage. 
*/ ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT, 120, ds_cstr(&unsnat_match), "next;", - &lb->nlb->header_, NULL); + &lb->nlb->header_, lb_dps->lflow_ref); } } for (size_t type = 0; type < LROUTER_NAT_LB_FLOW_MAX; type++) { build_gw_lrouter_nat_flows_for_lb(&ctx, type, lr_datapaths, gw_dp_bitmap[type], - NULL); + lb_dps->lflow_ref); build_lb_affinity_lr_flows(lflows, lb, lb_vip, ds_cstr(match), aff_action[type], aff_dp_bitmap[type], - lr_datapaths, NULL); + lr_datapaths, lb_dps->lflow_ref); } ds_destroy(&unsnat_match); @@ -11477,7 +11478,7 @@ build_lswitch_flows_for_lb(struct ovn_lb_datapaths *lb_dps, od->nbs->copp, meter_groups), &lb->nlb->header_, - NULL); + lb_dps->lflow_ref); } /* Ignore L4 port information in the key because fragmented packets * may not have L4 information. The pre-stateful table will send @@ -11527,7 +11528,7 @@ build_lrouter_defrag_flows_for_lb(struct ovn_lb_datapaths *lb_dps, ovn_lflow_add_with_dp_group( lflows, lb_dps->nb_lr_map, ods_size(lr_datapaths), S_ROUTER_IN_DEFRAG, prio, ds_cstr(match), "ct_dnat;", - &lb_dps->lb->nlb->header_, NULL); + &lb_dps->lb->nlb->header_, lb_dps->lflow_ref); } } @@ -11569,7 +11570,7 @@ build_lrouter_flows_for_lb(struct ovn_lb_datapaths *lb_dps, copp_meter_get(COPP_EVENT_ELB, od->nbr->copp, meter_groups), - &lb->nlb->header_, NULL); + &lb->nlb->header_, lb_dps->lflow_ref); } } @@ -11579,7 +11580,7 @@ build_lrouter_flows_for_lb(struct ovn_lb_datapaths *lb_dps, ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "flags.skip_snat_for_lb == 1 && ip", "next;", - NULL); + lb_dps->lflow_ref); } } } @@ -16388,6 +16389,7 @@ void build_lflows(struct ovsdb_idl_txn *ovnsb_txn, void lflow_reset_northd_refs(struct lflow_input *lflow_input) { + struct ovn_lb_datapaths *lb_dps; struct ovn_port *op; HMAP_FOR_EACH (op, key_node, lflow_input->ls_ports) { @@ -16399,6 +16401,10 @@ lflow_reset_northd_refs(struct lflow_input *lflow_input) lflow_ref_clear(op->lflow_ref); lflow_ref_clear(op->stateful_lflow_ref); } + + 
HMAP_FOR_EACH (lb_dps, hmap_node, lflow_input->lb_datapaths_map) { + lflow_ref_clear(lb_dps->lflow_ref); + } } bool @@ -16575,6 +16581,72 @@ lflow_handle_northd_port_changes(struct ovsdb_idl_txn *ovnsb_txn, return true; } +bool +lflow_handle_northd_lb_changes(struct ovsdb_idl_txn *ovnsb_txn, + struct tracked_lbs *trk_lbs, + struct lflow_input *lflow_input, + struct lflow_table *lflows) +{ + struct ovn_lb_datapaths *lb_dps; + struct hmapx_node *hmapx_node; + HMAPX_FOR_EACH (hmapx_node, &trk_lbs->deleted) { + lb_dps = hmapx_node->data; + + lflow_ref_resync_flows( + lb_dps->lflow_ref, lflows, ovnsb_txn, lflow_input->ls_datapaths, + lflow_input->lr_datapaths, + lflow_input->ovn_internal_version_changed, + lflow_input->sbrec_logical_flow_table, + lflow_input->sbrec_logical_dp_group_table); + } + + HMAPX_FOR_EACH (hmapx_node, &trk_lbs->crupdated) { + lb_dps = hmapx_node->data; + + /* unlink old lflows. */ + lflow_ref_unlink_lflows(lb_dps->lflow_ref); + + /* Generate new lflows. */ + struct ds match = DS_EMPTY_INITIALIZER; + struct ds actions = DS_EMPTY_INITIALIZER; + + build_lswitch_arp_nd_service_monitor(lb_dps, lflow_input->ls_ports, + lflows, &actions, + &match); + build_lrouter_defrag_flows_for_lb(lb_dps, lflows, + lflow_input->lr_datapaths, &match); + build_lrouter_flows_for_lb(lb_dps, lflows, + lflow_input->meter_groups, + lflow_input->lr_datapaths, + lflow_input->lr_stateful_table, + lflow_input->features, + lflow_input->svc_monitor_map, + &match, &actions); + build_lswitch_flows_for_lb(lb_dps, lflows, + lflow_input->meter_groups, + lflow_input->ls_datapaths, + lflow_input->features, + lflow_input->svc_monitor_map, + &match, &actions); + + ds_destroy(&match); + ds_destroy(&actions); + + /* Sync the new flows to SB. 
*/ + bool handled = lflow_ref_sync_lflows( + lb_dps->lflow_ref, lflows, ovnsb_txn, lflow_input->ls_datapaths, + lflow_input->lr_datapaths, + lflow_input->ovn_internal_version_changed, + lflow_input->sbrec_logical_flow_table, + lflow_input->sbrec_logical_dp_group_table); + if (!handled) { + return false; + } + } + + return true; +} + static bool mirror_needs_update(const struct nbrec_mirror *nb_mirror, const struct sbrec_mirror *sb_mirror) diff --git a/northd/northd.h b/northd/northd.h index 42b4eee607..d2640f38d7 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -675,6 +675,10 @@ bool lflow_handle_northd_port_changes(struct ovsdb_idl_txn *ovnsb_txn, struct tracked_ovn_ports *, struct lflow_input *, struct lflow_table *lflows); +bool lflow_handle_northd_lb_changes(struct ovsdb_idl_txn *ovnsb_txn, + struct tracked_lbs *, + struct lflow_input *, + struct lflow_table *lflows); bool northd_handle_sb_port_binding_changes( const struct sbrec_port_binding_table *, struct hmap *ls_ports, struct hmap *lr_ports); diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index f7d47fc7e4..80374444fd 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -10519,7 +10519,7 @@ check ovn-nbctl --wait=sb lb-add lb1 10.0.0.10:80 10.0.0.3:80 check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute -check_engine_stats lflow recompute nocompute +check_engine_stats lflow norecompute compute check_engine_stats sync_to_sb_lb recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE(1) @@ -10529,21 +10529,26 @@ check ovn-nbctl --wait=sb set load_balancer . 
ip_port_mappings:10.0.0.3=sw0-p1:1 check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute check_engine_stats lr_stateful norecompute compute -check_engine_stats lflow recompute nocompute +check_engine_stats lflow norecompute compute check_engine_stats sync_to_sb_lb recompute nocompute +CHECK_NO_CHANGE_AFTER_RECOMPUTE +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats check ovn-nbctl --wait=sb set load_balancer . options:foo=bar check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute check_engine_stats lr_stateful norecompute compute -check_engine_stats lflow recompute nocompute +check_engine_stats lflow norecompute compute check_engine_stats sync_to_sb_lb recompute nocompute +CHECK_NO_CHANGE_AFTER_RECOMPUTE +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats check ovn-nbctl --wait=sb -- lb-add lb2 20.0.0.10:80 20.0.0.20:80 -- lb-add lb3 30.0.0.10:80 30.0.0.20:80 check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute check_engine_stats lr_stateful norecompute compute -check_engine_stats lflow recompute nocompute +check_engine_stats lflow norecompute compute check_engine_stats sync_to_sb_lb recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE(1) @@ -10553,7 +10558,7 @@ check ovn-nbctl --wait=sb -- lb-del lb2 -- lb-del lb3 check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute check_engine_stats lr_stateful norecompute compute -check_engine_stats lflow recompute nocompute +check_engine_stats lflow norecompute compute check_engine_stats sync_to_sb_lb recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE(1) @@ -10764,8 +10769,9 @@ check ovn-nbctl --wait=sb add load_balancer_group . 
load_Balancer $lb1_uuid check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute check_engine_stats lr_stateful norecompute compute -check_engine_stats lflow recompute nocompute +check_engine_stats lflow norecompute compute check_engine_stats sync_to_sb_lb recompute nocompute +CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats check ovn-nbctl --wait=sb clear load_balancer_group . load_Balancer @@ -10780,7 +10786,7 @@ check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats check ovn-nbctl --wait=sb add load_balancer_group . load_Balancer $lb1_uuid check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute -check_engine_stats lflow recompute nocompute +check_engine_stats lflow norecompute compute check_engine_stats sync_to_sb_lb recompute nocompute CHECK_NO_CHANGE_AFTER_RECOMPUTE @@ -10789,6 +10795,7 @@ check ovn-nbctl --wait=sb add logical_switch sw0 load_balancer_group $lbg1_uuid check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute check_engine_stats lr_stateful norecompute compute +check_engine_stats ls_stateful norecompute compute check_engine_stats lflow recompute nocompute check_engine_stats sync_to_sb_lb recompute compute @@ -10797,6 +10804,7 @@ check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats check ovn-nbctl --wait=sb set load_balancer . 
options:bar=foo check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute +check_engine_stats ls_stateful norecompute compute check_engine_stats lflow recompute nocompute check_engine_stats sync_to_sb_lb recompute compute @@ -10806,6 +10814,7 @@ check ovn-nbctl --wait=sb set load_balancer lb1 vips:'"10.0.0.10:80"'='"10.0.0.1 check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute check_engine_stats lr_stateful norecompute compute +check_engine_stats ls_stateful norecompute compute check_engine_stats lflow recompute nocompute check_engine_stats sync_to_sb_lb recompute compute CHECK_NO_CHANGE_AFTER_RECOMPUTE @@ -10903,6 +10912,7 @@ check_engine_stats northd recompute nocompute check_engine_stats lr_stateful recompute nocompute check_engine_stats lflow recompute nocompute check_engine_stats sync_to_sb_lb recompute compute +CHECK_NO_CHANGE_AFTER_RECOMPUTE # Add back lb group to logical switch and then delete it. check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats @@ -10912,6 +10922,7 @@ check_engine_stats northd norecompute compute check_engine_stats lr_stateful norecompute compute check_engine_stats lflow recompute nocompute check_engine_stats sync_to_sb_lb recompute compute +CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats check ovn-nbctl --wait=sb clear logical_switch sw0 load_balancer_group -- \ @@ -10945,14 +10956,17 @@ check_engine_stats northd norecompute compute check_engine_stats lr_stateful norecompute compute check_engine_stats lflow norecompute nocompute check_engine_stats sync_to_sb_lb norecompute nocompute +CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats check ovn-nbctl --wait=sb set load_balancer_group . 
load_balancer="$lb2_uuid,$lb3_uuid,$lb4_uuid" check_engine_stats lb_data norecompute compute check_engine_stats northd norecompute compute +check_engine_stats ls_stateful norecompute compute check_engine_stats lr_stateful norecompute compute -check_engine_stats lflow recompute nocompute +check_engine_stats lflow norecompute compute check_engine_stats sync_to_sb_lb recompute nocompute +CHECK_NO_CHANGE_AFTER_RECOMPUTE check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats check ovn-nbctl --wait=sb set logical_switch sw0 load_balancer_group=$lbg1_uuid