Message ID | 20250106142825.468355-2-xsimonar@redhat.com |
---|---|
State | Changes Requested |
Headers | show |
Series | Postpone ports performance fixes. | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_ovn-kubernetes | success | github build: passed |
ovsrobot/github-robot-_Build_and_Test | fail | github build: failed |
Hi Xavier, I have some comments below. On 1/6/25 09:28, Xavier Simonart wrote: > In some cases, when a port was going to be postponed, the same port (lsp) > was claimed four times: > - within runtime_data handling pb changes: > - lsp is claimed as handling pb changes, and added to postponed list. > - it's claimed again as handling postponed list. > - within runtime_data handling postponed list, the same two operations: > - lsp is claimed as handling pb changes, and added to postponed list. > - it's claimed again as handling postponed list. > > With this patch, the lsp1 port would only only be claimed twice. > - within runtime_data handling pb changes: > - lsp is claimed as handling pb changes, and added to postponed list. > - within runtime_data handling postponed list, the same two operations: I think "the same two operations" was copy-pasted from the first paragraph. Here it doesn't make as much sense since you are not describing two operations below, just one. > - it's claimed again as handling postponed list. > > This patch is only about CPU utilization; it should not result in any > flow changes. > > Signed-off-by: Xavier Simonart <xsimonar@redhat.com> > > --- > v2: rebase on origin/main for scapy fix. > v3: - Updated based on Ales' question: remove deleted pb from postponed_ports. > - Rebase on origin/main. > > Signed-off-by: Xavier Simonart <xsimonar@redhat.com> > --- > controller/binding.c | 46 ++++++++++++++++++++++--------------- > controller/binding.h | 2 ++ > controller/ovn-controller.c | 37 +++++++++++++++++++++++++++-- > 3 files changed, 64 insertions(+), 21 deletions(-) > > diff --git a/controller/binding.c b/controller/binding.c > index 16302046e..a6e02bb5e 100644 > --- a/controller/binding.c > +++ b/controller/binding.c > @@ -3102,6 +3102,33 @@ handle_updated_port(struct binding_ctx_in *b_ctx_in, > return handled; > } > > +bool > +binding_handle_postponed_ports(struct binding_ctx_in *b_ctx_in, > + struct binding_ctx_out *b_ctx_out) > +{ > + /* Also handle any postponed (throttled) ports. */ > + const char *port_name; > + const struct sbrec_port_binding *pb; > + bool handled = true; > + struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports); > + sset_clone(&postponed_ports, b_ctx_out->postponed_ports); > + SSET_FOR_EACH (port_name, &postponed_ports) { > + pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name, > + port_name); > + if (!pb) { > + sset_find_and_delete(b_ctx_out->postponed_ports, port_name); > + continue; > + } > + handled = handle_updated_port(b_ctx_in, b_ctx_out, pb); > + if (!handled) { > + break; > + } > + } > + sset_destroy(&postponed_ports); > + cleanup_claimed_port_timestamps(); > + return handled; > +} > + > /* Returns true if the port binding changes resulted in local binding > * updates, false otherwise. > */ > @@ -3248,25 +3275,6 @@ delete_done: > } > } > > - /* Also handle any postponed (throttled) ports. */ > - const char *port_name; > - struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports); > - sset_clone(&postponed_ports, b_ctx_out->postponed_ports); > - SSET_FOR_EACH (port_name, &postponed_ports) { > - pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name, > - port_name); > - if (!pb) { > - sset_find_and_delete(b_ctx_out->postponed_ports, port_name); > - continue; > - } > - handled = handle_updated_port(b_ctx_in, b_ctx_out, pb); > - if (!handled) { > - break; > - } > - } > - sset_destroy(&postponed_ports); > - cleanup_claimed_port_timestamps(); > - > if (handled) { > /* There may be new local datapaths added by the above handling, so go > * through each port_binding of newly added local datapaths to update > diff --git a/controller/binding.h b/controller/binding.h > index d13ae36c7..5303d4f58 100644 > --- a/controller/binding.h > +++ b/controller/binding.h > @@ -196,6 +196,8 @@ bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn, > > bool binding_handle_ovs_interface_changes(struct binding_ctx_in *, > struct binding_ctx_out *); > +bool binding_handle_postponed_ports(struct binding_ctx_in *, > + struct binding_ctx_out *); > bool binding_handle_port_binding_changes(struct binding_ctx_in *, > struct binding_ctx_out *); > void binding_tracked_dp_destroy(struct hmap *tracked_datapaths); > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index 157def2a3..584b4b579 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -1540,6 +1540,38 @@ runtime_data_ovs_interface_shadow_handler(struct engine_node *node, void *data) > return true; > } > > +static bool > +runtime_data_postponed_ports_handler(struct engine_node *node, void *data) > +{ > + struct ed_type_runtime_data *rt_data = data; > + struct binding_ctx_in b_ctx_in; > + struct binding_ctx_out b_ctx_out; > + init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out); > + if (!b_ctx_in.chassis_rec) { > + return false; > + } > + > + rt_data->tracked = true; > + b_ctx_out.tracked_dp_bindings = &rt_data->tracked_dp_bindings; > + > + if (!binding_handle_postponed_ports(&b_ctx_in, &b_ctx_out)) { > + return false; > + } > + > + rt_data->local_lports_changed = b_ctx_out.local_lports_changed; > + rt_data->localnet_learn_fdb = b_ctx_out.localnet_learn_fdb; > + rt_data->localnet_learn_fdb_changed = b_ctx_out.localnet_learn_fdb_changed; > + if (b_ctx_out.related_lports_changed || > + b_ctx_out.non_vif_ports_changed || > + b_ctx_out.local_lports_changed || > + b_ctx_out.localnet_learn_fdb_changed || > + !hmap_is_empty(b_ctx_out.tracked_dp_bindings)) { > + engine_set_node_state(node, EN_UPDATED); > + } > + > + return true; > +} > + > static bool > runtime_data_sb_port_binding_handler(struct engine_node *node, void *data) > { > @@ -5260,9 +5292,10 @@ main(int argc, char *argv[]) > runtime_data_sb_datapath_binding_handler); > engine_add_input(&en_runtime_data, &en_sb_port_binding, > runtime_data_sb_port_binding_handler); > - /* Reuse the same handler for any previously postponed ports. */ > + /* Run postponed_ports_handler after port_binding_handler in case port get > + * deleted */ Is this comment correct? Does the postponed_ports_handler run after port_binding_handler in case the port gets deleted, or is it because the port_binding_handler is what populates the postponed_ports sset? I've been dealing with the incremental engine quite a bit lately, and this sort of usage seems to go against the intended flow of data. It appears that en_runtime_data populates the global postponed_ports sset. Then en_postponed_ports uses the postponed_ports sset to handle any postponed ports. IMO, if en_postponed_ports relies on the output of en_runtime_data, then en_runtime_data should have the postponed ports in its output, and en_runtime_data should be an input to en_postponed_ports. This way, we're not reliant on the side effect of the engine running the nodes in the order in which they are written in ovn-controller. Instead, we're defining node dependencies in a way that is easier to understand and is resilient to internal changes to the incremental engine code. I know this is not the only place where this is done. There are several places where we rely on this side effect instead of allowing the data to flow as the engine code intends. I thought about acking this patch and letting it slide, since it's already done in other places. However, instead of just allowing it to continue to happen, I'm going to suggest that this can be the first step towards getting the incremental nodes' flow of data in a better state. > engine_add_input(&en_runtime_data, &en_postponed_ports, > - runtime_data_sb_port_binding_handler); > + runtime_data_postponed_ports_handler); > /* Run sb_ro_handler after port_binding_handler in case port get deleted */ > engine_add_input(&en_runtime_data, &en_sb_ro, runtime_data_sb_ro_handler); >
Hi Mark Thanks for the review and the comments. On Mon, Jan 20, 2025 at 10:08 PM Mark Michelson <mmichels@redhat.com> wrote: > Hi Xavier, > > I have some comments below. > > On 1/6/25 09:28, Xavier Simonart wrote: > > In some cases, when a port was going to be postponed, the same port (lsp) > > was claimed four times: > > - within runtime_data handling pb changes: > > - lsp is claimed as handling pb changes, and added to postponed list. > > - it's claimed again as handling postponed list. > > - within runtime_data handling postponed list, the same two operations: > > - lsp is claimed as handling pb changes, and added to postponed list. > > - it's claimed again as handling postponed list. > > > > With this patch, the lsp1 port would only only be claimed twice. > > - within runtime_data handling pb changes: > > - lsp is claimed as handling pb changes, and added to postponed list. > > - within runtime_data handling postponed list, the same two operations: > > I think "the same two operations" was copy-pasted from the first > paragraph. Here it doesn't make as much sense since you are not > describing two operations below, just one. > Correct, thanks. > > > - it's claimed again as handling postponed list. > > > > This patch is only about CPU utilization; it should not result in any > > flow changes. > > > > Signed-off-by: Xavier Simonart <xsimonar@redhat.com> > > > > --- > > v2: rebase on origin/main for scapy fix. > > v3: - Updated based on Ales' question: remove deleted pb from > postponed_ports. > > - Rebase on origin/main. > > > > Signed-off-by: Xavier Simonart <xsimonar@redhat.com> > > --- > > controller/binding.c | 46 ++++++++++++++++++++++--------------- > > controller/binding.h | 2 ++ > > controller/ovn-controller.c | 37 +++++++++++++++++++++++++++-- > > 3 files changed, 64 insertions(+), 21 deletions(-) > > > > diff --git a/controller/binding.c b/controller/binding.c > > index 16302046e..a6e02bb5e 100644 > > --- a/controller/binding.c > > +++ b/controller/binding.c > > @@ -3102,6 +3102,33 @@ handle_updated_port(struct binding_ctx_in > *b_ctx_in, > > return handled; > > } > > > > +bool > > +binding_handle_postponed_ports(struct binding_ctx_in *b_ctx_in, > > + struct binding_ctx_out *b_ctx_out) > > +{ > > + /* Also handle any postponed (throttled) ports. */ > > + const char *port_name; > > + const struct sbrec_port_binding *pb; > > + bool handled = true; > > + struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports); > > + sset_clone(&postponed_ports, b_ctx_out->postponed_ports); > > + SSET_FOR_EACH (port_name, &postponed_ports) { > > + pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name, > > + port_name); > > + if (!pb) { > > + sset_find_and_delete(b_ctx_out->postponed_ports, port_name); > > + continue; > > + } > > + handled = handle_updated_port(b_ctx_in, b_ctx_out, pb); > > + if (!handled) { > > + break; > > + } > > + } > > + sset_destroy(&postponed_ports); > > + cleanup_claimed_port_timestamps(); > > + return handled; > > +} > > + > > /* Returns true if the port binding changes resulted in local binding > > * updates, false otherwise. > > */ > > @@ -3248,25 +3275,6 @@ delete_done: > > } > > } > > > > - /* Also handle any postponed (throttled) ports. */ > > - const char *port_name; > > - struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports); > > - sset_clone(&postponed_ports, b_ctx_out->postponed_ports); > > - SSET_FOR_EACH (port_name, &postponed_ports) { > > - pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name, > > - port_name); > > - if (!pb) { > > - sset_find_and_delete(b_ctx_out->postponed_ports, port_name); > > - continue; > > - } > > - handled = handle_updated_port(b_ctx_in, b_ctx_out, pb); > > - if (!handled) { > > - break; > > - } > > - } > > - sset_destroy(&postponed_ports); > > - cleanup_claimed_port_timestamps(); > > - > > if (handled) { > > /* There may be new local datapaths added by the above > handling, so go > > * through each port_binding of newly added local datapaths to > update > > diff --git a/controller/binding.h b/controller/binding.h > > index d13ae36c7..5303d4f58 100644 > > --- a/controller/binding.h > > +++ b/controller/binding.h > > @@ -196,6 +196,8 @@ bool binding_cleanup(struct ovsdb_idl_txn > *ovnsb_idl_txn, > > > > bool binding_handle_ovs_interface_changes(struct binding_ctx_in *, > > struct binding_ctx_out *); > > +bool binding_handle_postponed_ports(struct binding_ctx_in *, > > + struct binding_ctx_out *); > > bool binding_handle_port_binding_changes(struct binding_ctx_in *, > > struct binding_ctx_out *); > > void binding_tracked_dp_destroy(struct hmap *tracked_datapaths); > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > index 157def2a3..584b4b579 100644 > > --- a/controller/ovn-controller.c > > +++ b/controller/ovn-controller.c > > @@ -1540,6 +1540,38 @@ runtime_data_ovs_interface_shadow_handler(struct > engine_node *node, void *data) > > return true; > > } > > > > +static bool > > +runtime_data_postponed_ports_handler(struct engine_node *node, void > *data) > > +{ > > + struct ed_type_runtime_data *rt_data = data; > > + struct binding_ctx_in b_ctx_in; > > + struct binding_ctx_out b_ctx_out; > > + init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out); > > + if (!b_ctx_in.chassis_rec) { > > + return false; > > + } > > + > > + rt_data->tracked = true; > > + b_ctx_out.tracked_dp_bindings = &rt_data->tracked_dp_bindings; > > + > > + if (!binding_handle_postponed_ports(&b_ctx_in, &b_ctx_out)) { > > + return false; > > + } > > + > > + rt_data->local_lports_changed = b_ctx_out.local_lports_changed; > > + rt_data->localnet_learn_fdb = b_ctx_out.localnet_learn_fdb; > > + rt_data->localnet_learn_fdb_changed = > b_ctx_out.localnet_learn_fdb_changed; > > + if (b_ctx_out.related_lports_changed || > > + b_ctx_out.non_vif_ports_changed || > > + b_ctx_out.local_lports_changed || > > + b_ctx_out.localnet_learn_fdb_changed || > > + !hmap_is_empty(b_ctx_out.tracked_dp_bindings)) { > > + engine_set_node_state(node, EN_UPDATED); > > + } > > + > > + return true; > > +} > > + > > static bool > > runtime_data_sb_port_binding_handler(struct engine_node *node, void > *data) > > { > > @@ -5260,9 +5292,10 @@ main(int argc, char *argv[]) > > runtime_data_sb_datapath_binding_handler); > > engine_add_input(&en_runtime_data, &en_sb_port_binding, > > runtime_data_sb_port_binding_handler); > > - /* Reuse the same handler for any previously postponed ports. */ > > + /* Run postponed_ports_handler after port_binding_handler in case > port get > > + * deleted */ > > Is this comment correct? Does the postponed_ports_handler run after > port_binding_handler in case the port gets deleted, or is it because the > port_binding_handler is what populates the postponed_ports sset? > I think that the comment is correct, for the same reason as mentioned for the OVS interface handler. If a port gets deleted, it's handled in port_binding_handler. Hence postponed_ports_handling, if run afterwards, can run safely without having to check whether ports are still valid. Even if port_binding_handler populates the postponed_ports set, there is in fact no much value in handling this port again when handling postponed_ports set just afterwards; the port just got postponed (for 500 msec), so it's too early to handle it now. In other words, from that perspective, it would be even better to run the postponed_handler before the port_binding_handler. This is why I mentioned in the commit message that, even with this patch, we still try to claim such a port twice. I've been dealing with the incremental engine quite a bit lately, and > this sort of usage seems to go against the intended flow of data. It > I somehow agree, but the fact that runtime_data_port_binding_handler must be run early was already present, and related to the same sb_port_binding_handler. (e.g. runtime_data_ovs_interface_shadow_handler must be run afterwards). I did not try to fix this in this patch. But, yes, with this patch we have one more node that needs to run after the runtime_data_port_binding_handler. > appears that en_runtime_data populates the global postponed_ports sset. > Correct, but ideally we should only handle those postponed_ports in next ovn-controller iteration loops, as in most cases the ports just got postponed. > Then en_postponed_ports uses the postponed_ports sset to handle any > postponed ports. > > IMO, if en_postponed_ports relies on the output of en_runtime_data, then > en_runtime_data should have the postponed ports in its output, and > en_runtime_data should be an input to en_postponed_ports. I am not sure we can have a node being both an input and an output of other nodes. However, we can create an additional node. For instance, I think that we could have a "prep_data" node (until a better name is found), having sb_port_binding as input( and maybe only handling the port deletion part). This new node would be an additional input of OVS_interface node (with a noop handler), the runtime data node, as well as the postponed ports node. > This way, > we're not reliant on the side effect of the engine running the nodes in > the order in which they are written in ovn-controller. Instead, we're > defining node dependencies in a way that is easier to understand and is > resilient to internal changes to the incremental engine code. > > I know this is not the only place where this is done. There are several > places where we rely on this side effect instead of allowing the data to > flow as the engine code intends. I thought about acking this patch and > letting it slide, since it's already done in other places. However, > instead of just allowing it to continue to happen, I'm going to suggest > that this can be the first step towards getting the incremental nodes' > flow of data in a better state. > Agreed. As it does not seem to be as easy as I thought/hoped, I propose to drop this patch for now until I can properly add the new nodes I will still try to have the second patch of the series ( controller: Avoid potential 100% cpu when ports are postponed. <http://patchwork.ozlabs.org/project/ovn/patch/20250106142825.468355-3-xsimonar@redhat.com/>) go through as it fixes a real issue (this patch was only a small optimization). I'll submit a v4 of patch 2 for it to rebase. > > > engine_add_input(&en_runtime_data, &en_postponed_ports, > > - runtime_data_sb_port_binding_handler); > > + runtime_data_postponed_ports_handler); > > /* Run sb_ro_handler after port_binding_handler in case port get > deleted */ > > engine_add_input(&en_runtime_data, &en_sb_ro, > runtime_data_sb_ro_handler); > > > > Thanks Xavier
diff --git a/controller/binding.c b/controller/binding.c index 16302046e..a6e02bb5e 100644 --- a/controller/binding.c +++ b/controller/binding.c @@ -3102,6 +3102,33 @@ handle_updated_port(struct binding_ctx_in *b_ctx_in, return handled; } +bool +binding_handle_postponed_ports(struct binding_ctx_in *b_ctx_in, + struct binding_ctx_out *b_ctx_out) +{ + /* Also handle any postponed (throttled) ports. */ + const char *port_name; + const struct sbrec_port_binding *pb; + bool handled = true; + struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports); + sset_clone(&postponed_ports, b_ctx_out->postponed_ports); + SSET_FOR_EACH (port_name, &postponed_ports) { + pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name, + port_name); + if (!pb) { + sset_find_and_delete(b_ctx_out->postponed_ports, port_name); + continue; + } + handled = handle_updated_port(b_ctx_in, b_ctx_out, pb); + if (!handled) { + break; + } + } + sset_destroy(&postponed_ports); + cleanup_claimed_port_timestamps(); + return handled; +} + /* Returns true if the port binding changes resulted in local binding * updates, false otherwise. */ @@ -3248,25 +3275,6 @@ delete_done: } } - /* Also handle any postponed (throttled) ports. */ - const char *port_name; - struct sset postponed_ports = SSET_INITIALIZER(&postponed_ports); - sset_clone(&postponed_ports, b_ctx_out->postponed_ports); - SSET_FOR_EACH (port_name, &postponed_ports) { - pb = lport_lookup_by_name(b_ctx_in->sbrec_port_binding_by_name, - port_name); - if (!pb) { - sset_find_and_delete(b_ctx_out->postponed_ports, port_name); - continue; - } - handled = handle_updated_port(b_ctx_in, b_ctx_out, pb); - if (!handled) { - break; - } - } - sset_destroy(&postponed_ports); - cleanup_claimed_port_timestamps(); - if (handled) { /* There may be new local datapaths added by the above handling, so go * through each port_binding of newly added local datapaths to update diff --git a/controller/binding.h b/controller/binding.h index d13ae36c7..5303d4f58 100644 --- a/controller/binding.h +++ b/controller/binding.h @@ -196,6 +196,8 @@ bool binding_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn, bool binding_handle_ovs_interface_changes(struct binding_ctx_in *, struct binding_ctx_out *); +bool binding_handle_postponed_ports(struct binding_ctx_in *, + struct binding_ctx_out *); bool binding_handle_port_binding_changes(struct binding_ctx_in *, struct binding_ctx_out *); void binding_tracked_dp_destroy(struct hmap *tracked_datapaths); diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 157def2a3..584b4b579 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -1540,6 +1540,38 @@ runtime_data_ovs_interface_shadow_handler(struct engine_node *node, void *data) return true; } +static bool +runtime_data_postponed_ports_handler(struct engine_node *node, void *data) +{ + struct ed_type_runtime_data *rt_data = data; + struct binding_ctx_in b_ctx_in; + struct binding_ctx_out b_ctx_out; + init_binding_ctx(node, rt_data, &b_ctx_in, &b_ctx_out); + if (!b_ctx_in.chassis_rec) { + return false; + } + + rt_data->tracked = true; + b_ctx_out.tracked_dp_bindings = &rt_data->tracked_dp_bindings; + + if (!binding_handle_postponed_ports(&b_ctx_in, &b_ctx_out)) { + return false; + } + + rt_data->local_lports_changed = b_ctx_out.local_lports_changed; + rt_data->localnet_learn_fdb = b_ctx_out.localnet_learn_fdb; + rt_data->localnet_learn_fdb_changed = b_ctx_out.localnet_learn_fdb_changed; + if (b_ctx_out.related_lports_changed || + b_ctx_out.non_vif_ports_changed || + b_ctx_out.local_lports_changed || + b_ctx_out.localnet_learn_fdb_changed || + !hmap_is_empty(b_ctx_out.tracked_dp_bindings)) { + engine_set_node_state(node, EN_UPDATED); + } + + return true; +} + static bool runtime_data_sb_port_binding_handler(struct engine_node *node, void *data) { @@ -5260,9 +5292,10 @@ main(int argc, char *argv[]) runtime_data_sb_datapath_binding_handler); engine_add_input(&en_runtime_data, &en_sb_port_binding, runtime_data_sb_port_binding_handler); - /* Reuse the same handler for any previously postponed ports. */ + /* Run postponed_ports_handler after port_binding_handler in case port get + * deleted */ engine_add_input(&en_runtime_data, &en_postponed_ports, - runtime_data_sb_port_binding_handler); + runtime_data_postponed_ports_handler); /* Run sb_ro_handler after port_binding_handler in case port get deleted */ engine_add_input(&en_runtime_data, &en_sb_ro, runtime_data_sb_ro_handler);