Message ID | 1565657498-62682-7-git-send-email-yihung.wei@gmail.com |
---|---|
State | Superseded |
Headers | show |
Series | Support zone-based conntrack timeout policy | expand |
Thanks for the patch Some high level comments: 1/ The ct_tp_kill_list code is still in common code I think we discussed moving that to the dpif backer code ct_timeout_policy_unref() is adding to this deferred kill list which is not needed for userspace datapath. 2/ clear_existing_ct_timeout_policies() is in common code, but only does something if ct_dpif_timeout_policy_dump_start/next/done are realized in the datatype type specific code (which is only for the kernel code, which is correct). I think it would be cleaner and less confusing just to make the API clear_existing_ct_timeout_policies() kernel specific; i.e. in dpif-netlink. Some other comments inline On Mon, Aug 12, 2019 at 5:55 PM Yi-Hung Wei <yihung.wei@gmail.com> wrote: > This patch consumes the CT_Zone and CT_Timeout_Policy tables, maintains > the zone-based configuration in the vswitchd. Whenever there is a > database change, vswitchd will read the datapath, CT_Zone, and > CT_Timeout_Policy tables from ovsdb, builds an internal snapshot of the > database configuration in bridge.c, and pushes down the change into > ofproto and dpif layer. > > If a new zone-based timeout policy is added, it updates the zone to > timeout policy mapping in the per datapath type datapath structure in > dpif-backer, and pushes down the timeout policy into the datapath via > dpif interface. > > If a timeout policy is no longer used, for kernel datapath, vswitchd > may not be able to remove it from datapath immediately since > datapath flows can still reference the to-be-deleted timeout policies. > Thus, we keep an timeout policy kill list, that vswitchd will go > back to the list periodically and try to kill the unused timeout policies. > > Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com> > --- > ofproto/ofproto-dpif.c | 293 > +++++++++++++++++++++++++++++++++++++++++++++ > ofproto/ofproto-dpif.h | 10 ++ > ofproto/ofproto-provider.h | 10 ++ > ofproto/ofproto.c | 30 +++++ > ofproto/ofproto.h | 5 + > vswitchd/bridge.c | 202 +++++++++++++++++++++++++++++++ > 6 files changed, 550 insertions(+) > > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > index 751535249e21..3013d83e96a0 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -156,6 +156,25 @@ struct ofport_dpif { > size_t n_qdscp; > }; > > +struct ct_timeout_policy { > + int ref_count; /* The number of ct zones that use this > + * timeout policy. */ > + uint32_t tp_id; /* Timeout policy id in the datapath. */ > + struct simap tp; /* A map from timeout policy attribute to > + * timeout value. */ > + struct hmap_node node; /* Element in struct dpif_backer's > "ct_tps" > + * cmap. */ > + struct ovs_list list_node; /* Element in struct dpif_backer's > + * "ct_tp_kill_list" list. */ > +}; > + > +struct ct_zone { > + uint16_t zone_id; > + struct ct_timeout_policy *ct_tp; > + struct cmap_node node; /* Element in struct dpif_backer's > + * "ct_zones" cmap. */ > +}; > + > static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *, > ofp_port_t); > > @@ -196,6 +215,9 @@ static struct hmap all_ofproto_dpifs_by_uuid = > > static bool ofproto_use_tnl_push_pop = true; > static void ofproto_unixctl_init(void); > +static void ct_zone_config_init(struct dpif_backer *backer); > +static void ct_zone_config_uninit(struct dpif_backer *backer); > +static void ct_zone_timeout_policy_sweep(struct dpif_backer *backer); > > static inline struct ofproto_dpif * > ofproto_dpif_cast(const struct ofproto *ofproto) > @@ -488,6 +510,7 @@ type_run(const char *type) > } > > process_dpif_port_changes(backer); > + ct_zone_timeout_policy_sweep(backer); > > return 0; > } > @@ -683,6 +706,7 @@ close_dpif_backer(struct dpif_backer *backer, bool del) > } > dpif_close(backer->dpif); > id_pool_destroy(backer->meter_ids); > + ct_zone_config_uninit(backer); > free(backer); > } > > @@ -694,6 +718,8 @@ struct odp_garbage { > > static void check_support(struct dpif_backer *backer); > > +#define MAX_TIMEOUT_POLICY_ID UINT32_MAX > seems like random placement; could be moved where it is used. > + > static int > open_dpif_backer(const char *type, struct dpif_backer **backerp) > { > @@ -811,6 +837,8 @@ open_dpif_backer(const char *type, struct dpif_backer > **backerp) > backer->meter_ids = NULL; > } > > + ct_zone_config_init(backer); > + > /* Make a pristine snapshot of 'support' into 'boottime_support'. > * 'boottime_support' can be checked to prevent 'support' to be > changed > * beyond the datapath capabilities. In case 'support' is changed by > @@ -5086,6 +5114,269 @@ ct_flush(const struct ofproto *ofproto_, const > uint16_t *zone) > ct_dpif_flush(ofproto->backer->dpif, zone, NULL); > } > > +static struct ct_timeout_policy * > +ct_timeout_policy_lookup(const struct hmap *ct_tps, struct simap *tp) > +{ > + struct ct_timeout_policy *ct_tp; > + > + HMAP_FOR_EACH_WITH_HASH (ct_tp, node, simap_hash(tp), ct_tps) { > + if (simap_equal(&ct_tp->tp, tp)) { > + return ct_tp; > + } > + } > + return NULL; > +} > + > +static struct ct_timeout_policy * > +ct_timeout_policy_alloc__(void) > +{ > + struct ct_timeout_policy *ct_tp = xzalloc(sizeof *ct_tp); > + simap_init(&ct_tp->tp); > + return ct_tp; > +} > by using above API, you are not saving any code and maybe more error prone > + > +static struct ct_timeout_policy * > +ct_timeout_policy_alloc(struct simap *tp, struct id_pool *tp_ids) > +{ > + struct simap_node *node; > + > + struct ct_timeout_policy *ct_tp = ct_timeout_policy_alloc__(); > + SIMAP_FOR_EACH (node, tp) { > + simap_put(&ct_tp->tp, node->name, node->data); > + } > + > + if (!id_pool_alloc_id(tp_ids, &ct_tp->tp_id)) { > + VLOG_ERR_RL(&rl, "failed to allocate timeout policy id."); > + simap_destroy(&ct_tp->tp); > + free(tp); > I think you rather need to free 'ct_tp'; i.e. free(ct_tp) + return NULL; > + } > + > + return ct_tp; > +} > + > +static void > +ct_timeout_policy_destroy(struct ct_timeout_policy *ct_tp, > + struct id_pool *tp_ids) > +{ > + id_pool_free_id(tp_ids, ct_tp->tp_id); > + simap_destroy(&ct_tp->tp); > + ovsrcu_postpone(free, ct_tp); > +} > + > +static void > +ct_timeout_policy_unref(struct dpif_backer *backer, > + struct ct_timeout_policy *ct_tp) > +{ > + if (ct_tp) { > + ct_tp->ref_count--; > + > + if (!ct_tp->ref_count) { > + hmap_remove(&backer->ct_tps, &ct_tp->node); > + ovs_list_push_back(&backer->ct_tp_kill_list, > &ct_tp->list_node); > + } > + } > +} > + > +static struct ct_zone * > +ct_zone_lookup(const struct cmap *ct_zones, uint16_t zone) > +{ > + struct ct_zone *ct_zone; > + > + CMAP_FOR_EACH_WITH_HASH (ct_zone, node, hash_int(zone, 0), ct_zones) { > + if (ct_zone->zone_id == zone) { > + return ct_zone; > + } > + } > + return NULL; > +} > + > +static struct ct_zone * > +ct_zone_alloc(uint16_t zone) > +{ > + struct ct_zone *ct_zone = xzalloc(sizeof *ct_zone); > + ct_zone->zone_id = zone; > + return ct_zone; > +} > + > +static void > +ct_zone_destroy(struct ct_zone *ct_zone) > +{ > + ovsrcu_postpone(free, ct_zone); > +} > + > +static void > +ct_zone_remove_and_destroy(struct dpif_backer *backer, struct ct_zone > *ct_zone) > +{ > + cmap_remove(&backer->ct_zones, &ct_zone->node, > + hash_int(ct_zone->zone_id, 0)); > + ct_zone_destroy(ct_zone); > +} > + > +static void > +ct_add_timeout_policy_to_dpif(struct dpif *dpif, > + struct ct_timeout_policy *ct_tp) > +{ > + struct ct_dpif_timeout_policy cdtp; > + struct simap_node *node; > + > + cdtp.id = ct_tp->tp_id; > + SIMAP_FOR_EACH (node, &ct_tp->tp) { > + ct_dpif_set_timeout_policy_attr_by_name(&cdtp, node->name, > node->data); > + } > + > + int err = ct_dpif_set_timeout_policy(dpif, &cdtp); > + if (err) { > + VLOG_ERR_RL(&rl, "failed to set timeout policy %"PRIu32" (%s)", > + ct_tp->tp_id, ovs_strerror(err)); > + } > +} > + > +static void > +clear_existing_ct_timeout_policies(struct dpif_backer *backer) > +{ > + /* In kernel datapath, when OVS starts, there may be some pre-existing > + * timeout policies in the kernel. To avoid reassign the same timeout > + * policy ids, we dump all the pre-existing timeout policies and keep > + * the ids in the pool. Since OVS will not use those timeout policies > + * for new datapath flow, we add them to the kill list and remove > + * them later on. */ > + void *state; > + > + int err = ct_dpif_timeout_policy_dump_start(backer->dpif, &state); > + if (err) { > + return; > + } > can be + if (ct_dpif_timeout_policy_dump_start(backer->dpif, &state)) { + return; + } similarly below > + > + struct ct_dpif_timeout_policy cdtp; > + while (!(err = ct_dpif_timeout_policy_dump_next(backer->dpif, state, > + &cdtp))) { > + struct ct_timeout_policy *ct_tp = ct_timeout_policy_alloc__(); > + ct_tp->tp_id = cdtp.id; > + id_pool_add(backer->tp_ids, cdtp.id); > + ovs_list_insert(&backer->ct_tp_kill_list, &ct_tp->list_node); > not sure why you add to beginning here rather than end; was there some deeper meaning at play ? > + } > + > + ct_dpif_timeout_policy_dump_done(backer->dpif, state); > +} > + > +static void > +ct_zone_config_init(struct dpif_backer *backer) > +{ > + cmap_init(&backer->ct_zones); > + hmap_init(&backer->ct_tps); > + backer->tp_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID); > + ovs_list_init(&backer->ct_tp_kill_list); > + clear_existing_ct_timeout_policies(backer); > +} > + > +static void > +ct_zone_config_uninit(struct dpif_backer *backer) > +{ > + struct ct_timeout_policy *ct_tp; > + struct ct_zone *ct_zone; > + > + CMAP_FOR_EACH (ct_zone, node, &backer->ct_zones) { > + ct_zone_remove_and_destroy(backer, ct_zone); > + } > + > + HMAP_FOR_EACH_POP (ct_tp, node, &backer->ct_tps) { > + ct_timeout_policy_destroy(ct_tp, backer->tp_ids); > + } > + > + LIST_FOR_EACH_POP (ct_tp, list_node, &backer->ct_tp_kill_list) { > + ct_timeout_policy_destroy(ct_tp, backer->tp_ids); > + } > + > + cmap_destroy(&backer->ct_zones); > + hmap_destroy(&backer->ct_tps); > + id_pool_destroy(backer->tp_ids); > +} > + > +static void > +ct_zone_timeout_policy_sweep(struct dpif_backer *backer) > +{ > + if (!ovs_list_is_empty(&backer->ct_tp_kill_list)) { > + struct ct_timeout_policy *ct_tp, *next; > + > + LIST_FOR_EACH_SAFE (ct_tp, next, list_node, > &backer->ct_tp_kill_list) { > + int err = ct_dpif_del_timeout_policy(backer->dpif, > ct_tp->tp_id); > + if (!err) { > + ovs_list_remove(&ct_tp->list_node); > + ct_timeout_policy_destroy(ct_tp, backer->tp_ids); > + } else { > + VLOG_INFO_RL(&rl, "failed to delete timeout policy id = " > + "%"PRIu32" %s", ct_tp->tp_id, > ovs_strerror(err)); > + } > + } > + } > +} > + > +static void > +ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone, > + struct simap *timeout_policy) > +{ > + struct dpif_backer *backer; > + struct ct_timeout_policy *ct_tp; > + struct ct_zone *ct_zone; > + > + backer = shash_find_data(&all_dpif_backers, datapath_type); > + if (!backer) { > + return; > + } > + > + ct_tp = ct_timeout_policy_lookup(&backer->ct_tps, timeout_policy); > + if (!ct_tp) { > + ct_tp = ct_timeout_policy_alloc(timeout_policy, backer->tp_ids); > + if (ct_tp) { > + hmap_insert(&backer->ct_tps, &ct_tp->node, > simap_hash(&ct_tp->tp)); > + ct_add_timeout_policy_to_dpif(backer->dpif, ct_tp); > + } else { > + VLOG_ERR_RL(&rl, "failed to allocate timeout policy"); > + return; > + } > + } > + > + ct_zone = ct_zone_lookup(&backer->ct_zones, zone); > + if (ct_zone) { > + if (ct_zone->ct_tp != ct_tp) { > + /* Add the new zone timeout pollicy. */ > + struct ct_zone *new_ct_zone = ct_zone_alloc(zone); > + new_ct_zone->ct_tp = ct_tp; > + ct_tp->ref_count++; > + cmap_replace(&backer->ct_zones, &ct_zone->node, > &new_ct_zone->node, > + hash_int(zone, 0)); > + > + /* Deletes the old zone timeout policy. */ > + ct_timeout_policy_unref(backer, ct_zone->ct_tp); > + ct_zone_destroy(ct_zone); > + } > + } else { > + struct ct_zone *new_ct_zone = ct_zone_alloc(zone); > + new_ct_zone->ct_tp = ct_tp; > + cmap_insert(&backer->ct_zones, &new_ct_zone->node, hash_int(zone, > 0)); > + ct_tp->ref_count++; > + } > +} > + > +static void > +ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone) > +{ > + struct dpif_backer *backer; > + struct ct_zone *ct_zone; > + > + backer = shash_find_data(&all_dpif_backers, datapath_type); > struct dpif_backer *backer = shash_find_data(&all_dpif_backers, datapath_type); > + if (!backer) { > + return; > + } > + > + ct_zone = ct_zone_lookup(&backer->ct_zones, zone); > struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone); > + if (ct_zone) { > + ct_timeout_policy_unref(backer, ct_zone->ct_tp); > + ct_zone_remove_and_destroy(backer, ct_zone); > + } > +} > + > static bool > set_frag_handling(struct ofproto *ofproto_, > enum ofputil_frag_handling frag_handling) > @@ -6189,4 +6480,6 @@ const struct ofproto_class ofproto_dpif_class = { > get_datapath_version, /* get_datapath_version */ > type_set_config, > ct_flush, /* ct_flush */ > + ct_set_zone_timeout_policy, > + ct_del_zone_timeout_policy, > }; > diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h > index cd5321eb942c..0dd7a45fe550 100644 > --- a/ofproto/ofproto-dpif.h > +++ b/ofproto/ofproto-dpif.h > @@ -245,6 +245,16 @@ struct dpif_backer { > /* Meter. */ > struct id_pool *meter_ids; /* Datapath meter allocation. */ > > + /* Connection tracking. */ > + struct id_pool *tp_ids; /* Datapath timeout policy id > + * allocation. */ > + struct cmap ct_zones; /* "struct ct_zone"s indexed by > zone > + * id. */ > + struct hmap ct_tps; /* "struct ct_timeout_policy"s > indexed > + * by timeout policy (struct > simap). */ > + struct ovs_list ct_tp_kill_list; /* A list of timeout policy to be > + * deleted. */ > + > /* Version string of the datapath stored in OVSDB. */ > char *dp_version_string; > > diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h > index 7907d4bfb416..54da71737b96 100644 > --- a/ofproto/ofproto-provider.h > +++ b/ofproto/ofproto-provider.h > @@ -58,6 +58,7 @@ > #include "tun-metadata.h" > #include "versions.h" > #include "vl-mff-map.h" > +#include "vswitch-idl.h" > > struct match; > struct ofputil_flow_mod; > @@ -1872,6 +1873,15 @@ struct ofproto_class { > /* Flushes the connection tracking tables. If 'zone' is not NULL, > * only deletes connections in '*zone'. */ > void (*ct_flush)(const struct ofproto *, const uint16_t *zone); > + > + /* Sets conntrack timeout policy specified by 'timeout_policy' to > 'zone' > + * in datapath type 'dp_type'. */ > + void (*ct_set_zone_timeout_policy)(const char *dp_type, uint16_t zone, > + struct simap *timeout_policy); > + > + /* Deletes the timeout policy associated with 'zone' in datapath type > + * 'dp_type'. */ > + void (*ct_del_zone_timeout_policy)(const char *dp_type, uint16_t > zone); > }; > > extern const struct ofproto_class ofproto_dpif_class; > diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c > index 1d6fc00696f8..4bcb285c7457 100644 > --- a/ofproto/ofproto.c > +++ b/ofproto/ofproto.c > @@ -935,6 +935,36 @@ ofproto_get_flow_restore_wait(void) > return flow_restore_wait; > } > > +/* Connection tracking configuration. */ > +void > +ofproto_ct_set_zone_timeout_policy(const char *datapath_type, uint16_t > zone, > + struct simap *timeout_policy) > +{ > + const struct ofproto_class *class; > + > + datapath_type = ofproto_normalize_type(datapath_type); > + class = ofproto_class_find__(datapath_type); > const struct ofproto_class *class = ofproto_class_find__(datapath_type); > + > + if (class->ct_set_zone_timeout_policy) { > + class->ct_set_zone_timeout_policy(datapath_type, zone, > + timeout_policy); > + } > +} > + > +void > +ofproto_ct_del_zone_timeout_policy(const char *datapath_type, uint16_t > zone) > +{ > + const struct ofproto_class *class; > + > + datapath_type = ofproto_normalize_type(datapath_type); > + class = ofproto_class_find__(datapath_type); > const struct ofproto_class *class = ofproto_class_find__(datapath_type); > + > + if (class->ct_del_zone_timeout_policy) { > + class->ct_del_zone_timeout_policy(datapath_type, zone); > + } > + > +} > + > > /* Spanning Tree Protocol (STP) configuration. */ > > diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h > index 6e4afffa17e0..acd8bdef78df 100644 > --- a/ofproto/ofproto.h > +++ b/ofproto/ofproto.h > @@ -362,6 +362,11 @@ int ofproto_get_stp_status(struct ofproto *, struct > ofproto_stp_status *); > int ofproto_set_rstp(struct ofproto *, const struct ofproto_rstp_settings > *); > int ofproto_get_rstp_status(struct ofproto *, struct ofproto_rstp_status > *); > void ofproto_set_vlan_limit(int vlan_limit); > +void ofproto_ct_set_zone_timeout_policy(const char *datapath_type, > + uint16_t zone, > + struct simap *timeout_policy); > +void ofproto_ct_del_zone_timeout_policy(const char *datapath_type, > + uint16_t zone); > > /* Configuration of ports. */ > void ofproto_port_unregister(struct ofproto *, ofp_port_t ofp_port); > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c > index 2976771aeaba..c09343536dba 100644 > --- a/vswitchd/bridge.c > +++ b/vswitchd/bridge.c > @@ -153,9 +153,35 @@ struct aa_mapping { > char *br_name; > }; > > +/* Internal representation of conntrak zone configuration table in OVSDB. > */ > 'conntrack' > +struct ct_zone { > + uint16_t zone; > + struct simap tp; /* A map from timeout policy attribute to > + * timeout value. */ > + unsigned int last_used; /* The last idl_seqno that this struct is > used > + * in OVSDB. This number is used for > garbage > + * collection. */ > + struct hmap_node node; /* Element in struct datapath_cfgs's > + * "ct_zone_timeout_policies" hmap. */ > +}; > + > +/* Internal representation of datapath configuration table in OVSDB. */ > +struct datapath { > + char *type; /* Datapath type. */ > + struct hmap ct_zones; /* "struct ct_zone"s indexed by zone id. > */ > + struct hmap_node node; /* In 'all_datapath_cfgs'. */ > + const struct ovsrec_datapath *dp_cfg; > + unsigned int last_used; /* The last idl_seqno that this struct is > used > + * in OVSDB. This number is used for > garbage > + * collection. */ > +}; > + > /* All bridges, indexed by name. */ > static struct hmap all_bridges = HMAP_INITIALIZER(&all_bridges); > > +/* All datapath configuartions, indexed by type. */ > +static struct hmap all_datapaths = HMAP_INITIALIZER(&all_datapaths); > + > /* OVSDB IDL used to obtain configuration. */ > static struct ovsdb_idl *idl; > > @@ -588,6 +614,181 @@ config_ofproto_types(const struct smap *other_config) > } > > static void > +get_timeout_policy_from_ovsrec(struct simap *tp, > + const struct ovsrec_ct_timeout_policy > *tp_cfg) > +{ > + for (size_t i = 0; i < tp_cfg->n_timeouts; i++) { > + simap_put(tp, tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]); > + } > +} > + > +static struct ct_zone * > +ct_zone_lookup(struct hmap *ct_zones, uint16_t zone) > +{ > + struct ct_zone *ct_zone; > + > + HMAP_FOR_EACH_WITH_HASH (ct_zone, node, hash_int(zone, 0), ct_zones) { > + if (ct_zone->zone == zone) { > + return ct_zone; > + } > + } > + return NULL; > +} > + > +static struct ct_zone * > +ct_zone_alloc(uint16_t zone, struct ovsrec_ct_timeout_policy *tp_cfg) > +{ > + struct ct_zone *ct_zone = xzalloc(sizeof *ct_zone); > + > + ct_zone->zone = zone; > + simap_init(&ct_zone->tp); > + get_timeout_policy_from_ovsrec(&ct_zone->tp, tp_cfg); > + return ct_zone; > +} > + > +static void > +ct_zone_remove_and_destroy(struct datapath *dp, struct ct_zone *ct_zone) > +{ > + hmap_remove(&dp->ct_zones, &ct_zone->node); > + simap_destroy(&ct_zone->tp); > + free(ct_zone); > +} > + > +/* Replace 'old_tp' by 'new_tp' (destroyed 'new_tp'). Returns true if > 'old_tp' > + * and 'new_tp' contains different data, false if they are the same. */ > +static bool > +update_timeout_policy(struct simap *old_tp, struct simap *new_tp) > +{ > + bool changed = !simap_equal(old_tp, new_tp); > + simap_swap(old_tp, new_tp); > + simap_destroy(new_tp); > + return changed; > +} > + > +static struct datapath * > +datapath_lookup(const char *type) > +{ > + struct datapath *dp; > + > + HMAP_FOR_EACH_WITH_HASH (dp, node, hash_string(type, 0), > &all_datapaths) { > + if (!strcmp(dp->type, type)) { > + return dp; > + } > + } > + return NULL; > +} > + > +static struct datapath * > +datapath_create(const struct ovsrec_datapath *dp_cfg, const char *type) > +{ > + struct datapath *dp; > + > + ovs_assert(!datapath_lookup(type)); > The caller just did a lookup before calling here; i.e you can remove assert. > + dp = xzalloc(sizeof *dp); > struct datapath *dp = xzalloc(sizeof *dp); + > + dp->type = xstrdup(type); > + dp->dp_cfg = dp_cfg; > + > + hmap_init(&dp->ct_zones); > + hmap_insert(&all_datapaths, &dp->node, hash_string(type, 0)); > + return dp; > +} > + > +static void > +datapath_destroy(struct datapath *dp) > +{ > + struct ct_zone *ct_zone; > + > + if (dp) { > + HMAP_FOR_EACH (ct_zone, node, &dp->ct_zones) { > HMAP_FOR_EACH_SAFE > + ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone); > + ct_zone_remove_and_destroy(dp, ct_zone); > + } > + > + hmap_remove(&all_datapaths, &dp->node); > + hmap_destroy(&dp->ct_zones); > + free(dp->type); > + free(dp); > + } > +} > + > +static void > +update_datapath_cfgs(const struct ovsrec_open_vswitch *cfg) > +{ > + struct datapath *dp; > + size_t i; > + > + /* Add new datapath configs. */ > + for (i = 0; i < cfg->n_datapaths; i++) { > + const struct ovsrec_datapath *dp_cfg = cfg->value_datapaths[i]; > + char *dp_name = cfg->key_datapaths[i]; > + > + dp = datapath_lookup(dp_name); > + if (!dp) { > + dp = datapath_create(dp_cfg, dp_name); > + } > + dp->last_used = idl_seqno; > + } > + > + /* Get rid of deleted datapath configs. */ > + HMAP_FOR_EACH (dp, node, &all_datapaths) { > HMAP_FOR_EACH_SAFE > + if (dp->last_used != idl_seqno) { > + datapath_destroy(dp); > + } > + } > +} > + > +static void > +reconfigure_ct_zones(struct datapath *dp) > +{ > + const struct ovsrec_datapath *dp_cfg = dp->dp_cfg; > + struct ct_zone *ct_zone; > + > + /* Loop through all zones. Add or update configs. */ > + for (size_t i = 0; i < dp_cfg->n_ct_zones; i++) { > + uint16_t zone = dp_cfg->key_ct_zones[i]; > + struct ovsrec_ct_zone *zone_cfg = dp_cfg->value_ct_zones[i]; > + struct ovsrec_ct_timeout_policy *tp_cfg = > zone_cfg->timeout_policy; > + > + ct_zone = ct_zone_lookup(&dp->ct_zones, zone); > + if (ct_zone) { > + struct simap new_tp = SIMAP_INITIALIZER(&new_tp); > + get_timeout_policy_from_ovsrec(&new_tp, tp_cfg); > + if (update_timeout_policy(&ct_zone->tp, &new_tp)) { > + ofproto_ct_set_zone_timeout_policy(dp->type, > ct_zone->zone, > + &ct_zone->tp); > + } > + } else { > + ct_zone = ct_zone_alloc(zone, tp_cfg); > + hmap_insert(&dp->ct_zones, &ct_zone->node, hash_int(zone, 0)); > + ofproto_ct_set_zone_timeout_policy(dp->type, ct_zone->zone, > + &ct_zone->tp); > + } > + ct_zone->last_used = idl_seqno; > + } > + > + /* Remove unused ct_zone configs. */ > + HMAP_FOR_EACH (ct_zone, node, &dp->ct_zones) { > HMAP_FOR_EACH_SAFE > + if (ct_zone->last_used != idl_seqno) { > + ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone); > + ct_zone_remove_and_destroy(dp, ct_zone); > + } > + } > +} > + > +static void > +reconfigure_datapath_cfgs(const struct ovsrec_open_vswitch *cfg) > +{ > + struct datapath *dp; > + > + update_datapath_cfgs(cfg); > + > + HMAP_FOR_EACH (dp, node, &all_datapaths) { > + reconfigure_ct_zones(dp); > + } > +} > + > +static void > bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) > { > struct sockaddr_in *managers; > @@ -669,6 +870,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch > *ovs_cfg) > } > > reconfigure_system_stats(ovs_cfg); > + reconfigure_datapath_cfgs(ovs_cfg); > > /* Complete the configuration. */ > sflow_bridge_number = 0; > -- > 2.7.4 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
On Tue, Aug 13, 2019 at 7:46 PM Darrell Ball <dlu998@gmail.com> wrote: > > Thanks for the patch > > Some high level comments: > > 1/ The ct_tp_kill_list code is still in common code > I think we discussed moving that to the dpif backer code > ct_timeout_policy_unref() is adding to this deferred kill list which is not needed for userspace > datapath. > 2/ clear_existing_ct_timeout_policies() is in common code, but only does something if > ct_dpif_timeout_policy_dump_start/next/done are realized in the datatype type specific code > (which is only for the kernel code, which is correct). I think it would be cleaner and less confusing > just to make the API clear_existing_ct_timeout_policies() kernel specific; i.e. in dpif-netlink. Thanks for review. I address most of the code changes as in the detailed inline code review. For the two high level concerns, it is mainly because currently ct_tp_kill_list is maintained in ofproto-dpif.c I thought about to move the ct_tp_kill_list implementation from dpif_backer (in ofproto-dpif.c) to dpif-netlink.c, and here is why I still keep it in ofproto-dpif.c in the dpif_backer layer in this version. AFAIK, currently, we do not have a proper place to store ct_tp_kill_list in dpif-netlink.c in the userspace. dpif-netlink is for the kernel datapath implementation, all the information that we configured to dpif-netlink are directly pass down into the kernel currently. In userspace datapath, we can store userspace specific information in "struct dp_netdev", but there is no such place in dpif-neltink for now. In this case, it is naturally to maintain the ct_tp_kill_list one level up in the dpif_backer layer. Anyhow, we can always make proper change on the way we maintain timeout policy in ofproto-dpif layer when the dpif-netdev implementation is introduced. > On Mon, Aug 12, 2019 at 5:55 PM Yi-Hung Wei <yihung.wei@gmail.com> wrote: >> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c >> index 751535249e21..3013d83e96a0 100644 >> --- a/ofproto/ofproto-dpif.c >> +++ b/ofproto/ofproto-dpif.c >> @@ -694,6 +718,8 @@ struct odp_garbage { >> >> static void check_support(struct dpif_backer *backer); >> >> +#define MAX_TIMEOUT_POLICY_ID UINT32_MAX > > > seems like random placement; could be moved where it is used. > ok, I will move it right before ct_zone_config_init(). >> +static struct ct_timeout_policy * >> +ct_timeout_policy_alloc__(void) >> +{ >> + struct ct_timeout_policy *ct_tp = xzalloc(sizeof *ct_tp); >> + simap_init(&ct_tp->tp); >> + return ct_tp; >> +} > > > by using above API, you are not saving any code and maybe more error prone > This function is used in ct_timeout_policy_alloc() and clear_existing_ct_timeout_policies(). So are you sugguesting to expand it in these two functions? >> >> + >> +static struct ct_timeout_policy * >> +ct_timeout_policy_alloc(struct simap *tp, struct id_pool *tp_ids) >> +{ >> + struct simap_node *node; >> + >> + struct ct_timeout_policy *ct_tp = ct_timeout_policy_alloc__(); >> + SIMAP_FOR_EACH (node, tp) { >> + simap_put(&ct_tp->tp, node->name, node->data); >> + } >> + >> + if (!id_pool_alloc_id(tp_ids, &ct_tp->tp_id)) { >> + VLOG_ERR_RL(&rl, "failed to allocate timeout policy id."); >> + simap_destroy(&ct_tp->tp); >> + free(tp); > > > I think you rather need to free 'ct_tp'; i.e. free(ct_tp) Yes, thanks for spotting this bug. >> +static void >> +clear_existing_ct_timeout_policies(struct dpif_backer *backer) >> +{ >> + /* In kernel datapath, when OVS starts, there may be some pre-existing >> + * timeout policies in the kernel. To avoid reassign the same timeout >> + * policy ids, we dump all the pre-existing timeout policies and keep >> + * the ids in the pool. Since OVS will not use those timeout policies >> + * for new datapath flow, we add them to the kill list and remove >> + * them later on. */ >> + void *state; >> + >> + int err = ct_dpif_timeout_policy_dump_start(backer->dpif, &state); >> + if (err) { >> + return; >> + } > > > can be > > + if (ct_dpif_timeout_policy_dump_start(backer->dpif, &state)) { > + return; > + } > > similarly below Sure, I will get rid of 'int err' in v4. >> + >> + struct ct_dpif_timeout_policy cdtp; >> + while (!(err = ct_dpif_timeout_policy_dump_next(backer->dpif, state, >> + &cdtp))) { >> + struct ct_timeout_policy *ct_tp = ct_timeout_policy_alloc__(); >> + ct_tp->tp_id = cdtp.id; >> + id_pool_add(backer->tp_ids, cdtp.id); >> + ovs_list_insert(&backer->ct_tp_kill_list, &ct_tp->list_node); > > > not sure why you add to beginning here rather than end; was there some deeper meaning at play ? Not really, I am fine to add it on either side tho. >> +static void >> +ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone) >> +{ >> + struct dpif_backer *backer; >> + struct ct_zone *ct_zone; >> + >> + backer = shash_find_data(&all_dpif_backers, datapath_type); > > > struct dpif_backer *backer = shash_find_data(&all_dpif_backers, datapath_type); Sure, will do in v4. >> >> + if (!backer) { >> + return; >> + } >> + >> + ct_zone = ct_zone_lookup(&backer->ct_zones, zone); > > > struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone); Sure, added to v4. >> --- a/ofproto/ofproto.c >> +++ b/ofproto/ofproto.c >> @@ -935,6 +935,36 @@ ofproto_get_flow_restore_wait(void) >> return flow_restore_wait; >> } >> >> +/* Connection tracking configuration. */ >> +void >> +ofproto_ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone, >> + struct simap *timeout_policy) >> +{ >> + const struct ofproto_class *class; >> + >> + datapath_type = ofproto_normalize_type(datapath_type); >> + class = ofproto_class_find__(datapath_type); > > > const struct ofproto_class *class = ofproto_class_find__(datapath_type); Done in v4. >> +void >> +ofproto_ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone) >> +{ >> + const struct ofproto_class *class; >> + >> + datapath_type = ofproto_normalize_type(datapath_type); >> + class = ofproto_class_find__(datapath_type); > > const struct ofproto_class *class = ofproto_class_find__(datapath_type); Ack. >> --- a/vswitchd/bridge.c >> +++ b/vswitchd/bridge.c >> @@ -153,9 +153,35 @@ struct aa_mapping { >> char *br_name; >> }; >> >> +/* Internal representation of conntrak zone configuration table in OVSDB. */ > > > 'conntrack' Done. >> +static struct datapath * >> +datapath_create(const struct ovsrec_datapath *dp_cfg, const char *type) >> +{ >> + struct datapath *dp; >> + >> + ovs_assert(!datapath_lookup(type)); > > The caller just did a lookup before calling here; i.e you can remove assert. Sounds good. >> >> + dp = xzalloc(sizeof *dp); > struct datapath *dp = xzalloc(sizeof *dp); > ok. >> +static void >> +datapath_destroy(struct datapath *dp) >> +{ >> + struct ct_zone *ct_zone; >> + >> + if (dp) { >> + HMAP_FOR_EACH (ct_zone, node, &dp->ct_zones) { > > HMAP_FOR_EACH_SAFE > I replace HMAP_FOR_EACH to HMAP_FOR_EACH_SAFE in v3 for the following 3 cases that you mentioned. Thanks, -Yi-Hung
On Wed, Aug 14, 2019 at 1:28 PM Yi-Hung Wei <yihung.wei@gmail.com> wrote: > On Tue, Aug 13, 2019 at 7:46 PM Darrell Ball <dlu998@gmail.com> wrote: > > > > Thanks for the patch > > > > Some high level comments: > > > > 1/ The ct_tp_kill_list code is still in common code > > I think we discussed moving that to the dpif backer code > > ct_timeout_policy_unref() is adding to this deferred kill list which > is not needed for userspace > > datapath. > > 2/ clear_existing_ct_timeout_policies() is in common code, but only does > something if > > ct_dpif_timeout_policy_dump_start/next/done are realized in the datatype > type specific code > > (which is only for the kernel code, which is correct). I think it would > be cleaner and less confusing > > just to make the API clear_existing_ct_timeout_policies() kernel > specific; i.e. in dpif-netlink. > > > Thanks for review. I address most of the code changes as in the > detailed inline code review. > > For the two high level concerns, it is mainly because currently > ct_tp_kill_list is maintained in ofproto-dpif.c I thought about to > move the ct_tp_kill_list implementation from dpif_backer (in > ofproto-dpif.c) to dpif-netlink.c, and here is why I still keep it in > ofproto-dpif.c in the dpif_backer layer in this version. > > AFAIK, currently, we do not have a proper place to store > ct_tp_kill_list in dpif-netlink.c in the userspace. dpif-netlink is > for the kernel datapath implementation, all the information that we > configured to dpif-netlink are directly pass down into the kernel > currently. In userspace datapath, we can store userspace specific > information in "struct dp_netdev", but there is no such place in > dpif-neltink for now. In this case, it is naturally to maintain the > ct_tp_kill_list one level up in the dpif_backer layer. > > Anyhow, we can always make proper change on the way we maintain > timeout policy in ofproto-dpif layer when the dpif-netdev > implementation is introduced. > As discussed, lets defer. > > > > On Mon, Aug 12, 2019 at 5:55 PM Yi-Hung Wei <yihung.wei@gmail.com> > wrote: > >> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > >> index 751535249e21..3013d83e96a0 100644 > >> --- a/ofproto/ofproto-dpif.c > >> +++ b/ofproto/ofproto-dpif.c > >> @@ -694,6 +718,8 @@ struct odp_garbage { > >> > >> static void check_support(struct dpif_backer *backer); > >> > >> +#define MAX_TIMEOUT_POLICY_ID UINT32_MAX > > > > > > seems like random placement; could be moved where it is used. > > > > ok, I will move it right before ct_zone_config_init(). > > > >> +static struct ct_timeout_policy * > >> +ct_timeout_policy_alloc__(void) > >> +{ > >> + struct ct_timeout_policy *ct_tp = xzalloc(sizeof *ct_tp); > >> + simap_init(&ct_tp->tp); > >> + return ct_tp; > >> +} > > > > > > by using above API, you are not saving any code and maybe more error > prone > > > > This function is used in ct_timeout_policy_alloc() and > clear_existing_ct_timeout_policies(). So are you sugguesting to expand > it in these two functions? > ohh. I missed the other usage; I don't feel strongly either way. > > >> > >> + > >> +static struct ct_timeout_policy * > >> +ct_timeout_policy_alloc(struct simap *tp, struct id_pool *tp_ids) > >> +{ > >> + struct simap_node *node; > >> + > >> + struct ct_timeout_policy *ct_tp = ct_timeout_policy_alloc__(); > >> + SIMAP_FOR_EACH (node, tp) { > >> + simap_put(&ct_tp->tp, node->name, node->data); > >> + } > >> + > >> + if (!id_pool_alloc_id(tp_ids, &ct_tp->tp_id)) { > >> + VLOG_ERR_RL(&rl, "failed to allocate timeout policy id."); > >> + simap_destroy(&ct_tp->tp); > >> + free(tp); > > > > > > I think you rather need to free 'ct_tp'; i.e. free(ct_tp) > > Yes, thanks for spotting this bug. > > > >> +static void > >> +clear_existing_ct_timeout_policies(struct dpif_backer *backer) > >> +{ > >> + /* In kernel datapath, when OVS starts, there may be some > pre-existing > >> + * timeout policies in the kernel. To avoid reassign the same > timeout > >> + * policy ids, we dump all the pre-existing timeout policies and > keep > >> + * the ids in the pool. Since OVS will not use those timeout > policies > >> + * for new datapath flow, we add them to the kill list and remove > >> + * them later on. */ > >> + void *state; > >> + > >> + int err = ct_dpif_timeout_policy_dump_start(backer->dpif, &state); > >> + if (err) { > >> + return; > >> + } > > > > > > can be > > > > + if (ct_dpif_timeout_policy_dump_start(backer->dpif, &state)) { > > + return; > > + } > > > > similarly below > > Sure, I will get rid of 'int err' in v4. > > >> + > >> + struct ct_dpif_timeout_policy cdtp; > >> + while (!(err = ct_dpif_timeout_policy_dump_next(backer->dpif, > state, > >> + &cdtp))) { > >> + struct ct_timeout_policy *ct_tp = ct_timeout_policy_alloc__(); > >> + ct_tp->tp_id = cdtp.id; > >> + id_pool_add(backer->tp_ids, cdtp.id); > >> + ovs_list_insert(&backer->ct_tp_kill_list, &ct_tp->list_node); > > > > > > not sure why you add to beginning here rather than end; was there some > deeper meaning at play ? > > Not really, I am fine to add it on either side tho. > typically, we add to end then, which helps to eliminate questions and is same as the one other case you added. > > >> +static void > >> +ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone) > >> +{ > >> + struct dpif_backer *backer; > >> + struct ct_zone *ct_zone; > >> + > >> + backer = shash_find_data(&all_dpif_backers, datapath_type); > > > > > > struct dpif_backer *backer = shash_find_data(&all_dpif_backers, > datapath_type); > > Sure, will do in v4. > > > >> > >> + if (!backer) { > >> + return; > >> + } > >> + > >> + ct_zone = ct_zone_lookup(&backer->ct_zones, zone); > > > > > > struct ct_zone *ct_zone = ct_zone_lookup(&backer->ct_zones, zone); > > Sure, added to v4. > > > >> --- a/ofproto/ofproto.c > >> +++ b/ofproto/ofproto.c > >> @@ -935,6 +935,36 @@ ofproto_get_flow_restore_wait(void) > >> return flow_restore_wait; > >> } > >> > >> +/* Connection tracking configuration. */ > >> +void > >> +ofproto_ct_set_zone_timeout_policy(const char *datapath_type, uint16_t > zone, > >> + struct simap *timeout_policy) > >> +{ > >> + const struct ofproto_class *class; > >> + > >> + datapath_type = ofproto_normalize_type(datapath_type); > >> + class = ofproto_class_find__(datapath_type); > > > > > > const struct ofproto_class *class = ofproto_class_find__(datapath_type); > > Done in v4. > > > >> +void > >> +ofproto_ct_del_zone_timeout_policy(const char *datapath_type, uint16_t > zone) > >> +{ > >> + const struct ofproto_class *class; > >> + > >> + datapath_type = ofproto_normalize_type(datapath_type); > >> + class = ofproto_class_find__(datapath_type); > > > > const struct ofproto_class *class = ofproto_class_find__(datapath_type); > > Ack. > > >> --- a/vswitchd/bridge.c > >> +++ b/vswitchd/bridge.c > >> @@ -153,9 +153,35 @@ struct aa_mapping { > >> char *br_name; > >> }; > >> > >> +/* Internal representation of conntrak zone configuration table in > OVSDB. */ > > > > > > 'conntrack' > > Done. > > >> +static struct datapath * > >> +datapath_create(const struct ovsrec_datapath *dp_cfg, const char *type) > >> +{ > >> + struct datapath *dp; > >> + > >> + ovs_assert(!datapath_lookup(type)); > > > > The caller just did a lookup before calling here; i.e you can remove > assert. > > Sounds good. > > >> > >> + dp = xzalloc(sizeof *dp); > > struct datapath *dp = xzalloc(sizeof *dp); > > > ok. > > > >> +static void > >> +datapath_destroy(struct datapath *dp) > >> +{ > >> + struct ct_zone *ct_zone; > >> + > >> + if (dp) { > >> + HMAP_FOR_EACH (ct_zone, node, &dp->ct_zones) { > > > > HMAP_FOR_EACH_SAFE > > > > I replace HMAP_FOR_EACH to HMAP_FOR_EACH_SAFE in v3 for the following > 3 cases that you mentioned. > > Thanks, > > -Yi-Hung >
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 751535249e21..3013d83e96a0 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -156,6 +156,25 @@ struct ofport_dpif { size_t n_qdscp; }; +struct ct_timeout_policy { + int ref_count; /* The number of ct zones that use this + * timeout policy. */ + uint32_t tp_id; /* Timeout policy id in the datapath. */ + struct simap tp; /* A map from timeout policy attribute to + * timeout value. */ + struct hmap_node node; /* Element in struct dpif_backer's "ct_tps" + * cmap. */ + struct ovs_list list_node; /* Element in struct dpif_backer's + * "ct_tp_kill_list" list. */ +}; + +struct ct_zone { + uint16_t zone_id; + struct ct_timeout_policy *ct_tp; + struct cmap_node node; /* Element in struct dpif_backer's + * "ct_zones" cmap. */ +}; + static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *, ofp_port_t); @@ -196,6 +215,9 @@ static struct hmap all_ofproto_dpifs_by_uuid = static bool ofproto_use_tnl_push_pop = true; static void ofproto_unixctl_init(void); +static void ct_zone_config_init(struct dpif_backer *backer); +static void ct_zone_config_uninit(struct dpif_backer *backer); +static void ct_zone_timeout_policy_sweep(struct dpif_backer *backer); static inline struct ofproto_dpif * ofproto_dpif_cast(const struct ofproto *ofproto) @@ -488,6 +510,7 @@ type_run(const char *type) } process_dpif_port_changes(backer); + ct_zone_timeout_policy_sweep(backer); return 0; } @@ -683,6 +706,7 @@ close_dpif_backer(struct dpif_backer *backer, bool del) } dpif_close(backer->dpif); id_pool_destroy(backer->meter_ids); + ct_zone_config_uninit(backer); free(backer); } @@ -694,6 +718,8 @@ struct odp_garbage { static void check_support(struct dpif_backer *backer); +#define MAX_TIMEOUT_POLICY_ID UINT32_MAX + static int open_dpif_backer(const char *type, struct dpif_backer **backerp) { @@ -811,6 +837,8 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) backer->meter_ids = NULL; } + ct_zone_config_init(backer); + /* Make a pristine snapshot of 'support' into 'boottime_support'. * 'boottime_support' can be checked to prevent 'support' to be changed * beyond the datapath capabilities. In case 'support' is changed by @@ -5086,6 +5114,269 @@ ct_flush(const struct ofproto *ofproto_, const uint16_t *zone) ct_dpif_flush(ofproto->backer->dpif, zone, NULL); } +static struct ct_timeout_policy * +ct_timeout_policy_lookup(const struct hmap *ct_tps, struct simap *tp) +{ + struct ct_timeout_policy *ct_tp; + + HMAP_FOR_EACH_WITH_HASH (ct_tp, node, simap_hash(tp), ct_tps) { + if (simap_equal(&ct_tp->tp, tp)) { + return ct_tp; + } + } + return NULL; +} + +static struct ct_timeout_policy * +ct_timeout_policy_alloc__(void) +{ + struct ct_timeout_policy *ct_tp = xzalloc(sizeof *ct_tp); + simap_init(&ct_tp->tp); + return ct_tp; +} + +static struct ct_timeout_policy * +ct_timeout_policy_alloc(struct simap *tp, struct id_pool *tp_ids) +{ + struct simap_node *node; + + struct ct_timeout_policy *ct_tp = ct_timeout_policy_alloc__(); + SIMAP_FOR_EACH (node, tp) { + simap_put(&ct_tp->tp, node->name, node->data); + } + + if (!id_pool_alloc_id(tp_ids, &ct_tp->tp_id)) { + VLOG_ERR_RL(&rl, "failed to allocate timeout policy id."); + simap_destroy(&ct_tp->tp); + free(tp); + return NULL; + } + + return ct_tp; +} + +static void +ct_timeout_policy_destroy(struct ct_timeout_policy *ct_tp, + struct id_pool *tp_ids) +{ + id_pool_free_id(tp_ids, ct_tp->tp_id); + simap_destroy(&ct_tp->tp); + ovsrcu_postpone(free, ct_tp); +} + +static void +ct_timeout_policy_unref(struct dpif_backer *backer, + struct ct_timeout_policy *ct_tp) +{ + if (ct_tp) { + ct_tp->ref_count--; + + if (!ct_tp->ref_count) { + hmap_remove(&backer->ct_tps, &ct_tp->node); + ovs_list_push_back(&backer->ct_tp_kill_list, &ct_tp->list_node); + } + } +} + +static struct ct_zone * +ct_zone_lookup(const struct cmap *ct_zones, uint16_t zone) +{ + struct ct_zone *ct_zone; + + CMAP_FOR_EACH_WITH_HASH (ct_zone, node, hash_int(zone, 0), ct_zones) { + if (ct_zone->zone_id == zone) { + return ct_zone; + } + } + return NULL; +} + +static struct ct_zone * +ct_zone_alloc(uint16_t zone) +{ + struct ct_zone *ct_zone = xzalloc(sizeof *ct_zone); + ct_zone->zone_id = zone; + return ct_zone; +} + +static void +ct_zone_destroy(struct ct_zone *ct_zone) +{ + ovsrcu_postpone(free, ct_zone); +} + +static void +ct_zone_remove_and_destroy(struct dpif_backer *backer, struct ct_zone *ct_zone) +{ + cmap_remove(&backer->ct_zones, &ct_zone->node, + hash_int(ct_zone->zone_id, 0)); + ct_zone_destroy(ct_zone); +} + +static void +ct_add_timeout_policy_to_dpif(struct dpif *dpif, + struct ct_timeout_policy *ct_tp) +{ + struct ct_dpif_timeout_policy cdtp; + struct simap_node *node; + + cdtp.id = ct_tp->tp_id; + SIMAP_FOR_EACH (node, &ct_tp->tp) { + ct_dpif_set_timeout_policy_attr_by_name(&cdtp, node->name, node->data); + } + + int err = ct_dpif_set_timeout_policy(dpif, &cdtp); + if (err) { + VLOG_ERR_RL(&rl, "failed to set timeout policy %"PRIu32" (%s)", + ct_tp->tp_id, ovs_strerror(err)); + } +} + +static void +clear_existing_ct_timeout_policies(struct dpif_backer *backer) +{ + /* In kernel datapath, when OVS starts, there may be some pre-existing + * timeout policies in the kernel. To avoid reassign the same timeout + * policy ids, we dump all the pre-existing timeout policies and keep + * the ids in the pool. Since OVS will not use those timeout policies + * for new datapath flow, we add them to the kill list and remove + * them later on. */ + void *state; + + int err = ct_dpif_timeout_policy_dump_start(backer->dpif, &state); + if (err) { + return; + } + + struct ct_dpif_timeout_policy cdtp; + while (!(err = ct_dpif_timeout_policy_dump_next(backer->dpif, state, + &cdtp))) { + struct ct_timeout_policy *ct_tp = ct_timeout_policy_alloc__(); + ct_tp->tp_id = cdtp.id; + id_pool_add(backer->tp_ids, cdtp.id); + ovs_list_insert(&backer->ct_tp_kill_list, &ct_tp->list_node); + } + + ct_dpif_timeout_policy_dump_done(backer->dpif, state); +} + +static void +ct_zone_config_init(struct dpif_backer *backer) +{ + cmap_init(&backer->ct_zones); + hmap_init(&backer->ct_tps); + backer->tp_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID); + ovs_list_init(&backer->ct_tp_kill_list); + clear_existing_ct_timeout_policies(backer); +} + +static void +ct_zone_config_uninit(struct dpif_backer *backer) +{ + struct ct_timeout_policy *ct_tp; + struct ct_zone *ct_zone; + + CMAP_FOR_EACH (ct_zone, node, &backer->ct_zones) { + ct_zone_remove_and_destroy(backer, ct_zone); + } + + HMAP_FOR_EACH_POP (ct_tp, node, &backer->ct_tps) { + ct_timeout_policy_destroy(ct_tp, backer->tp_ids); + } + + LIST_FOR_EACH_POP (ct_tp, list_node, &backer->ct_tp_kill_list) { + ct_timeout_policy_destroy(ct_tp, backer->tp_ids); + } + + cmap_destroy(&backer->ct_zones); + hmap_destroy(&backer->ct_tps); + id_pool_destroy(backer->tp_ids); +} + +static void +ct_zone_timeout_policy_sweep(struct dpif_backer *backer) +{ + if (!ovs_list_is_empty(&backer->ct_tp_kill_list)) { + struct ct_timeout_policy *ct_tp, *next; + + LIST_FOR_EACH_SAFE (ct_tp, next, list_node, &backer->ct_tp_kill_list) { + int err = ct_dpif_del_timeout_policy(backer->dpif, ct_tp->tp_id); + if (!err) { + ovs_list_remove(&ct_tp->list_node); + ct_timeout_policy_destroy(ct_tp, backer->tp_ids); + } else { + VLOG_INFO_RL(&rl, "failed to delete timeout policy id = " + "%"PRIu32" %s", ct_tp->tp_id, ovs_strerror(err)); + } + } + } +} + +static void +ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone, + struct simap *timeout_policy) +{ + struct dpif_backer *backer; + struct ct_timeout_policy *ct_tp; + struct ct_zone *ct_zone; + + backer = shash_find_data(&all_dpif_backers, datapath_type); + if (!backer) { + return; + } + + ct_tp = ct_timeout_policy_lookup(&backer->ct_tps, timeout_policy); + if (!ct_tp) { + ct_tp = ct_timeout_policy_alloc(timeout_policy, backer->tp_ids); + if (ct_tp) { + hmap_insert(&backer->ct_tps, &ct_tp->node, simap_hash(&ct_tp->tp)); + ct_add_timeout_policy_to_dpif(backer->dpif, ct_tp); + } else { + VLOG_ERR_RL(&rl, "failed to allocate timeout policy"); + return; + } + } + + ct_zone = ct_zone_lookup(&backer->ct_zones, zone); + if (ct_zone) { + if (ct_zone->ct_tp != ct_tp) { + /* Add the new zone timeout pollicy. */ + struct ct_zone *new_ct_zone = ct_zone_alloc(zone); + new_ct_zone->ct_tp = ct_tp; + ct_tp->ref_count++; + cmap_replace(&backer->ct_zones, &ct_zone->node, &new_ct_zone->node, + hash_int(zone, 0)); + + /* Deletes the old zone timeout policy. */ + ct_timeout_policy_unref(backer, ct_zone->ct_tp); + ct_zone_destroy(ct_zone); + } + } else { + struct ct_zone *new_ct_zone = ct_zone_alloc(zone); + new_ct_zone->ct_tp = ct_tp; + cmap_insert(&backer->ct_zones, &new_ct_zone->node, hash_int(zone, 0)); + ct_tp->ref_count++; + } +} + +static void +ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone) +{ + struct dpif_backer *backer; + struct ct_zone *ct_zone; + + backer = shash_find_data(&all_dpif_backers, datapath_type); + if (!backer) { + return; + } + + ct_zone = ct_zone_lookup(&backer->ct_zones, zone); + if (ct_zone) { + ct_timeout_policy_unref(backer, ct_zone->ct_tp); + ct_zone_remove_and_destroy(backer, ct_zone); + } +} + static bool set_frag_handling(struct ofproto *ofproto_, enum ofputil_frag_handling frag_handling) @@ -6189,4 +6480,6 @@ const struct ofproto_class ofproto_dpif_class = { get_datapath_version, /* get_datapath_version */ type_set_config, ct_flush, /* ct_flush */ + ct_set_zone_timeout_policy, + ct_del_zone_timeout_policy, }; diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index cd5321eb942c..0dd7a45fe550 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -245,6 +245,16 @@ struct dpif_backer { /* Meter. */ struct id_pool *meter_ids; /* Datapath meter allocation. */ + /* Connection tracking. */ + struct id_pool *tp_ids; /* Datapath timeout policy id + * allocation. */ + struct cmap ct_zones; /* "struct ct_zone"s indexed by zone + * id. */ + struct hmap ct_tps; /* "struct ct_timeout_policy"s indexed + * by timeout policy (struct simap). */ + struct ovs_list ct_tp_kill_list; /* A list of timeout policy to be + * deleted. */ + /* Version string of the datapath stored in OVSDB. */ char *dp_version_string; diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 7907d4bfb416..54da71737b96 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -58,6 +58,7 @@ #include "tun-metadata.h" #include "versions.h" #include "vl-mff-map.h" +#include "vswitch-idl.h" struct match; struct ofputil_flow_mod; @@ -1872,6 +1873,15 @@ struct ofproto_class { /* Flushes the connection tracking tables. If 'zone' is not NULL, * only deletes connections in '*zone'. */ void (*ct_flush)(const struct ofproto *, const uint16_t *zone); + + /* Sets conntrack timeout policy specified by 'timeout_policy' to 'zone' + * in datapath type 'dp_type'. */ + void (*ct_set_zone_timeout_policy)(const char *dp_type, uint16_t zone, + struct simap *timeout_policy); + + /* Deletes the timeout policy associated with 'zone' in datapath type + * 'dp_type'. */ + void (*ct_del_zone_timeout_policy)(const char *dp_type, uint16_t zone); }; extern const struct ofproto_class ofproto_dpif_class; diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 1d6fc00696f8..4bcb285c7457 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -935,6 +935,36 @@ ofproto_get_flow_restore_wait(void) return flow_restore_wait; } +/* Connection tracking configuration. */ +void +ofproto_ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone, + struct simap *timeout_policy) +{ + const struct ofproto_class *class; + + datapath_type = ofproto_normalize_type(datapath_type); + class = ofproto_class_find__(datapath_type); + + if (class->ct_set_zone_timeout_policy) { + class->ct_set_zone_timeout_policy(datapath_type, zone, + timeout_policy); + } +} + +void +ofproto_ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone) +{ + const struct ofproto_class *class; + + datapath_type = ofproto_normalize_type(datapath_type); + class = ofproto_class_find__(datapath_type); + + if (class->ct_del_zone_timeout_policy) { + class->ct_del_zone_timeout_policy(datapath_type, zone); + } + +} + /* Spanning Tree Protocol (STP) configuration. */ diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 6e4afffa17e0..acd8bdef78df 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -362,6 +362,11 @@ int ofproto_get_stp_status(struct ofproto *, struct ofproto_stp_status *); int ofproto_set_rstp(struct ofproto *, const struct ofproto_rstp_settings *); int ofproto_get_rstp_status(struct ofproto *, struct ofproto_rstp_status *); void ofproto_set_vlan_limit(int vlan_limit); +void ofproto_ct_set_zone_timeout_policy(const char *datapath_type, + uint16_t zone, + struct simap *timeout_policy); +void ofproto_ct_del_zone_timeout_policy(const char *datapath_type, + uint16_t zone); /* Configuration of ports. */ void ofproto_port_unregister(struct ofproto *, ofp_port_t ofp_port); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 2976771aeaba..c09343536dba 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -153,9 +153,35 @@ struct aa_mapping { char *br_name; }; +/* Internal representation of conntrak zone configuration table in OVSDB. */ +struct ct_zone { + uint16_t zone; + struct simap tp; /* A map from timeout policy attribute to + * timeout value. */ + unsigned int last_used; /* The last idl_seqno that this struct is used + * in OVSDB. This number is used for garbage + * collection. */ + struct hmap_node node; /* Element in struct datapath_cfgs's + * "ct_zone_timeout_policies" hmap. */ +}; + +/* Internal representation of datapath configuration table in OVSDB. */ +struct datapath { + char *type; /* Datapath type. */ + struct hmap ct_zones; /* "struct ct_zone"s indexed by zone id. */ + struct hmap_node node; /* In 'all_datapath_cfgs'. */ + const struct ovsrec_datapath *dp_cfg; + unsigned int last_used; /* The last idl_seqno that this struct is used + * in OVSDB. This number is used for garbage + * collection. */ +}; + /* All bridges, indexed by name. */ static struct hmap all_bridges = HMAP_INITIALIZER(&all_bridges); +/* All datapath configuartions, indexed by type. */ +static struct hmap all_datapaths = HMAP_INITIALIZER(&all_datapaths); + /* OVSDB IDL used to obtain configuration. */ static struct ovsdb_idl *idl; @@ -588,6 +614,181 @@ config_ofproto_types(const struct smap *other_config) } static void +get_timeout_policy_from_ovsrec(struct simap *tp, + const struct ovsrec_ct_timeout_policy *tp_cfg) +{ + for (size_t i = 0; i < tp_cfg->n_timeouts; i++) { + simap_put(tp, tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]); + } +} + +static struct ct_zone * +ct_zone_lookup(struct hmap *ct_zones, uint16_t zone) +{ + struct ct_zone *ct_zone; + + HMAP_FOR_EACH_WITH_HASH (ct_zone, node, hash_int(zone, 0), ct_zones) { + if (ct_zone->zone == zone) { + return ct_zone; + } + } + return NULL; +} + +static struct ct_zone * +ct_zone_alloc(uint16_t zone, struct ovsrec_ct_timeout_policy *tp_cfg) +{ + struct ct_zone *ct_zone = xzalloc(sizeof *ct_zone); + + ct_zone->zone = zone; + simap_init(&ct_zone->tp); + get_timeout_policy_from_ovsrec(&ct_zone->tp, tp_cfg); + return ct_zone; +} + +static void +ct_zone_remove_and_destroy(struct datapath *dp, struct ct_zone *ct_zone) +{ + hmap_remove(&dp->ct_zones, &ct_zone->node); + simap_destroy(&ct_zone->tp); + free(ct_zone); +} + +/* Replace 'old_tp' by 'new_tp' (destroyed 'new_tp'). Returns true if 'old_tp' + * and 'new_tp' contains different data, false if they are the same. */ +static bool +update_timeout_policy(struct simap *old_tp, struct simap *new_tp) +{ + bool changed = !simap_equal(old_tp, new_tp); + simap_swap(old_tp, new_tp); + simap_destroy(new_tp); + return changed; +} + +static struct datapath * +datapath_lookup(const char *type) +{ + struct datapath *dp; + + HMAP_FOR_EACH_WITH_HASH (dp, node, hash_string(type, 0), &all_datapaths) { + if (!strcmp(dp->type, type)) { + return dp; + } + } + return NULL; +} + +static struct datapath * +datapath_create(const struct ovsrec_datapath *dp_cfg, const char *type) +{ + struct datapath *dp; + + ovs_assert(!datapath_lookup(type)); + dp = xzalloc(sizeof *dp); + + dp->type = xstrdup(type); + dp->dp_cfg = dp_cfg; + + hmap_init(&dp->ct_zones); + hmap_insert(&all_datapaths, &dp->node, hash_string(type, 0)); + return dp; +} + +static void +datapath_destroy(struct datapath *dp) +{ + struct ct_zone *ct_zone; + + if (dp) { + HMAP_FOR_EACH (ct_zone, node, &dp->ct_zones) { + ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone); + ct_zone_remove_and_destroy(dp, ct_zone); + } + + hmap_remove(&all_datapaths, &dp->node); + hmap_destroy(&dp->ct_zones); + free(dp->type); + free(dp); + } +} + +static void +update_datapath_cfgs(const struct ovsrec_open_vswitch *cfg) +{ + struct datapath *dp; + size_t i; + + /* Add new datapath configs. */ + for (i = 0; i < cfg->n_datapaths; i++) { + const struct ovsrec_datapath *dp_cfg = cfg->value_datapaths[i]; + char *dp_name = cfg->key_datapaths[i]; + + dp = datapath_lookup(dp_name); + if (!dp) { + dp = datapath_create(dp_cfg, dp_name); + } + dp->last_used = idl_seqno; + } + + /* Get rid of deleted datapath configs. */ + HMAP_FOR_EACH (dp, node, &all_datapaths) { + if (dp->last_used != idl_seqno) { + datapath_destroy(dp); + } + } +} + +static void +reconfigure_ct_zones(struct datapath *dp) +{ + const struct ovsrec_datapath *dp_cfg = dp->dp_cfg; + struct ct_zone *ct_zone; + + /* Loop through all zones. Add or update configs. */ + for (size_t i = 0; i < dp_cfg->n_ct_zones; i++) { + uint16_t zone = dp_cfg->key_ct_zones[i]; + struct ovsrec_ct_zone *zone_cfg = dp_cfg->value_ct_zones[i]; + struct ovsrec_ct_timeout_policy *tp_cfg = zone_cfg->timeout_policy; + + ct_zone = ct_zone_lookup(&dp->ct_zones, zone); + if (ct_zone) { + struct simap new_tp = SIMAP_INITIALIZER(&new_tp); + get_timeout_policy_from_ovsrec(&new_tp, tp_cfg); + if (update_timeout_policy(&ct_zone->tp, &new_tp)) { + ofproto_ct_set_zone_timeout_policy(dp->type, ct_zone->zone, + &ct_zone->tp); + } + } else { + ct_zone = ct_zone_alloc(zone, tp_cfg); + hmap_insert(&dp->ct_zones, &ct_zone->node, hash_int(zone, 0)); + ofproto_ct_set_zone_timeout_policy(dp->type, ct_zone->zone, + &ct_zone->tp); + } + ct_zone->last_used = idl_seqno; + } + + /* Remove unused ct_zone configs. */ + HMAP_FOR_EACH (ct_zone, node, &dp->ct_zones) { + if (ct_zone->last_used != idl_seqno) { + ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone); + ct_zone_remove_and_destroy(dp, ct_zone); + } + } +} + +static void +reconfigure_datapath_cfgs(const struct ovsrec_open_vswitch *cfg) +{ + struct datapath *dp; + + update_datapath_cfgs(cfg); + + HMAP_FOR_EACH (dp, node, &all_datapaths) { + reconfigure_ct_zones(dp); + } +} + +static void bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) { struct sockaddr_in *managers; @@ -669,6 +870,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) } reconfigure_system_stats(ovs_cfg); + reconfigure_datapath_cfgs(ovs_cfg); /* Complete the configuration. */ sflow_bridge_number = 0;
This patch consumes the CT_Zone and CT_Timeout_Policy tables, maintains the zone-based configuration in the vswitchd. Whenever there is a database change, vswitchd will read the datapath, CT_Zone, and CT_Timeout_Policy tables from ovsdb, builds an internal snapshot of the database configuration in bridge.c, and pushes down the change into ofproto and dpif layer. If a new zone-based timeout policy is added, it updates the zone to timeout policy mapping in the per datapath type datapath structure in dpif-backer, and pushes down the timeout policy into the datapath via dpif interface. If a timeout policy is no longer used, for kernel datapath, vswitchd may not be able to remove it from datapath immediately since datapath flows can still reference the to-be-deleted timeout policies. Thus, we keep an timeout policy kill list, that vswitchd will go back to the list periodically and try to kill the unused timeout policies. Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com> --- ofproto/ofproto-dpif.c | 293 +++++++++++++++++++++++++++++++++++++++++++++ ofproto/ofproto-dpif.h | 10 ++ ofproto/ofproto-provider.h | 10 ++ ofproto/ofproto.c | 30 +++++ ofproto/ofproto.h | 5 + vswitchd/bridge.c | 202 +++++++++++++++++++++++++++++++ 6 files changed, 550 insertions(+)