Message ID | 20200513133135.48474-2-xiangxia.m.yue@gmail.com |
---|---|
State | Changes Requested |
Headers | show |
Series | expand the meter table and fix bug | expand |
On 5/13/20 3:31 PM, xiangxia.m.yue@gmail.com wrote: > From: Tonghao Zhang <xiangxia.m.yue@gmail.com> > > For now, ovs-vswitchd use the array of the dp_meter struct > to store meter's data, and at most, there are only 65536 > (defined by MAX_METERS) meters that can be used. But in some > case, for example, in the edge gateway, we should use 200,000, > at least, meters for IP address bandwidth limitation. > Every one IP address will use two meters for its rx and tx > path[1]. In other way, ovs-vswitchd should support meter-offload > (rte_mtr_xxx api introduced by dpdk.), but there are more than > 65536 meters in the hardware, such as Mellanox ConnectX-6. > > This patch use array to manage the meter, but it can ben expanded. > > [1]. > $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1 > $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0 > > Cc: Ilya Maximets <i.maximets@ovn.org> > Cc: William Tu <u9012063@gmail.com> > Cc: Jarno Rajahalme <jarno@ovn.org> > Cc: Ben Pfaff <blp@ovn.org> > Cc: Andy Zhou <azhou@ovn.org> > Cc: Pravin Shelar <pshelar@ovn.org> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com> > --- > v2: > * add comments for dp_meter_instance > * change the log > * remove extra newline > * I don't move the dp_netdev_meter_init/destroy up. because > them depends other meters function and put all meter function > together may make the codes clean. > --- Hi. Thanks for working on this! This is not a full review, just a few things that I spotted on a quick glance. I didn't review any thread safety/rcu aspects yet. Best regards, Ilya Maximets. > lib/dpif-netdev.c | 319 ++++++++++++++++++++++++++++++++++++---------- > 1 file changed, 250 insertions(+), 69 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index ef14e83b5f06..b5deaab31eb0 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -98,9 +98,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) > > /* Configuration parameters. */ > enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ > -enum { MAX_METERS = 65536 }; /* Maximum number of meters. */ > -enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */ > -enum { N_METER_LOCKS = 64 }; /* Maximum number of meters. */ > + > +/* Maximum number of meters in the table. */ > +#define METER_ENTRY_MAX (200000ULL) > +/* Maximum number of bands / meter. */ > +#define METER_BAND_MAX (8) > +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10) Why we need to change enums to defines and also rename them? > > COVERAGE_DEFINE(datapath_drop_meter); > COVERAGE_DEFINE(datapath_drop_upcall_error); > @@ -283,12 +286,26 @@ struct dp_meter { > uint16_t flags; > uint16_t n_bands; > uint32_t max_delta_t; > + uint32_t id; > + struct ovs_mutex lock; > uint64_t used; > uint64_t packet_count; > uint64_t byte_count; > struct dp_meter_band bands[]; > }; > > +struct dp_meter_instance { > + uint32_t n_meters; This should be called 'n_allocated' or smething like this. 'n_meters' makes me think that it's the number of actually used meters. > + /* Followed by struct dp_meter[n]; where n is the n_meters. */ > + OVSRCU_TYPE(struct dp_meter *) dp_meters[]; > +}; > + > +struct dp_meter_table { > + OVSRCU_TYPE(struct dp_meter_instance *) ti; What does 'ti' mean? I looked throught the code it always stands for meter instance, but how 'meter instance' relates to 'ti'? That is confusing. > + uint32_t count; Why count is part of 'dp_meter_table'? I think it should be part of 'dp_meter_instance' and named something like 'n_used', or actually 'n_meters'. > + struct ovs_mutex lock; > +}; Why we need this structure at all? Can it be just 3 fields inside struct dp_netdev? Why it is table? It's not a table. 'instance' is a table. Confusing. > + > struct pmd_auto_lb { > bool auto_lb_requested; /* Auto load balancing requested by user. */ > bool is_enabled; /* Current status of Auto load balancing. */ > @@ -329,8 +346,7 @@ struct dp_netdev { > atomic_uint32_t tx_flush_interval; > > /* Meters. */ > - struct ovs_mutex meter_locks[N_METER_LOCKS]; > - struct dp_meter *meters[MAX_METERS]; /* Meter bands. */ > + struct dp_meter_table meter_tbl; > > /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/ > OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min; > @@ -378,19 +394,6 @@ struct dp_netdev { > struct pmd_auto_lb pmd_alb; > }; > > -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id) > - OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS]) > -{ > - ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]); > -} > - > -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id) > - OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS]) > -{ > - ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]); > -} > - > - > static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp, > odp_port_t) > OVS_REQUIRES(dp->port_mutex); > @@ -1523,6 +1526,9 @@ choose_port(struct dp_netdev *dp, const char *name) > return ODPP_NONE; > } > > +static void dp_netdev_meter_init(struct dp_meter_table *tbl); > +static void dp_netdev_meter_destroy(struct dp_meter_table *tbl); These functions should be named dp_netdev_meter_table_{init,destroy}. > + > static int > create_dp_netdev(const char *name, const struct dpif_class *class, > struct dp_netdev **dpp) > @@ -1556,9 +1562,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class, > dp->reconfigure_seq = seq_create(); > dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq); > > - for (int i = 0; i < N_METER_LOCKS; ++i) { > - ovs_mutex_init_adaptive(&dp->meter_locks[i]); > - } > + dp_netdev_meter_init(&dp->meter_tbl); > > /* Disable upcalls by default. */ > dp_netdev_disable_upcall(dp); > @@ -1647,16 +1651,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp) > fat_rwlock_destroy(&dp->upcall_rwlock); > } > > -static void > -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id) > - OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS]) > -{ > - if (dp->meters[meter_id]) { > - free(dp->meters[meter_id]); > - dp->meters[meter_id] = NULL; > - } > -} > - > /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp' > * through the 'dp_netdevs' shash while freeing 'dp'. */ > static void > @@ -1694,16 +1688,7 @@ dp_netdev_free(struct dp_netdev *dp) > /* Upcalls must be disabled at this point */ > dp_netdev_destroy_upcall_lock(dp); > > - int i; > - > - for (i = 0; i < MAX_METERS; ++i) { > - meter_lock(dp, i); > - dp_delete_meter(dp, i); > - meter_unlock(dp, i); > - } > - for (i = 0; i < N_METER_LOCKS; ++i) { > - ovs_mutex_destroy(&dp->meter_locks[i]); > - } > + dp_netdev_meter_destroy(&dp->meter_tbl); > > free(dp->pmd_cmask); > free(CONST_CAST(char *, dp->name)); > @@ -5713,14 +5698,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp) > > > /* Meters */ > +static uint32_t > +meter_hash(struct dp_meter_instance *ti, uint32_t id) > +{ > + uint32_t n_meters = ti->n_meters; > + > + return id % n_meters; > +} Why we need a hash here in this implementation? Below code will be broken if meter_hash will hash different ids to the same hash value. There should be no hash or there should be good collision protection. > + > +static void > +dp_meter_free(struct dp_meter *meter) > +{ > + ovs_mutex_destroy(&meter->lock); > + free(meter); > +} > + > +static struct dp_meter_instance * > +dp_meter_instance_alloc(const uint32_t size) > +{ > + struct dp_meter_instance *ti; > + > + ti = xzalloc(sizeof(*ti) + sizeof(struct dp_meter *) * size); Don't parenthesize argument of sizeof if it's a variable. > + ti->n_meters = size; > + > + return ti; > +} > + > +static void > +dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size) > +{ > + struct dp_meter_instance *new_ti; > + struct dp_meter_instance *ti; > + int n_meters; > + int i; > + > + new_ti = dp_meter_instance_alloc(size); > + > + ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti); > + n_meters = MIN(size, ti->n_meters); > + > + for (i = 0; i < n_meters; i++) { > + if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) { > + new_ti->dp_meters[i] = ti->dp_meters[i]; > + } > + } > + > + ovsrcu_set(&tbl->ti, new_ti); > + ovsrcu_postpone(free, ti); > +} > + > +static void > +dp_meter_instance_insert(struct dp_meter_instance *ti, 'dp_meter_instance_insert' sounds like we're going to create a new dp_meter_instance and insert it to dp_meter_table, but it's not the case. > + struct dp_meter *meter) > +{ > + uint32_t hash; > + > + hash = meter_hash(ti, meter->id); > + ovsrcu_set(&ti->dp_meters[hash], meter); > +} > + > +static void > +dp_meter_instance_remove(struct dp_meter_instance *ti, > + struct dp_meter *meter) > +{ > + uint32_t hash; > + > + hash = meter_hash(ti, meter->id); > + ovsrcu_set(&ti->dp_meters[hash], NULL); > +} > + > +static void > +dp_netdev_meter_init(struct dp_meter_table *tbl) > +{ > + struct dp_meter_instance *ti; > + > + ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN); > + ovsrcu_set(&tbl->ti, ti); > + > + ovs_mutex_init(&tbl->lock); > + tbl->count = 0; > +} > + > +static void > +dp_netdev_meter_destroy(struct dp_meter_table *tbl) 'dp_netdev_meter_destroy' sounds like we're going to destroy a single meter. > +{ > + struct dp_meter_instance *ti; > + int i; > + > + ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti); > + for (i = 0; i < ti->n_meters; i++) { > + struct dp_meter *meter; > + > + meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]); > + if (meter) { > + ovsrcu_postpone(dp_meter_free, meter); > + } > + } > + > + ovsrcu_postpone(free, ti); > + ovs_mutex_destroy(&tbl->lock); > +} > + > +static struct dp_meter * > +dp_meter_lookup(struct dp_meter_table *meter_tbl, uint32_t meter_id) > +{ > + struct dp_meter_instance *ti; > + struct dp_meter *meter; > + uint32_t hash; > + > + ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti); After gitting rcu protected pointer, you have to check if it's a valid pointer. > + hash = meter_hash(ti, meter_id); > + > + meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[hash]); > + if (meter && meter->id == meter_id) { > + return meter; > + } > + > + return NULL; > +} > + > +static void > +dp_meter_detach_free(struct dp_meter_table *meter_tbl, uint32_t meter_id) > + OVS_REQUIRES(meter_tbl->lock) Please, keep thread safety annotations indented with 4 spaces from the left. > +{ > + struct dp_meter_instance *ti; > + struct dp_meter *meter; > + > + meter = dp_meter_lookup(meter_tbl, meter_id); > + if (!meter) { > + return; > + } > + > + ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti); > + dp_meter_instance_remove(ti, meter); > + ovsrcu_postpone(dp_meter_free, meter); > + > + meter_tbl->count--; > + /* Shrink the meter array if necessary. */ > + if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN && > + meter_tbl->count <= (ti->n_meters / 4)) { > + int half_size = ti->n_meters / 2; > + int i; > + > + /* Avoid hash collision, don't move slots to other place. > + * Make sure there are no references of meters in array > + * which will be released. > + */ > + for (i = half_size; i < ti->n_meters; i++) { > + if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) { > + return; > + } > + } > + > + dp_meter_instance_realloc(meter_tbl, half_size); > + } > +} > + > +static int > +dp_meter_attach(struct dp_meter_table *meter_tbl, struct dp_meter *meter) > + OVS_REQUIRES(meter_tbl->lock) ditto. > +{ > + struct dp_meter_instance *ti; > + uint32_t hash; > + > + ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti); > + hash = meter_hash(ti, meter->id); > + > + if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *, > + &ti->dp_meters[hash]))) { > + VLOG_WARN("Failed to attach meter id %u to slot %u/%u.\n", > + meter->id, hash, ti->n_meters); > + return EBUSY; How this could happen if you're always calling _detach_free before calling attach? > + } > + > + dp_meter_instance_insert(ti, meter); > + > + meter_tbl->count++; > + if (meter_tbl->count >= ti->n_meters) { > + dp_meter_instance_realloc(meter_tbl, ti->n_meters * 2); > + } > + > + return 0; > +} > + > static void > dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED, > struct ofputil_meter_features *features) > { > - features->max_meters = MAX_METERS; > + features->max_meters = METER_ENTRY_MAX; > features->band_types = DP_SUPPORTED_METER_BAND_TYPES; > features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK; > - features->max_bands = MAX_BANDS; > + features->max_bands = METER_BAND_MAX; > features->max_color = 0; > } > > @@ -5742,14 +5910,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, > uint32_t exceeded_rate[NETDEV_MAX_BURST]; > int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */ > > - if (meter_id >= MAX_METERS) { > + if (meter_id >= METER_ENTRY_MAX) { > return; > } > > - meter_lock(dp, meter_id); > - meter = dp->meters[meter_id]; > + meter = dp_meter_lookup(&dp->meter_tbl, meter_id); > if (!meter) { > - goto out; > + return; > } > > /* Initialize as negative values. */ > @@ -5757,6 +5924,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, > /* Initialize as zeroes. */ > memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate); > > + ovs_mutex_lock(&meter->lock); > /* All packets will hit the meter at the same time. */ > long_delta_t = now / 1000 - meter->used / 1000; /* msec */ > > @@ -5874,8 +6042,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, > dp_packet_batch_refill(packets_, packet, j); > } > } > - out: > - meter_unlock(dp, meter_id); > + > + ovs_mutex_unlock(&meter->lock); > } > > /* Meter set/get/del processing is still single-threaded. */ > @@ -5884,11 +6052,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, > struct ofputil_meter_config *config) > { > struct dp_netdev *dp = get_dp_netdev(dpif); > + struct dp_meter_table *meter_tbl = &dp->meter_tbl; > uint32_t mid = meter_id.uint32; > struct dp_meter *meter; > - int i; > + int err, i; > > - if (mid >= MAX_METERS) { > + if (mid >= METER_ENTRY_MAX) { > return EFBIG; /* Meter_id out of range. */ > } > > @@ -5896,7 +6065,7 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, > return EBADF; /* Unsupported flags set */ > } > > - if (config->n_bands > MAX_BANDS) { > + if (config->n_bands > METER_BAND_MAX) { > return EINVAL; > } > > @@ -5917,6 +6086,8 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, > meter->n_bands = config->n_bands; > meter->max_delta_t = 0; > meter->used = time_usec(); > + meter->id = mid; > + ovs_mutex_init(&meter->lock); > > /* set up bands */ > for (i = 0; i < config->n_bands; ++i) { > @@ -5942,12 +6113,22 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, > } > } > > - meter_lock(dp, mid); > - dp_delete_meter(dp, mid); /* Free existing meter, if any */ > - dp->meters[mid] = meter; > - meter_unlock(dp, mid); > + ovs_mutex_lock(&meter_tbl->lock); > + > + dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */ This doesn't look correct. Why should we destroy some other meter to create this new one? > + err = dp_meter_attach(meter_tbl, meter); > + if (err) { > + goto unlock_out; > + } > + > + ovs_mutex_unlock(&meter_tbl->lock); > > return 0; > + > +unlock_out: > + ovs_mutex_unlock(&meter_tbl->lock); > + dp_meter_free(meter); > + return err; > } > > static int > @@ -5955,23 +6136,23 @@ dpif_netdev_meter_get(const struct dpif *dpif, > ofproto_meter_id meter_id_, > struct ofputil_meter_stats *stats, uint16_t n_bands) > { > - const struct dp_netdev *dp = get_dp_netdev(dpif); > + struct dp_netdev *dp = get_dp_netdev(dpif); > uint32_t meter_id = meter_id_.uint32; > - int retval = 0; > + const struct dp_meter *meter; > > - if (meter_id >= MAX_METERS) { > + if (meter_id >= METER_ENTRY_MAX) { > return EFBIG; > } > > - meter_lock(dp, meter_id); > - const struct dp_meter *meter = dp->meters[meter_id]; > + meter = dp_meter_lookup(&dp->meter_tbl, meter_id); > if (!meter) { > - retval = ENOENT; > - goto done; > + return ENOENT; > } > + > if (stats) { > int i = 0; > > + ovs_mutex_lock(&meter->lock); > stats->packet_in_count = meter->packet_count; > stats->byte_in_count = meter->byte_count; > > @@ -5979,13 +6160,12 @@ dpif_netdev_meter_get(const struct dpif *dpif, > stats->bands[i].packet_count = meter->bands[i].packet_count; > stats->bands[i].byte_count = meter->bands[i].byte_count; > } > + ovs_mutex_unlock(&meter->lock); > > stats->n_bands = i; > } > > -done: > - meter_unlock(dp, meter_id); > - return retval; > + return 0; > } > > static int > @@ -5994,15 +6174,16 @@ dpif_netdev_meter_del(struct dpif *dpif, > struct ofputil_meter_stats *stats, uint16_t n_bands) > { > struct dp_netdev *dp = get_dp_netdev(dpif); > + struct dp_meter_table *meter_tbl = &dp->meter_tbl; > int error; > > error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands); > if (!error) { > uint32_t meter_id = meter_id_.uint32; > > - meter_lock(dp, meter_id); > - dp_delete_meter(dp, meter_id); > - meter_unlock(dp, meter_id); > + ovs_mutex_lock(&meter_tbl->lock); > + dp_meter_detach_free(meter_tbl, meter_id); > + ovs_mutex_unlock(&meter_tbl->lock); > } > return error; > } >
On Sat, May 16, 2020 at 1:09 AM Ilya Maximets <i.maximets@ovn.org> wrote: > > On 5/13/20 3:31 PM, xiangxia.m.yue@gmail.com wrote: > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com> > > > > For now, ovs-vswitchd use the array of the dp_meter struct > > to store meter's data, and at most, there are only 65536 > > (defined by MAX_METERS) meters that can be used. But in some > > case, for example, in the edge gateway, we should use 200,000, > > at least, meters for IP address bandwidth limitation. > > Every one IP address will use two meters for its rx and tx > > path[1]. In other way, ovs-vswitchd should support meter-offload > > (rte_mtr_xxx api introduced by dpdk.), but there are more than > > 65536 meters in the hardware, such as Mellanox ConnectX-6. > > > > This patch use array to manage the meter, but it can ben expanded. > > > > [1]. > > $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1 > > $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0 > > > > Cc: Ilya Maximets <i.maximets@ovn.org> > > Cc: William Tu <u9012063@gmail.com> > > Cc: Jarno Rajahalme <jarno@ovn.org> > > Cc: Ben Pfaff <blp@ovn.org> > > Cc: Andy Zhou <azhou@ovn.org> > > Cc: Pravin Shelar <pshelar@ovn.org> > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com> > > --- > > v2: > > * add comments for dp_meter_instance > > * change the log > > * remove extra newline > > * I don't move the dp_netdev_meter_init/destroy up. because > > them depends other meters function and put all meter function > > together may make the codes clean. > > --- > > Hi. Thanks for working on this! > > This is not a full review, just a few things that I spotted on a quick glance. > I didn't review any thread safety/rcu aspects yet. > > Best regards, Ilya Maximets. > > > > lib/dpif-netdev.c | 319 ++++++++++++++++++++++++++++++++++++---------- > > 1 file changed, 250 insertions(+), 69 deletions(-) > > > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > > index ef14e83b5f06..b5deaab31eb0 100644 > > --- a/lib/dpif-netdev.c > > +++ b/lib/dpif-netdev.c > > @@ -98,9 +98,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) > > > > /* Configuration parameters. */ > > enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ > > -enum { MAX_METERS = 65536 }; /* Maximum number of meters. */ > > -enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */ > > -enum { N_METER_LOCKS = 64 }; /* Maximum number of meters. */ > > + > > +/* Maximum number of meters in the table. */ > > +#define METER_ENTRY_MAX (200000ULL) > > +/* Maximum number of bands / meter. */ > > +#define METER_BAND_MAX (8) > > +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10) > > Why we need to change enums to defines and also rename them? Hi, thanks for reviews. 1. I send a patch which will remove the MAX_FLOWS, but may be discussed again: http://patchwork.ozlabs.org/project/openvswitch/patch/1584309363-15048-1-git-send-email-xiangxia.m.yue@gmail.com/ 2. I introduce the DP_METER_ARRAY_SIZE_MIN which use the #define. so I change the enums to defines, and make the name more readable. > > > > COVERAGE_DEFINE(datapath_drop_meter); > > COVERAGE_DEFINE(datapath_drop_upcall_error); > > @@ -283,12 +286,26 @@ struct dp_meter { > > uint16_t flags; > > uint16_t n_bands; > > uint32_t max_delta_t; > > + uint32_t id; > > + struct ovs_mutex lock; > > uint64_t used; > > uint64_t packet_count; > > uint64_t byte_count; > > struct dp_meter_band bands[]; > > }; > > > > +struct dp_meter_instance { > > + uint32_t n_meters; > > This should be called 'n_allocated' or smething like this. > 'n_meters' makes me think that it's the number of actually used meters. OK > > + /* Followed by struct dp_meter[n]; where n is the n_meters. */ > > + OVSRCU_TYPE(struct dp_meter *) dp_meters[]; > > +}; > > + > > +struct dp_meter_table { > > + OVSRCU_TYPE(struct dp_meter_instance *) ti; > > What does 'ti' mean? I looked throught the code it always stands for meter instance, Yes > but how 'meter instance' relates to 'ti'? That is confusing. Change "ti" to "meter_instance" is better. > > + uint32_t count; > > Why count is part of 'dp_meter_table'? I think it should be part of 'dp_meter_instance' when we expand the table, just relloc the instance, instance is only a array. > and named something like 'n_used', or actually 'n_meters'. yes, we also can store it to instance. > > + struct ovs_mutex lock; > > +}; > > Why we need this structure at all? Can it be just 3 fields inside struct dp_netdev? I try to make the meter a separate module like kernel. and make the code readable. > Why it is table? It's not a table. 'instance' is a table. Confusing. introduce the instance is for easily expanding and shrinking the table. > > + > > struct pmd_auto_lb { > > bool auto_lb_requested; /* Auto load balancing requested by user. */ > > bool is_enabled; /* Current status of Auto load balancing. */ > > @@ -329,8 +346,7 @@ struct dp_netdev { > > atomic_uint32_t tx_flush_interval; > > > > /* Meters. */ > > - struct ovs_mutex meter_locks[N_METER_LOCKS]; > > - struct dp_meter *meters[MAX_METERS]; /* Meter bands. */ > > + struct dp_meter_table meter_tbl; > > > > /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/ > > OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min; > > @@ -378,19 +394,6 @@ struct dp_netdev { > > struct pmd_auto_lb pmd_alb; > > }; > > > > -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id) > > - OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS]) > > -{ > > - ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]); > > -} > > - > > -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id) > > - OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS]) > > -{ > > - ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]); > > -} > > - > > - > > static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp, > > odp_port_t) > > OVS_REQUIRES(dp->port_mutex); > > @@ -1523,6 +1526,9 @@ choose_port(struct dp_netdev *dp, const char *name) > > return ODPP_NONE; > > } > > > > +static void dp_netdev_meter_init(struct dp_meter_table *tbl); > > +static void dp_netdev_meter_destroy(struct dp_meter_table *tbl); > > These functions should be named dp_netdev_meter_table_{init,destroy}. Ok > > + > > static int > > create_dp_netdev(const char *name, const struct dpif_class *class, > > struct dp_netdev **dpp) > > @@ -1556,9 +1562,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class, > > dp->reconfigure_seq = seq_create(); > > dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq); > > > > - for (int i = 0; i < N_METER_LOCKS; ++i) { > > - ovs_mutex_init_adaptive(&dp->meter_locks[i]); > > - } > > + dp_netdev_meter_init(&dp->meter_tbl); > > > > /* Disable upcalls by default. */ > > dp_netdev_disable_upcall(dp); > > @@ -1647,16 +1651,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp) > > fat_rwlock_destroy(&dp->upcall_rwlock); > > } > > > > -static void > > -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id) > > - OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS]) > > -{ > > - if (dp->meters[meter_id]) { > > - free(dp->meters[meter_id]); > > - dp->meters[meter_id] = NULL; > > - } > > -} > > - > > /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp' > > * through the 'dp_netdevs' shash while freeing 'dp'. */ > > static void > > @@ -1694,16 +1688,7 @@ dp_netdev_free(struct dp_netdev *dp) > > /* Upcalls must be disabled at this point */ > > dp_netdev_destroy_upcall_lock(dp); > > > > - int i; > > - > > - for (i = 0; i < MAX_METERS; ++i) { > > - meter_lock(dp, i); > > - dp_delete_meter(dp, i); > > - meter_unlock(dp, i); > > - } > > - for (i = 0; i < N_METER_LOCKS; ++i) { > > - ovs_mutex_destroy(&dp->meter_locks[i]); > > - } > > + dp_netdev_meter_destroy(&dp->meter_tbl); > > > > free(dp->pmd_cmask); > > free(CONST_CAST(char *, dp->name)); > > @@ -5713,14 +5698,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp) > > > > > > /* Meters */ > > +static uint32_t > > +meter_hash(struct dp_meter_instance *ti, uint32_t id) > > +{ > > + uint32_t n_meters = ti->n_meters; > > + > > + return id % n_meters; > > +} > > Why we need a hash here in this implementation? > Below code will be broken if meter_hash will hash different ids to the I guess that may avoid a bug which generate a meter id is larger than "ti->n_meters", and then access the memory which not allocated. That does not break the codes. because "id % n_meters" always is id. because meter id is generated by id pool. > same hash value. There should be no hash or there should be good collision > protection. Yes, a little confused. > > + > > +static void > > +dp_meter_free(struct dp_meter *meter) > > +{ > > + ovs_mutex_destroy(&meter->lock); > > + free(meter); > > +} > > + > > +static struct dp_meter_instance * > > +dp_meter_instance_alloc(const uint32_t size) > > +{ > > + struct dp_meter_instance *ti; > > + > > + ti = xzalloc(sizeof(*ti) + sizeof(struct dp_meter *) * size); > > Don't parenthesize argument of sizeof if it's a variable. Ok > > + ti->n_meters = size; > > + > > + return ti; > > +} > > + > > +static void > > +dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size) > > +{ > > + struct dp_meter_instance *new_ti; > > + struct dp_meter_instance *ti; > > + int n_meters; > > + int i; > > + > > + new_ti = dp_meter_instance_alloc(size); > > + > > + ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti); > > + n_meters = MIN(size, ti->n_meters); > > + > > + for (i = 0; i < n_meters; i++) { > > + if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) { > > + new_ti->dp_meters[i] = ti->dp_meters[i]; > > + } > > + } > > + > > + ovsrcu_set(&tbl->ti, new_ti); > > + ovsrcu_postpone(free, ti); > > +} > > + > > +static void > > +dp_meter_instance_insert(struct dp_meter_instance *ti, > > 'dp_meter_instance_insert' sounds like we're going to create a new > dp_meter_instance and insert it to dp_meter_table, but it's not the > case. In this patch, dp_meter_instance_alloc/relloc will create the meter instance. and dp_meter_instance_insert/remove will insert or remove the meter. I guess we introduce the meter instance in table, will make the expand/shrink the table easy: realloc a instance and switch it protected by rcu lock. and I don't introduce instance to netdev datapath, because I think meter should be a separate mode, like it implemented in kernel datapath. > > + struct dp_meter *meter) > > +{ > > + uint32_t hash; > > + > > + hash = meter_hash(ti, meter->id); > > + ovsrcu_set(&ti->dp_meters[hash], meter); > > +} > > + > > +static void > > +dp_meter_instance_remove(struct dp_meter_instance *ti, > > + struct dp_meter *meter) > > +{ > > + uint32_t hash; > > + > > + hash = meter_hash(ti, meter->id); > > + ovsrcu_set(&ti->dp_meters[hash], NULL); > > +} > > + > > +static void > > +dp_netdev_meter_init(struct dp_meter_table *tbl) > > +{ > > + struct dp_meter_instance *ti; > > + > > + ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN); > > + ovsrcu_set(&tbl->ti, ti); > > + > > + ovs_mutex_init(&tbl->lock); > > + tbl->count = 0; > > +} > > + > > +static void > > +dp_netdev_meter_destroy(struct dp_meter_table *tbl) > > 'dp_netdev_meter_destroy' sounds like we're going to destroy a single meter. will be updated in v3 > > +{ > > + struct dp_meter_instance *ti; > > + int i; > > + > > + ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti); > > + for (i = 0; i < ti->n_meters; i++) { > > + struct dp_meter *meter; > > + > > + meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]); > > + if (meter) { > > + ovsrcu_postpone(dp_meter_free, meter); > > + } > > + } > > + > > + ovsrcu_postpone(free, ti); > > + ovs_mutex_destroy(&tbl->lock); > > +} > > + > > +static struct dp_meter * > > +dp_meter_lookup(struct dp_meter_table *meter_tbl, uint32_t meter_id) > > +{ > > + struct dp_meter_instance *ti; > > + struct dp_meter *meter; > > + uint32_t hash; > > + > > + ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti); > > After gitting rcu protected pointer, you have to check if it's a valid pointer. I guess meter_tbl->ti is set protected by rcu. there shoud not a NULL. but we can check it again. > > + hash = meter_hash(ti, meter_id); > > + > > + meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[hash]); > > + if (meter && meter->id == meter_id) { > > + return meter; > > + } > > + > > + return NULL; > > +} > > + > > +static void > > +dp_meter_detach_free(struct dp_meter_table *meter_tbl, uint32_t meter_id) > > + OVS_REQUIRES(meter_tbl->lock) > > Please, keep thread safety annotations indented with 4 spaces from the left. Ok > > +{ > > + struct dp_meter_instance *ti; > > + struct dp_meter *meter; > > + > > + meter = dp_meter_lookup(meter_tbl, meter_id); > > + if (!meter) { > > + return; > > + } > > + > > + ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti); > > + dp_meter_instance_remove(ti, meter); > > + ovsrcu_postpone(dp_meter_free, meter); > > + > > + meter_tbl->count--; > > + /* Shrink the meter array if necessary. */ > > + if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN && > > + meter_tbl->count <= (ti->n_meters / 4)) { > > + int half_size = ti->n_meters / 2; > > + int i; > > + > > + /* Avoid hash collision, don't move slots to other place. > > + * Make sure there are no references of meters in array > > + * which will be released. > > + */ > > + for (i = half_size; i < ti->n_meters; i++) { > > + if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) { > > + return; > > + } > > + } > > + > > + dp_meter_instance_realloc(meter_tbl, half_size); > > + } > > +} > > + > > +static int > > +dp_meter_attach(struct dp_meter_table *meter_tbl, struct dp_meter *meter) > > + OVS_REQUIRES(meter_tbl->lock) > > ditto. > > > +{ > > + struct dp_meter_instance *ti; > > + uint32_t hash; > > + > > + ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti); > > + hash = meter_hash(ti, meter->id); > > + > > + if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *, > > + &ti->dp_meters[hash]))) { > > + VLOG_WARN("Failed to attach meter id %u to slot %u/%u.\n", > > + meter->id, hash, ti->n_meters); > > + return EBUSY; > > How this could happen if you're always calling _detach_free before calling attach? I guess what you mean is that: 1. mod_meter comand 2. del the meter while adding the meter * that code is checking the whether the slot is used, if used, there may be a bug. but we log the info: meter id, and slot, and return EBUSY. * calling the _deatch_free is for mod the meter case, in that case we should delete it and then attach a new one. * when we add/del/mod the meter, we will get the lock in the table. > > + } > > + > > + dp_meter_instance_insert(ti, meter); > > + > > + meter_tbl->count++; > > + if (meter_tbl->count >= ti->n_meters) { > > + dp_meter_instance_realloc(meter_tbl, ti->n_meters * 2); > > + } > > + > > + return 0; > > +} > > + > > static void > > dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED, > > struct ofputil_meter_features *features) > > { > > - features->max_meters = MAX_METERS; > > + features->max_meters = METER_ENTRY_MAX; > > features->band_types = DP_SUPPORTED_METER_BAND_TYPES; > > features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK; > > - features->max_bands = MAX_BANDS; > > + features->max_bands = METER_BAND_MAX; > > features->max_color = 0; > > } > > > > @@ -5742,14 +5910,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, > > uint32_t exceeded_rate[NETDEV_MAX_BURST]; > > int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */ > > > > - if (meter_id >= MAX_METERS) { > > + if (meter_id >= METER_ENTRY_MAX) { > > return; > > } > > > > - meter_lock(dp, meter_id); > > - meter = dp->meters[meter_id]; > > + meter = dp_meter_lookup(&dp->meter_tbl, meter_id); > > if (!meter) { > > - goto out; > > + return; > > } > > > > /* Initialize as negative values. */ > > @@ -5757,6 +5924,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, > > /* Initialize as zeroes. */ > > memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate); > > > > + ovs_mutex_lock(&meter->lock); > > /* All packets will hit the meter at the same time. */ > > long_delta_t = now / 1000 - meter->used / 1000; /* msec */ > > > > @@ -5874,8 +6042,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, > > dp_packet_batch_refill(packets_, packet, j); > > } > > } > > - out: > > - meter_unlock(dp, meter_id); > > + > > + ovs_mutex_unlock(&meter->lock); > > } > > > > /* Meter set/get/del processing is still single-threaded. */ > > @@ -5884,11 +6052,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, > > struct ofputil_meter_config *config) > > { > > struct dp_netdev *dp = get_dp_netdev(dpif); > > + struct dp_meter_table *meter_tbl = &dp->meter_tbl; > > uint32_t mid = meter_id.uint32; > > struct dp_meter *meter; > > - int i; > > + int err, i; > > > > - if (mid >= MAX_METERS) { > > + if (mid >= METER_ENTRY_MAX) { > > return EFBIG; /* Meter_id out of range. */ > > } > > > > @@ -5896,7 +6065,7 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, > > return EBADF; /* Unsupported flags set */ > > } > > > > - if (config->n_bands > MAX_BANDS) { > > + if (config->n_bands > METER_BAND_MAX) { > > return EINVAL; > > } > > > > @@ -5917,6 +6086,8 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, > > meter->n_bands = config->n_bands; > > meter->max_delta_t = 0; > > meter->used = time_usec(); > > + meter->id = mid; > > + ovs_mutex_init(&meter->lock); > > > > /* set up bands */ > > for (i = 0; i < config->n_bands; ++i) { > > @@ -5942,12 +6113,22 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, > > } > > } > > > > - meter_lock(dp, mid); > > - dp_delete_meter(dp, mid); /* Free existing meter, if any */ > > - dp->meters[mid] = meter; > > - meter_unlock(dp, mid); > > + ovs_mutex_lock(&meter_tbl->lock); > > + > > + dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */ > > This doesn't look correct. Why should we destroy some other meter to create > this new one? Think about modifying the meter case, do it as original logic. and this patch does not change the logic. > > + err = dp_meter_attach(meter_tbl, meter); > > + if (err) { > > + goto unlock_out; > > + } > > + > > + ovs_mutex_unlock(&meter_tbl->lock); > > > > return 0; > > + > > +unlock_out: > > + ovs_mutex_unlock(&meter_tbl->lock); > > + dp_meter_free(meter); > > + return err; > > } > > > > static int > > @@ -5955,23 +6136,23 @@ dpif_netdev_meter_get(const struct dpif *dpif, > > ofproto_meter_id meter_id_, > > struct ofputil_meter_stats *stats, uint16_t n_bands) > > { > > - const struct dp_netdev *dp = get_dp_netdev(dpif); > > + struct dp_netdev *dp = get_dp_netdev(dpif); > > uint32_t meter_id = meter_id_.uint32; > > - int retval = 0; > > + const struct dp_meter *meter; > > > > - if (meter_id >= MAX_METERS) { > > + if (meter_id >= METER_ENTRY_MAX) { > > return EFBIG; > > } > > > > - meter_lock(dp, meter_id); > > - const struct dp_meter *meter = dp->meters[meter_id]; > > + meter = dp_meter_lookup(&dp->meter_tbl, meter_id); > > if (!meter) { > > - retval = ENOENT; > > - goto done; > > + return ENOENT; > > } > > + > > if (stats) { > > int i = 0; > > > > + ovs_mutex_lock(&meter->lock); > > stats->packet_in_count = meter->packet_count; > > stats->byte_in_count = meter->byte_count; > > > > @@ -5979,13 +6160,12 @@ dpif_netdev_meter_get(const struct dpif *dpif, > > stats->bands[i].packet_count = meter->bands[i].packet_count; > > stats->bands[i].byte_count = meter->bands[i].byte_count; > > } > > + ovs_mutex_unlock(&meter->lock); > > > > stats->n_bands = i; > > } > > > > -done: > > - meter_unlock(dp, meter_id); > > - return retval; > > + return 0; > > } > > > > static int > > @@ -5994,15 +6174,16 @@ dpif_netdev_meter_del(struct dpif *dpif, > > struct ofputil_meter_stats *stats, uint16_t n_bands) > > { > > struct dp_netdev *dp = get_dp_netdev(dpif); > > + struct dp_meter_table *meter_tbl = &dp->meter_tbl; > > int error; > > > > error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands); > > if (!error) { > > uint32_t meter_id = meter_id_.uint32; > > > > - meter_lock(dp, meter_id); > > - dp_delete_meter(dp, meter_id); > > - meter_unlock(dp, meter_id); > > + ovs_mutex_lock(&meter_tbl->lock); > > + dp_meter_detach_free(meter_tbl, meter_id); > > + ovs_mutex_unlock(&meter_tbl->lock); > > } > > return error; > > } > > >
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index ef14e83b5f06..b5deaab31eb0 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -98,9 +98,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) /* Configuration parameters. */ enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ -enum { MAX_METERS = 65536 }; /* Maximum number of meters. */ -enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */ -enum { N_METER_LOCKS = 64 }; /* Maximum number of meters. */ + +/* Maximum number of meters in the table. */ +#define METER_ENTRY_MAX (200000ULL) +/* Maximum number of bands / meter. */ +#define METER_BAND_MAX (8) +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10) COVERAGE_DEFINE(datapath_drop_meter); COVERAGE_DEFINE(datapath_drop_upcall_error); @@ -283,12 +286,26 @@ struct dp_meter { uint16_t flags; uint16_t n_bands; uint32_t max_delta_t; + uint32_t id; + struct ovs_mutex lock; uint64_t used; uint64_t packet_count; uint64_t byte_count; struct dp_meter_band bands[]; }; +struct dp_meter_instance { + uint32_t n_meters; + /* Followed by struct dp_meter[n]; where n is the n_meters. */ + OVSRCU_TYPE(struct dp_meter *) dp_meters[]; +}; + +struct dp_meter_table { + OVSRCU_TYPE(struct dp_meter_instance *) ti; + uint32_t count; + struct ovs_mutex lock; +}; + struct pmd_auto_lb { bool auto_lb_requested; /* Auto load balancing requested by user. */ bool is_enabled; /* Current status of Auto load balancing. */ @@ -329,8 +346,7 @@ struct dp_netdev { atomic_uint32_t tx_flush_interval; /* Meters. */ - struct ovs_mutex meter_locks[N_METER_LOCKS]; - struct dp_meter *meters[MAX_METERS]; /* Meter bands. */ + struct dp_meter_table meter_tbl; /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/ OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min; @@ -378,19 +394,6 @@ struct dp_netdev { struct pmd_auto_lb pmd_alb; }; -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id) - OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS]) -{ - ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]); -} - -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id) - OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS]) -{ - ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]); -} - - static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t) OVS_REQUIRES(dp->port_mutex); @@ -1523,6 +1526,9 @@ choose_port(struct dp_netdev *dp, const char *name) return ODPP_NONE; } +static void dp_netdev_meter_init(struct dp_meter_table *tbl); +static void dp_netdev_meter_destroy(struct dp_meter_table *tbl); + static int create_dp_netdev(const char *name, const struct dpif_class *class, struct dp_netdev **dpp) @@ -1556,9 +1562,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class, dp->reconfigure_seq = seq_create(); dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq); - for (int i = 0; i < N_METER_LOCKS; ++i) { - ovs_mutex_init_adaptive(&dp->meter_locks[i]); - } + dp_netdev_meter_init(&dp->meter_tbl); /* Disable upcalls by default. */ dp_netdev_disable_upcall(dp); @@ -1647,16 +1651,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp) fat_rwlock_destroy(&dp->upcall_rwlock); } -static void -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id) - OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS]) -{ - if (dp->meters[meter_id]) { - free(dp->meters[meter_id]); - dp->meters[meter_id] = NULL; - } -} - /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp' * through the 'dp_netdevs' shash while freeing 'dp'. */ static void @@ -1694,16 +1688,7 @@ dp_netdev_free(struct dp_netdev *dp) /* Upcalls must be disabled at this point */ dp_netdev_destroy_upcall_lock(dp); - int i; - - for (i = 0; i < MAX_METERS; ++i) { - meter_lock(dp, i); - dp_delete_meter(dp, i); - meter_unlock(dp, i); - } - for (i = 0; i < N_METER_LOCKS; ++i) { - ovs_mutex_destroy(&dp->meter_locks[i]); - } + dp_netdev_meter_destroy(&dp->meter_tbl); free(dp->pmd_cmask); free(CONST_CAST(char *, dp->name)); @@ -5713,14 +5698,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp) /* Meters */ +static uint32_t +meter_hash(struct dp_meter_instance *ti, uint32_t id) +{ + uint32_t n_meters = ti->n_meters; + + return id % n_meters; +} + +static void +dp_meter_free(struct dp_meter *meter) +{ + ovs_mutex_destroy(&meter->lock); + free(meter); +} + +static struct dp_meter_instance * +dp_meter_instance_alloc(const uint32_t size) +{ + struct dp_meter_instance *ti; + + ti = xzalloc(sizeof(*ti) + sizeof(struct dp_meter *) * size); + ti->n_meters = size; + + return ti; +} + +static void +dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size) +{ + struct dp_meter_instance *new_ti; + struct dp_meter_instance *ti; + int n_meters; + int i; + + new_ti = dp_meter_instance_alloc(size); + + ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti); + n_meters = MIN(size, ti->n_meters); + + for (i = 0; i < n_meters; i++) { + if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) { + new_ti->dp_meters[i] = ti->dp_meters[i]; + } + } + + ovsrcu_set(&tbl->ti, new_ti); + ovsrcu_postpone(free, ti); +} + +static void +dp_meter_instance_insert(struct dp_meter_instance *ti, + struct dp_meter *meter) +{ + uint32_t hash; + + hash = meter_hash(ti, meter->id); + ovsrcu_set(&ti->dp_meters[hash], meter); +} + +static void +dp_meter_instance_remove(struct dp_meter_instance *ti, + struct dp_meter *meter) +{ + uint32_t hash; + + hash = meter_hash(ti, meter->id); + ovsrcu_set(&ti->dp_meters[hash], NULL); +} + +static void +dp_netdev_meter_init(struct dp_meter_table *tbl) +{ + struct dp_meter_instance *ti; + + ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN); + ovsrcu_set(&tbl->ti, ti); + + ovs_mutex_init(&tbl->lock); + tbl->count = 0; +} + +static void +dp_netdev_meter_destroy(struct dp_meter_table *tbl) +{ + struct dp_meter_instance *ti; + int i; + + ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti); + for (i = 0; i < ti->n_meters; i++) { + struct dp_meter *meter; + + meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]); + if (meter) { + ovsrcu_postpone(dp_meter_free, meter); + } + } + + ovsrcu_postpone(free, ti); + ovs_mutex_destroy(&tbl->lock); +} + +static struct dp_meter * +dp_meter_lookup(struct dp_meter_table *meter_tbl, uint32_t meter_id) +{ + struct dp_meter_instance *ti; + struct dp_meter *meter; + uint32_t hash; + + ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti); + hash = meter_hash(ti, meter_id); + + meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[hash]); + if (meter && meter->id == meter_id) { + return meter; + } + + return NULL; +} + +static void +dp_meter_detach_free(struct dp_meter_table *meter_tbl, uint32_t meter_id) + OVS_REQUIRES(meter_tbl->lock) +{ + struct dp_meter_instance *ti; + struct dp_meter *meter; + + meter = dp_meter_lookup(meter_tbl, meter_id); + if (!meter) { + return; + } + + ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti); + dp_meter_instance_remove(ti, meter); + ovsrcu_postpone(dp_meter_free, meter); + + meter_tbl->count--; + /* Shrink the meter array if necessary. */ + if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN && + meter_tbl->count <= (ti->n_meters / 4)) { + int half_size = ti->n_meters / 2; + int i; + + /* Avoid hash collision, don't move slots to other place. + * Make sure there are no references of meters in array + * which will be released. + */ + for (i = half_size; i < ti->n_meters; i++) { + if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) { + return; + } + } + + dp_meter_instance_realloc(meter_tbl, half_size); + } +} + +static int +dp_meter_attach(struct dp_meter_table *meter_tbl, struct dp_meter *meter) + OVS_REQUIRES(meter_tbl->lock) +{ + struct dp_meter_instance *ti; + uint32_t hash; + + ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti); + hash = meter_hash(ti, meter->id); + + if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *, + &ti->dp_meters[hash]))) { + VLOG_WARN("Failed to attach meter id %u to slot %u/%u.\n", + meter->id, hash, ti->n_meters); + return EBUSY; + } + + dp_meter_instance_insert(ti, meter); + + meter_tbl->count++; + if (meter_tbl->count >= ti->n_meters) { + dp_meter_instance_realloc(meter_tbl, ti->n_meters * 2); + } + + return 0; +} + static void dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED, struct ofputil_meter_features *features) { - features->max_meters = MAX_METERS; + features->max_meters = METER_ENTRY_MAX; features->band_types = DP_SUPPORTED_METER_BAND_TYPES; features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK; - features->max_bands = MAX_BANDS; + features->max_bands = METER_BAND_MAX; features->max_color = 0; } @@ -5742,14 +5910,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, uint32_t exceeded_rate[NETDEV_MAX_BURST]; int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */ - if (meter_id >= MAX_METERS) { + if (meter_id >= METER_ENTRY_MAX) { return; } - meter_lock(dp, meter_id); - meter = dp->meters[meter_id]; + meter = dp_meter_lookup(&dp->meter_tbl, meter_id); if (!meter) { - goto out; + return; } /* Initialize as negative values. */ @@ -5757,6 +5924,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, /* Initialize as zeroes. */ memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate); + ovs_mutex_lock(&meter->lock); /* All packets will hit the meter at the same time. */ long_delta_t = now / 1000 - meter->used / 1000; /* msec */ @@ -5874,8 +6042,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, dp_packet_batch_refill(packets_, packet, j); } } - out: - meter_unlock(dp, meter_id); + + ovs_mutex_unlock(&meter->lock); } /* Meter set/get/del processing is still single-threaded. */ @@ -5884,11 +6052,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, struct ofputil_meter_config *config) { struct dp_netdev *dp = get_dp_netdev(dpif); + struct dp_meter_table *meter_tbl = &dp->meter_tbl; uint32_t mid = meter_id.uint32; struct dp_meter *meter; - int i; + int err, i; - if (mid >= MAX_METERS) { + if (mid >= METER_ENTRY_MAX) { return EFBIG; /* Meter_id out of range. */ } @@ -5896,7 +6065,7 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, return EBADF; /* Unsupported flags set */ } - if (config->n_bands > MAX_BANDS) { + if (config->n_bands > METER_BAND_MAX) { return EINVAL; } @@ -5917,6 +6086,8 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, meter->n_bands = config->n_bands; meter->max_delta_t = 0; meter->used = time_usec(); + meter->id = mid; + ovs_mutex_init(&meter->lock); /* set up bands */ for (i = 0; i < config->n_bands; ++i) { @@ -5942,12 +6113,22 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id, } } - meter_lock(dp, mid); - dp_delete_meter(dp, mid); /* Free existing meter, if any */ - dp->meters[mid] = meter; - meter_unlock(dp, mid); + ovs_mutex_lock(&meter_tbl->lock); + + dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */ + err = dp_meter_attach(meter_tbl, meter); + if (err) { + goto unlock_out; + } + + ovs_mutex_unlock(&meter_tbl->lock); return 0; + +unlock_out: + ovs_mutex_unlock(&meter_tbl->lock); + dp_meter_free(meter); + return err; } static int @@ -5955,23 +6136,23 @@ dpif_netdev_meter_get(const struct dpif *dpif, ofproto_meter_id meter_id_, struct ofputil_meter_stats *stats, uint16_t n_bands) { - const struct dp_netdev *dp = get_dp_netdev(dpif); + struct dp_netdev *dp = get_dp_netdev(dpif); uint32_t meter_id = meter_id_.uint32; - int retval = 0; + const struct dp_meter *meter; - if (meter_id >= MAX_METERS) { + if (meter_id >= METER_ENTRY_MAX) { return EFBIG; } - meter_lock(dp, meter_id); - const struct dp_meter *meter = dp->meters[meter_id]; + meter = dp_meter_lookup(&dp->meter_tbl, meter_id); if (!meter) { - retval = ENOENT; - goto done; + return ENOENT; } + if (stats) { int i = 0; + ovs_mutex_lock(&meter->lock); stats->packet_in_count = meter->packet_count; stats->byte_in_count = meter->byte_count; @@ -5979,13 +6160,12 @@ dpif_netdev_meter_get(const struct dpif *dpif, stats->bands[i].packet_count = meter->bands[i].packet_count; stats->bands[i].byte_count = meter->bands[i].byte_count; } + ovs_mutex_unlock(&meter->lock); stats->n_bands = i; } -done: - meter_unlock(dp, meter_id); - return retval; + return 0; } static int @@ -5994,15 +6174,16 @@ dpif_netdev_meter_del(struct dpif *dpif, struct ofputil_meter_stats *stats, uint16_t n_bands) { struct dp_netdev *dp = get_dp_netdev(dpif); + struct dp_meter_table *meter_tbl = &dp->meter_tbl; int error; error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands); if (!error) { uint32_t meter_id = meter_id_.uint32; - meter_lock(dp, meter_id); - dp_delete_meter(dp, meter_id); - meter_unlock(dp, meter_id); + ovs_mutex_lock(&meter_tbl->lock); + dp_meter_detach_free(meter_tbl, meter_id); + ovs_mutex_unlock(&meter_tbl->lock); } return error; }