diff mbox series

[ovs-dev,OVS,1/4] dpif-netdev: Expand the meters supported number

Message ID 1588244439-58766-2-git-send-email-xiangxia.m.yue@gmail.com
State Changes Requested
Headers show
Series expand the meter table and fix bug | expand

Commit Message

Tonghao Zhang April 30, 2020, 11 a.m. UTC
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

For now, ovs-vswitchd use the array of the dp_meter struct
to store meter's data, and at most, there are only 65536
(defined by MAX_METERS) meters that can be used. But in some
case, for example, in the edge gateway, we should use 200,000,
at least, meters for IP address bandwidth limitation.
Every one IP address will use two meters for its rx and tx
path[1]. In other way, ovs-vswitchd should support meter-offload
(rte_mtr_xxx api introduced by dpdk.), but there are more than
65536 meters in the hardware, such as Mellanox ConnectX-6.

This patch use array to manage the meter, but it can ben expanded.

[1].
$ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
$ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0

Cc: Ilya Maximets <i.maximets@ovn.org>
Cc: William Tu <u9012063@gmail.com>
Cc: Jarno Rajahalme <jarno@ovn.org>
Cc: Ben Pfaff <blp@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 lib/dpif-netdev.c | 320 ++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 251 insertions(+), 69 deletions(-)

Comments

William Tu May 9, 2020, 4:19 p.m. UTC | #1
On Thu, Apr 30, 2020 at 07:00:36PM +0800, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> 
> For now, ovs-vswitchd use the array of the dp_meter struct
> to store meter's data, and at most, there are only 65536
> (defined by MAX_METERS) meters that can be used. But in some
> case, for example, in the edge gateway, we should use 200,000,
> at least, meters for IP address bandwidth limitation.
> Every one IP address will use two meters for its rx and tx
> path[1]. In other way, ovs-vswitchd should support meter-offload
> (rte_mtr_xxx api introduced by dpdk.), but there are more than
> 65536 meters in the hardware, such as Mellanox ConnectX-6.
> 
> This patch use array to manage the meter, but it can ben expanded.
> 
> [1].
> $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0

I'm just curious why you need so many 'unique' meters?
can you share the meter id if their settings are the same?

ex:
$ in_port=p0,ip,ip_dst=1.1.1.x action=meter:X ,output:p1
$ in_port=p1,ip,ip_src=1.1.1.x action=meter:X, output:p0

if both flows have the same meter setup in X.

> 
> Cc: Ilya Maximets <i.maximets@ovn.org>
> Cc: William Tu <u9012063@gmail.com>
> Cc: Jarno Rajahalme <jarno@ovn.org>
> Cc: Ben Pfaff <blp@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---

Thanks for working on both kernel and userspace datapath!
Also add Pravin in the loop.

>  lib/dpif-netdev.c | 320 ++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 251 insertions(+), 69 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index ef14e83b5f06..17c0241aa2e2 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -98,9 +98,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
>  
>  /* Configuration parameters. */
>  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
> -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> +
> +/* Maximum number of meters in the table. */
> +#define METER_ENTRY_MAX (200000ULL)
> +/* Maximum number of bands / meter. */
> +#define METER_BAND_MAX  (8)
> +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)
>  
>  COVERAGE_DEFINE(datapath_drop_meter);
>  COVERAGE_DEFINE(datapath_drop_upcall_error);
> @@ -283,12 +286,25 @@ struct dp_meter {
>      uint16_t flags;
>      uint16_t n_bands;
>      uint32_t max_delta_t;
> +    uint32_t id;
> +    struct ovs_mutex lock;
>      uint64_t used;
>      uint64_t packet_count;
>      uint64_t byte_count;
>      struct dp_meter_band bands[];
>  };
>  
> +struct dp_meter_instance {
> +    uint32_t n_meters;
> +    OVSRCU_TYPE(struct dp_meter *) dp_meters[];
usually we add a comments here, saying
	/* Followed by
     *    struct dp_meter[n];
     * where n is the n_meters.
     */

> +};
> +
> +struct dp_meter_table {
> +    OVSRCU_TYPE(struct dp_meter_instance *) ti;
> +    uint32_t count;
> +    struct ovs_mutex lock;
> +};
> +
>  struct pmd_auto_lb {
>      bool auto_lb_requested;     /* Auto load balancing requested by user. */
>      bool is_enabled;            /* Current status of Auto load balancing. */
> @@ -329,8 +345,7 @@ struct dp_netdev {
>      atomic_uint32_t tx_flush_interval;
>  
>      /* Meters. */
> -    struct ovs_mutex meter_locks[N_METER_LOCKS];

Why removing the multiple locks and using only one lock?
Do you see any performance overhead when switching to sinlge lock?

> -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> +    struct dp_meter_table meter_tbl;
>  
>      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
>      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> @@ -378,19 +393,6 @@ struct dp_netdev {
>      struct pmd_auto_lb pmd_alb;
>  };
>  
> -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -
>  static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
>                                                      odp_port_t)
>      OVS_REQUIRES(dp->port_mutex);
> @@ -1523,6 +1525,9 @@ choose_port(struct dp_netdev *dp, const char *name)
>      return ODPP_NONE;
>  }
>  
> +static void dp_netdev_meter_init(struct dp_meter_table *tbl);
> +static void dp_netdev_meter_destroy(struct dp_meter_table *tbl);
Maybe move them up?

> +
>  static int
>  create_dp_netdev(const char *name, const struct dpif_class *class,
>                   struct dp_netdev **dpp)
> @@ -1556,9 +1561,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
>      dp->reconfigure_seq = seq_create();
>      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
>  
> -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_init(&dp->meter_tbl);
>  
>      /* Disable upcalls by default. */
>      dp_netdev_disable_upcall(dp);
> @@ -1647,16 +1650,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
>      fat_rwlock_destroy(&dp->upcall_rwlock);
>  }
>  
> -static void
> -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    if (dp->meters[meter_id]) {
> -        free(dp->meters[meter_id]);
> -        dp->meters[meter_id] = NULL;
> -    }
> -}
> -
>  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
>   * through the 'dp_netdevs' shash while freeing 'dp'. */
>  static void
> @@ -1694,16 +1687,7 @@ dp_netdev_free(struct dp_netdev *dp)
>      /* Upcalls must be disabled at this point */
>      dp_netdev_destroy_upcall_lock(dp);
>  
> -    int i;
> -
> -    for (i = 0; i < MAX_METERS; ++i) {
> -        meter_lock(dp, i);
> -        dp_delete_meter(dp, i);
> -        meter_unlock(dp, i);
> -    }
> -    for (i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_destroy(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_destroy(&dp->meter_tbl);
>  
>      free(dp->pmd_cmask);
>      free(CONST_CAST(char *, dp->name));
> @@ -5713,14 +5697,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp)
>  
>  
>  /* Meters */
> +static uint32_t
> +meter_hash(struct dp_meter_instance *ti, uint32_t id)
> +{
> +    uint32_t n_meters = ti->n_meters;
> +
> +    return id % n_meters;
> +}
> +
> +static void
> +dp_meter_free(struct dp_meter *meter)
> +{
> +    ovs_mutex_destroy(&meter->lock);
> +    free(meter);
> +}
> +
> +static struct dp_meter_instance *
> +dp_meter_instance_alloc(const uint32_t size)
> +{
> +    struct dp_meter_instance *ti;
> +
> +    ti = xzalloc(sizeof(*ti) + sizeof(struct dp_meter *) * size);
> +    ti->n_meters = size;
> +
> +    return ti;
> +}
> +
> +static void
> +dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size)
> +{
> +    struct dp_meter_instance *new_ti;
> +    struct dp_meter_instance *ti;
> +    int n_meters;
> +    int i;
> +
> +    new_ti = dp_meter_instance_alloc(size);
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
> +    n_meters = MIN(size, ti->n_meters);
> +
> +    for (i = 0; i < n_meters; i++) {
> +        if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
> +            new_ti->dp_meters[i] = ti->dp_meters[i];
> +        }
> +    }
> +
> +    ovsrcu_set(&tbl->ti, new_ti);
> +    ovsrcu_postpone(free, ti);
> +}
> +
> +static void
> +dp_meter_instance_insert(struct dp_meter_instance *ti,
> +                         struct dp_meter *meter)
> +{
> +    uint32_t hash;
> +
> +    hash = meter_hash(ti, meter->id);
> +    ovsrcu_set(&ti->dp_meters[hash], meter);
> +}
> +
> +static void
> +dp_meter_instance_remove(struct dp_meter_instance *ti,
> +                         struct dp_meter *meter)
> +{
> +    uint32_t hash;
> +
> +    hash = meter_hash(ti, meter->id);
> +    ovsrcu_set(&ti->dp_meters[hash], NULL);
> +}
> +
> +static void
> +dp_netdev_meter_init(struct dp_meter_table *tbl)
> +{
> +    struct dp_meter_instance *ti;
> +
> +    ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
> +    ovsrcu_set(&tbl->ti, ti);
> +
> +    ovs_mutex_init(&tbl->lock);
> +    tbl->count = 0;
> +}
> +
> +static void
> +dp_netdev_meter_destroy(struct dp_meter_table *tbl)
> +{
> +    struct dp_meter_instance *ti;
> +    int i;
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
> +    for (i = 0; i < ti->n_meters; i++) {
> +        struct dp_meter *meter;
> +
> +        meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]);
> +        if (meter) {
> +            ovsrcu_postpone(dp_meter_free, meter);
> +        }
> +    }
> +
> +    ovsrcu_postpone(free, ti);
> +    ovs_mutex_destroy(&tbl->lock);
> +}
> +
> +static struct dp_meter *
> +dp_meter_lookup(struct dp_meter_table *meter_tbl, uint32_t meter_id)
> +{
> +    struct dp_meter_instance *ti;
> +    struct dp_meter *meter;
> +    uint32_t hash;
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> +    hash = meter_hash(ti, meter_id);
> +
> +    meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[hash]);
> +    if (meter && meter->id == meter_id) {
> +        return meter;
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +dp_meter_detach_free(struct dp_meter_table *meter_tbl, uint32_t meter_id)
> +                     OVS_REQUIRES(meter_tbl->lock)
> +{
> +    struct dp_meter_instance *ti;
> +    struct dp_meter *meter;
> +
> +    meter = dp_meter_lookup(meter_tbl, meter_id);
> +    if (!meter) {
> +        return;
> +    }
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> +    dp_meter_instance_remove(ti, meter);
> +    ovsrcu_postpone(dp_meter_free, meter);
> +
> +    meter_tbl->count--;
> +    /* Shrink the meter array if necessary. */
> +    if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
> +        meter_tbl->count <= (ti->n_meters / 4)) {
> +        int half_size = ti->n_meters / 2;
> +        int i;
> +
> +        /* Avoid hash collision, don't move slots to other place.
> +         * Make sure there are no references of meters in array
> +         * which will be released.
> +         */
> +        for (i = half_size; i < ti->n_meters; i++) {
> +            if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
> +                return;
> +            }
> +        }
> +
> +        dp_meter_instance_realloc(meter_tbl, half_size);
> +    }
> +}
> +
> +static int
> +dp_meter_attach(struct dp_meter_table *meter_tbl, struct dp_meter *meter)
> +                OVS_REQUIRES(meter_tbl->lock)
> +{
> +    struct dp_meter_instance *ti;
> +    uint32_t hash;
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> +    hash = meter_hash(ti, meter->id);
> +
> +    if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *,
> +                                &ti->dp_meters[hash]))) {
> +        VLOG_WARN("When installing meter to %u/%u slot, but it's used.\n",
> +                  hash, ti->n_meters);
how about
VLOG_WANR("Failed to attach meter id %u to slot %u/%u.\n"

> +        return EBUSY;
> +    }
> +
> +    dp_meter_instance_insert(ti, meter);
> +
> +    meter_tbl->count++;
> +    if (meter_tbl->count >= ti->n_meters) {
> +        dp_meter_instance_realloc(meter_tbl, ti->n_meters *2);
space missing between "*2"

> +    }
> +
> +    return 0;
> +}
> +
>  static void
>  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
>                                 struct ofputil_meter_features *features)
>  {
> -    features->max_meters = MAX_METERS;
> +    features->max_meters = METER_ENTRY_MAX;
>      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
>      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> -    features->max_bands = MAX_BANDS;
> +    features->max_bands = METER_BAND_MAX;
>      features->max_color = 0;
>  }
>  
> @@ -5742,14 +5909,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>      uint32_t exceeded_rate[NETDEV_MAX_BURST];
>      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
>  
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return;
>      }
>  
> -    meter_lock(dp, meter_id);
> -    meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
>      if (!meter) {
> -        goto out;
> +        return;
>      }
>  
>      /* Initialize as negative values. */
> @@ -5757,6 +5923,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>      /* Initialize as zeroes. */
>      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
>  
> +    ovs_mutex_lock(&meter->lock);

do you want to use multiple lock, like the current design?

>      /* All packets will hit the meter at the same time. */
>      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
>  
> @@ -5874,8 +6041,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>              dp_packet_batch_refill(packets_, packet, j);
>          }
>      }
> - out:
> -    meter_unlock(dp, meter_id);
> +
> +    ovs_mutex_unlock(&meter->lock);
>  }
>  
>  /* Meter set/get/del processing is still single-threaded. */
> @@ -5884,11 +6051,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>                        struct ofputil_meter_config *config)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
>      uint32_t mid = meter_id.uint32;
>      struct dp_meter *meter;
> -    int i;
> +    int err, i;
>  
> -    if (mid >= MAX_METERS) {
> +    if (mid >= METER_ENTRY_MAX) {
>          return EFBIG; /* Meter_id out of range. */
>      }
>  
> @@ -5896,7 +6064,7 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>          return EBADF; /* Unsupported flags set */
>      }
>  
> -    if (config->n_bands > MAX_BANDS) {
> +    if (config->n_bands > METER_BAND_MAX) {
>          return EINVAL;
>      }
>  
> @@ -5917,6 +6085,8 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>      meter->n_bands = config->n_bands;
>      meter->max_delta_t = 0;
>      meter->used = time_usec();
> +    meter->id = mid;
> +    ovs_mutex_init(&meter->lock);
>  
>      /* set up bands */
>      for (i = 0; i < config->n_bands; ++i) {
> @@ -5942,12 +6112,22 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>          }
>      }
>  
> -    meter_lock(dp, mid);
> -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> -    dp->meters[mid] = meter;
> -    meter_unlock(dp, mid);
> +    ovs_mutex_lock(&meter_tbl->lock);
> +
> +    dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */
> +    err = dp_meter_attach(meter_tbl, meter);
> +    if (err) {
> +        goto unlock_out;
> +    }
> +
> +    ovs_mutex_unlock(&meter_tbl->lock);
>  
>      return 0;
> +
> +unlock_out:
> +    ovs_mutex_unlock(&meter_tbl->lock);
> +    dp_meter_free(meter);
> +    return err;
>  }
>  
>  static int
> @@ -5955,23 +6135,24 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>                        ofproto_meter_id meter_id_,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
> -    const struct dp_netdev *dp = get_dp_netdev(dpif);
We can still keep the 'const' 

> +    struct dp_netdev *dp = get_dp_netdev(dpif);
>      uint32_t meter_id = meter_id_.uint32;
> -    int retval = 0;
> +    const struct dp_meter *meter;
>  
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return EFBIG;
>      }
>  
> -    meter_lock(dp, meter_id);
> -    const struct dp_meter *meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
>      if (!meter) {
> -        retval = ENOENT;
> -        goto done;
> +        return ENOENT;
>      }
> +
>      if (stats) {
>          int i = 0;
>  
> +        ovs_mutex_lock(&meter->lock);
> +
remove extra newline
>          stats->packet_in_count = meter->packet_count;
>          stats->byte_in_count = meter->byte_count;
>  
> @@ -5980,12 +6161,12 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>              stats->bands[i].byte_count = meter->bands[i].byte_count;
>          }
>  
> +        ovs_mutex_unlock(&meter->lock);
> +
>          stats->n_bands = i;
>      }
>  
> -done:
> -    meter_unlock(dp, meter_id);
> -    return retval;
> +    return 0;
>  }
>  
>  static int
> @@ -5994,15 +6175,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
>      int error;
>  
>      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
>      if (!error) {
>          uint32_t meter_id = meter_id_.uint32;
>  
> -        meter_lock(dp, meter_id);
> -        dp_delete_meter(dp, meter_id);
> -        meter_unlock(dp, meter_id);
> +        ovs_mutex_lock(&meter_tbl->lock);
> +        dp_meter_detach_free(meter_tbl, meter_id);
> +        ovs_mutex_unlock(&meter_tbl->lock);
>      }
>      return error;
>  }
> -- 
> 1.8.3.1
> 
Thanks,
William
Tonghao Zhang May 11, 2020, 2:29 a.m. UTC | #2
On Sun, May 10, 2020 at 12:19 AM William Tu <u9012063@gmail.com> wrote:
>
> On Thu, Apr 30, 2020 at 07:00:36PM +0800, xiangxia.m.yue@gmail.com wrote:
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > For now, ovs-vswitchd use the array of the dp_meter struct
> > to store meter's data, and at most, there are only 65536
> > (defined by MAX_METERS) meters that can be used. But in some
> > case, for example, in the edge gateway, we should use 200,000,
> > at least, meters for IP address bandwidth limitation.
> > Every one IP address will use two meters for its rx and tx
> > path[1]. In other way, ovs-vswitchd should support meter-offload
> > (rte_mtr_xxx api introduced by dpdk.), but there are more than
> > 65536 meters in the hardware, such as Mellanox ConnectX-6.
> >
> > This patch use array to manage the meter, but it can ben expanded.
> >
> > [1].
> > $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> > $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
>
> I'm just curious why you need so many 'unique' meters?
> can you share the meter id if their settings are the same?
Hi William, thanks for your review.
In our production environment, we use the OVS-DPDK as a gateway. In
that gateway, we
use the meter to limit bandwidth for inbound/outbound for every IP
address of public network.

In other gateway, we also use shared meter for SNAT case.
> ex:
> $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:X ,output:p1
> $ in_port=p1,ip,ip_src=1.1.1.x action=meter:X, output:p0
>
> if both flows have the same meter setup in X.
>
> >
> > Cc: Ilya Maximets <i.maximets@ovn.org>
> > Cc: William Tu <u9012063@gmail.com>
> > Cc: Jarno Rajahalme <jarno@ovn.org>
> > Cc: Ben Pfaff <blp@ovn.org>
> > Cc: Andy Zhou <azhou@ovn.org>
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > ---
>
> Thanks for working on both kernel and userspace datapath!
> Also add Pravin in the loop.
>
> >  lib/dpif-netdev.c | 320 ++++++++++++++++++++++++++++++++++++++++++------------
> >  1 file changed, 251 insertions(+), 69 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index ef14e83b5f06..17c0241aa2e2 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -98,9 +98,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
> >
> >  /* Configuration parameters. */
> >  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
> > -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> > -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> > -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> > +
> > +/* Maximum number of meters in the table. */
> > +#define METER_ENTRY_MAX (200000ULL)
> > +/* Maximum number of bands / meter. */
> > +#define METER_BAND_MAX  (8)
> > +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)
> >
> >  COVERAGE_DEFINE(datapath_drop_meter);
> >  COVERAGE_DEFINE(datapath_drop_upcall_error);
> > @@ -283,12 +286,25 @@ struct dp_meter {
> >      uint16_t flags;
> >      uint16_t n_bands;
> >      uint32_t max_delta_t;
> > +    uint32_t id;
> > +    struct ovs_mutex lock;
> >      uint64_t used;
> >      uint64_t packet_count;
> >      uint64_t byte_count;
> >      struct dp_meter_band bands[];
> >  };
> >
> > +struct dp_meter_instance {
> > +    uint32_t n_meters;
> > +    OVSRCU_TYPE(struct dp_meter *) dp_meters[];
> usually we add a comments here, saying
>         /* Followed by
>      *    struct dp_meter[n];
>      * where n is the n_meters.
>      */
OK
> > +};
> > +
> > +struct dp_meter_table {
> > +    OVSRCU_TYPE(struct dp_meter_instance *) ti;
> > +    uint32_t count;
> > +    struct ovs_mutex lock;
> > +};
> > +
> >  struct pmd_auto_lb {
> >      bool auto_lb_requested;     /* Auto load balancing requested by user. */
> >      bool is_enabled;            /* Current status of Auto load balancing. */
> > @@ -329,8 +345,7 @@ struct dp_netdev {
> >      atomic_uint32_t tx_flush_interval;
> >
> >      /* Meters. */
> > -    struct ovs_mutex meter_locks[N_METER_LOCKS];
>
> Why removing the multiple locks and using only one lock?
> Do you see any performance overhead when switching to sinlge lock?
As I know, without this patch, multiple meters may share the one lock.
N_METER_LOCKS == 64, but MAX_METERS == 65535.
so in my patch, every meter has itself lock to protect
stats/buckets/timestamp of that meter.

> > -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> > +    struct dp_meter_table meter_tbl;
> >
> >      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
> >      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> > @@ -378,19 +393,6 @@ struct dp_netdev {
> >      struct pmd_auto_lb pmd_alb;
> >  };
> >
> > -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > -}
> > -
> > -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > -}
> > -
> > -
> >  static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
> >                                                      odp_port_t)
> >      OVS_REQUIRES(dp->port_mutex);
> > @@ -1523,6 +1525,9 @@ choose_port(struct dp_netdev *dp, const char *name)
> >      return ODPP_NONE;
> >  }
> >
> > +static void dp_netdev_meter_init(struct dp_meter_table *tbl);
> > +static void dp_netdev_meter_destroy(struct dp_meter_table *tbl);
> Maybe move them up?
The function about meter, I put them together, include that
dp_netdev_meter_init/destroy as below.
But I am fine move them up.
> > +
> >  static int
> >  create_dp_netdev(const char *name, const struct dpif_class *class,
> >                   struct dp_netdev **dpp)
> > @@ -1556,9 +1561,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
> >      dp->reconfigure_seq = seq_create();
> >      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
> >
> > -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> > -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> > -    }
> > +    dp_netdev_meter_init(&dp->meter_tbl);
> >
> >      /* Disable upcalls by default. */
> >      dp_netdev_disable_upcall(dp);
> > @@ -1647,16 +1650,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
> >      fat_rwlock_destroy(&dp->upcall_rwlock);
> >  }
> >
> > -static void
> > -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    if (dp->meters[meter_id]) {
> > -        free(dp->meters[meter_id]);
> > -        dp->meters[meter_id] = NULL;
> > -    }
> > -}
> > -
> >  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
> >   * through the 'dp_netdevs' shash while freeing 'dp'. */
> >  static void
> > @@ -1694,16 +1687,7 @@ dp_netdev_free(struct dp_netdev *dp)
> >      /* Upcalls must be disabled at this point */
> >      dp_netdev_destroy_upcall_lock(dp);
> >
> > -    int i;
> > -
> > -    for (i = 0; i < MAX_METERS; ++i) {
> > -        meter_lock(dp, i);
> > -        dp_delete_meter(dp, i);
> > -        meter_unlock(dp, i);
> > -    }
> > -    for (i = 0; i < N_METER_LOCKS; ++i) {
> > -        ovs_mutex_destroy(&dp->meter_locks[i]);
> > -    }
> > +    dp_netdev_meter_destroy(&dp->meter_tbl);
> >
> >      free(dp->pmd_cmask);
> >      free(CONST_CAST(char *, dp->name));
> > @@ -5713,14 +5697,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp)
> >
> >
> >  /* Meters */
> > +static uint32_t
> > +meter_hash(struct dp_meter_instance *ti, uint32_t id)
> > +{
> > +    uint32_t n_meters = ti->n_meters;
> > +
> > +    return id % n_meters;
> > +}
> > +
> > +static void
> > +dp_meter_free(struct dp_meter *meter)
> > +{
> > +    ovs_mutex_destroy(&meter->lock);
> > +    free(meter);
> > +}
> > +
> > +static struct dp_meter_instance *
> > +dp_meter_instance_alloc(const uint32_t size)
> > +{
> > +    struct dp_meter_instance *ti;
> > +
> > +    ti = xzalloc(sizeof(*ti) + sizeof(struct dp_meter *) * size);
> > +    ti->n_meters = size;
> > +
> > +    return ti;
> > +}
> > +
> > +static void
> > +dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size)
> > +{
> > +    struct dp_meter_instance *new_ti;
> > +    struct dp_meter_instance *ti;
> > +    int n_meters;
> > +    int i;
> > +
> > +    new_ti = dp_meter_instance_alloc(size);
> > +
> > +    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
> > +    n_meters = MIN(size, ti->n_meters);
> > +
> > +    for (i = 0; i < n_meters; i++) {
> > +        if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
> > +            new_ti->dp_meters[i] = ti->dp_meters[i];
> > +        }
> > +    }
> > +
> > +    ovsrcu_set(&tbl->ti, new_ti);
> > +    ovsrcu_postpone(free, ti);
> > +}
> > +
> > +static void
> > +dp_meter_instance_insert(struct dp_meter_instance *ti,
> > +                         struct dp_meter *meter)
> > +{
> > +    uint32_t hash;
> > +
> > +    hash = meter_hash(ti, meter->id);
> > +    ovsrcu_set(&ti->dp_meters[hash], meter);
> > +}
> > +
> > +static void
> > +dp_meter_instance_remove(struct dp_meter_instance *ti,
> > +                         struct dp_meter *meter)
> > +{
> > +    uint32_t hash;
> > +
> > +    hash = meter_hash(ti, meter->id);
> > +    ovsrcu_set(&ti->dp_meters[hash], NULL);
> > +}
> > +
> > +static void
> > +dp_netdev_meter_init(struct dp_meter_table *tbl)
> > +{
> > +    struct dp_meter_instance *ti;
> > +
> > +    ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
> > +    ovsrcu_set(&tbl->ti, ti);
> > +
> > +    ovs_mutex_init(&tbl->lock);
> > +    tbl->count = 0;
> > +}
> > +
> > +static void
> > +dp_netdev_meter_destroy(struct dp_meter_table *tbl)
> > +{
> > +    struct dp_meter_instance *ti;
> > +    int i;
> > +
> > +    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
> > +    for (i = 0; i < ti->n_meters; i++) {
> > +        struct dp_meter *meter;
> > +
> > +        meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]);
> > +        if (meter) {
> > +            ovsrcu_postpone(dp_meter_free, meter);
> > +        }
> > +    }
> > +
> > +    ovsrcu_postpone(free, ti);
> > +    ovs_mutex_destroy(&tbl->lock);
> > +}
> > +
> > +static struct dp_meter *
> > +dp_meter_lookup(struct dp_meter_table *meter_tbl, uint32_t meter_id)
> > +{
> > +    struct dp_meter_instance *ti;
> > +    struct dp_meter *meter;
> > +    uint32_t hash;
> > +
> > +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> > +    hash = meter_hash(ti, meter_id);
> > +
> > +    meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[hash]);
> > +    if (meter && meter->id == meter_id) {
> > +        return meter;
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static void
> > +dp_meter_detach_free(struct dp_meter_table *meter_tbl, uint32_t meter_id)
> > +                     OVS_REQUIRES(meter_tbl->lock)
> > +{
> > +    struct dp_meter_instance *ti;
> > +    struct dp_meter *meter;
> > +
> > +    meter = dp_meter_lookup(meter_tbl, meter_id);
> > +    if (!meter) {
> > +        return;
> > +    }
> > +
> > +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> > +    dp_meter_instance_remove(ti, meter);
> > +    ovsrcu_postpone(dp_meter_free, meter);
> > +
> > +    meter_tbl->count--;
> > +    /* Shrink the meter array if necessary. */
> > +    if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
> > +        meter_tbl->count <= (ti->n_meters / 4)) {
> > +        int half_size = ti->n_meters / 2;
> > +        int i;
> > +
> > +        /* Avoid hash collision, don't move slots to other place.
> > +         * Make sure there are no references of meters in array
> > +         * which will be released.
> > +         */
> > +        for (i = half_size; i < ti->n_meters; i++) {
> > +            if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
> > +                return;
> > +            }
> > +        }
> > +
> > +        dp_meter_instance_realloc(meter_tbl, half_size);
> > +    }
> > +}
> > +
> > +static int
> > +dp_meter_attach(struct dp_meter_table *meter_tbl, struct dp_meter *meter)
> > +                OVS_REQUIRES(meter_tbl->lock)
> > +{
> > +    struct dp_meter_instance *ti;
> > +    uint32_t hash;
> > +
> > +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> > +    hash = meter_hash(ti, meter->id);
> > +
> > +    if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *,
> > +                                &ti->dp_meters[hash]))) {
> > +        VLOG_WARN("When installing meter to %u/%u slot, but it's used.\n",
> > +                  hash, ti->n_meters);
> how about
> VLOG_WANR("Failed to attach meter id %u to slot %u/%u.\n"
OK
> > +        return EBUSY;
> > +    }
> > +
> > +    dp_meter_instance_insert(ti, meter);
> > +
> > +    meter_tbl->count++;
> > +    if (meter_tbl->count >= ti->n_meters) {
> > +        dp_meter_instance_realloc(meter_tbl, ti->n_meters *2);
> space missing between "*2"
OK
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static void
> >  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
> >                                 struct ofputil_meter_features *features)
> >  {
> > -    features->max_meters = MAX_METERS;
> > +    features->max_meters = METER_ENTRY_MAX;
> >      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
> >      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> > -    features->max_bands = MAX_BANDS;
> > +    features->max_bands = METER_BAND_MAX;
> >      features->max_color = 0;
> >  }
> >
> > @@ -5742,14 +5909,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
> >      uint32_t exceeded_rate[NETDEV_MAX_BURST];
> >      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
> >
> > -    if (meter_id >= MAX_METERS) {
> > +    if (meter_id >= METER_ENTRY_MAX) {
> >          return;
> >      }
> >
> > -    meter_lock(dp, meter_id);
> > -    meter = dp->meters[meter_id];
> > +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
> >      if (!meter) {
> > -        goto out;
> > +        return;
> >      }
> >
> >      /* Initialize as negative values. */
> > @@ -5757,6 +5923,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
> >      /* Initialize as zeroes. */
> >      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
> >
> > +    ovs_mutex_lock(&meter->lock);
>
> do you want to use multiple lock, like the current design?
As explain above, there should not be a performance drop.
> >      /* All packets will hit the meter at the same time. */
> >      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
> >
> > @@ -5874,8 +6041,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
> >              dp_packet_batch_refill(packets_, packet, j);
> >          }
> >      }
> > - out:
> > -    meter_unlock(dp, meter_id);
> > +
> > +    ovs_mutex_unlock(&meter->lock);
> >  }
> >
> >  /* Meter set/get/del processing is still single-threaded. */
> > @@ -5884,11 +6051,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> >                        struct ofputil_meter_config *config)
> >  {
> >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
> >      uint32_t mid = meter_id.uint32;
> >      struct dp_meter *meter;
> > -    int i;
> > +    int err, i;
> >
> > -    if (mid >= MAX_METERS) {
> > +    if (mid >= METER_ENTRY_MAX) {
> >          return EFBIG; /* Meter_id out of range. */
> >      }
> >
> > @@ -5896,7 +6064,7 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> >          return EBADF; /* Unsupported flags set */
> >      }
> >
> > -    if (config->n_bands > MAX_BANDS) {
> > +    if (config->n_bands > METER_BAND_MAX) {
> >          return EINVAL;
> >      }
> >
> > @@ -5917,6 +6085,8 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> >      meter->n_bands = config->n_bands;
> >      meter->max_delta_t = 0;
> >      meter->used = time_usec();
> > +    meter->id = mid;
> > +    ovs_mutex_init(&meter->lock);
> >
> >      /* set up bands */
> >      for (i = 0; i < config->n_bands; ++i) {
> > @@ -5942,12 +6112,22 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> >          }
> >      }
> >
> > -    meter_lock(dp, mid);
> > -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> > -    dp->meters[mid] = meter;
> > -    meter_unlock(dp, mid);
> > +    ovs_mutex_lock(&meter_tbl->lock);
> > +
> > +    dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */
> > +    err = dp_meter_attach(meter_tbl, meter);
> > +    if (err) {
> > +        goto unlock_out;
> > +    }
> > +
> > +    ovs_mutex_unlock(&meter_tbl->lock);
> >
> >      return 0;
> > +
> > +unlock_out:
> > +    ovs_mutex_unlock(&meter_tbl->lock);
> > +    dp_meter_free(meter);
> > +    return err;
> >  }
> >
> >  static int
> > @@ -5955,23 +6135,24 @@ dpif_netdev_meter_get(const struct dpif *dpif,
> >                        ofproto_meter_id meter_id_,
> >                        struct ofputil_meter_stats *stats, uint16_t n_bands)
> >  {
> > -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> We can still keep the 'const'
>
> > +    struct dp_netdev *dp = get_dp_netdev(dpif);
> >      uint32_t meter_id = meter_id_.uint32;
> > -    int retval = 0;
> > +    const struct dp_meter *meter;
> >
> > -    if (meter_id >= MAX_METERS) {
> > +    if (meter_id >= METER_ENTRY_MAX) {
> >          return EFBIG;
> >      }
> >
> > -    meter_lock(dp, meter_id);
> > -    const struct dp_meter *meter = dp->meters[meter_id];
> > +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
> >      if (!meter) {
> > -        retval = ENOENT;
> > -        goto done;
> > +        return ENOENT;
> >      }
> > +
> >      if (stats) {
> >          int i = 0;
> >
> > +        ovs_mutex_lock(&meter->lock);
> > +
> remove extra newline
OK
> >          stats->packet_in_count = meter->packet_count;
> >          stats->byte_in_count = meter->byte_count;
> >
> > @@ -5980,12 +6161,12 @@ dpif_netdev_meter_get(const struct dpif *dpif,
> >              stats->bands[i].byte_count = meter->bands[i].byte_count;
> >          }
> >
> > +        ovs_mutex_unlock(&meter->lock);
> > +
> >          stats->n_bands = i;
> >      }
> >
> > -done:
> > -    meter_unlock(dp, meter_id);
> > -    return retval;
> > +    return 0;
> >  }
> >
> >  static int
> > @@ -5994,15 +6175,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
> >                        struct ofputil_meter_stats *stats, uint16_t n_bands)
> >  {
> >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
> >      int error;
> >
> >      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
> >      if (!error) {
> >          uint32_t meter_id = meter_id_.uint32;
> >
> > -        meter_lock(dp, meter_id);
> > -        dp_delete_meter(dp, meter_id);
> > -        meter_unlock(dp, meter_id);
> > +        ovs_mutex_lock(&meter_tbl->lock);
> > +        dp_meter_detach_free(meter_tbl, meter_id);
> > +        ovs_mutex_unlock(&meter_tbl->lock);
> >      }
> >      return error;
> >  }
> > --
> > 1.8.3.1
> >
> Thanks,
> William
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ef14e83b5f06..17c0241aa2e2 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -98,9 +98,12 @@  DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
-enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
-enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
-enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
+
+/* Maximum number of meters in the table. */
+#define METER_ENTRY_MAX (200000ULL)
+/* Maximum number of bands / meter. */
+#define METER_BAND_MAX  (8)
+#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)
 
 COVERAGE_DEFINE(datapath_drop_meter);
 COVERAGE_DEFINE(datapath_drop_upcall_error);
@@ -283,12 +286,25 @@  struct dp_meter {
     uint16_t flags;
     uint16_t n_bands;
     uint32_t max_delta_t;
+    uint32_t id;
+    struct ovs_mutex lock;
     uint64_t used;
     uint64_t packet_count;
     uint64_t byte_count;
     struct dp_meter_band bands[];
 };
 
+struct dp_meter_instance {
+    uint32_t n_meters;
+    OVSRCU_TYPE(struct dp_meter *) dp_meters[];
+};
+
+struct dp_meter_table {
+    OVSRCU_TYPE(struct dp_meter_instance *) ti;
+    uint32_t count;
+    struct ovs_mutex lock;
+};
+
 struct pmd_auto_lb {
     bool auto_lb_requested;     /* Auto load balancing requested by user. */
     bool is_enabled;            /* Current status of Auto load balancing. */
@@ -329,8 +345,7 @@  struct dp_netdev {
     atomic_uint32_t tx_flush_interval;
 
     /* Meters. */
-    struct ovs_mutex meter_locks[N_METER_LOCKS];
-    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
+    struct dp_meter_table meter_tbl;
 
     /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
     OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
@@ -378,19 +393,6 @@  struct dp_netdev {
     struct pmd_auto_lb pmd_alb;
 };
 
-static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
-    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
-}
-
-static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
-    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
-}
-
-
 static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
                                                     odp_port_t)
     OVS_REQUIRES(dp->port_mutex);
@@ -1523,6 +1525,9 @@  choose_port(struct dp_netdev *dp, const char *name)
     return ODPP_NONE;
 }
 
+static void dp_netdev_meter_init(struct dp_meter_table *tbl);
+static void dp_netdev_meter_destroy(struct dp_meter_table *tbl);
+
 static int
 create_dp_netdev(const char *name, const struct dpif_class *class,
                  struct dp_netdev **dpp)
@@ -1556,9 +1561,7 @@  create_dp_netdev(const char *name, const struct dpif_class *class,
     dp->reconfigure_seq = seq_create();
     dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
 
-    for (int i = 0; i < N_METER_LOCKS; ++i) {
-        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
-    }
+    dp_netdev_meter_init(&dp->meter_tbl);
 
     /* Disable upcalls by default. */
     dp_netdev_disable_upcall(dp);
@@ -1647,16 +1650,6 @@  dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
     fat_rwlock_destroy(&dp->upcall_rwlock);
 }
 
-static void
-dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
-    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    if (dp->meters[meter_id]) {
-        free(dp->meters[meter_id]);
-        dp->meters[meter_id] = NULL;
-    }
-}
-
 /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
  * through the 'dp_netdevs' shash while freeing 'dp'. */
 static void
@@ -1694,16 +1687,7 @@  dp_netdev_free(struct dp_netdev *dp)
     /* Upcalls must be disabled at this point */
     dp_netdev_destroy_upcall_lock(dp);
 
-    int i;
-
-    for (i = 0; i < MAX_METERS; ++i) {
-        meter_lock(dp, i);
-        dp_delete_meter(dp, i);
-        meter_unlock(dp, i);
-    }
-    for (i = 0; i < N_METER_LOCKS; ++i) {
-        ovs_mutex_destroy(&dp->meter_locks[i]);
-    }
+    dp_netdev_meter_destroy(&dp->meter_tbl);
 
     free(dp->pmd_cmask);
     free(CONST_CAST(char *, dp->name));
@@ -5713,14 +5697,197 @@  dp_netdev_disable_upcall(struct dp_netdev *dp)
 
 
 /* Meters */
+static uint32_t
+meter_hash(struct dp_meter_instance *ti, uint32_t id)
+{
+    uint32_t n_meters = ti->n_meters;
+
+    return id % n_meters;
+}
+
+static void
+dp_meter_free(struct dp_meter *meter)
+{
+    ovs_mutex_destroy(&meter->lock);
+    free(meter);
+}
+
+static struct dp_meter_instance *
+dp_meter_instance_alloc(const uint32_t size)
+{
+    struct dp_meter_instance *ti;
+
+    ti = xzalloc(sizeof(*ti) + sizeof(struct dp_meter *) * size);
+    ti->n_meters = size;
+
+    return ti;
+}
+
+static void
+dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size)
+{
+    struct dp_meter_instance *new_ti;
+    struct dp_meter_instance *ti;
+    int n_meters;
+    int i;
+
+    new_ti = dp_meter_instance_alloc(size);
+
+    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
+    n_meters = MIN(size, ti->n_meters);
+
+    for (i = 0; i < n_meters; i++) {
+        if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
+            new_ti->dp_meters[i] = ti->dp_meters[i];
+        }
+    }
+
+    ovsrcu_set(&tbl->ti, new_ti);
+    ovsrcu_postpone(free, ti);
+}
+
+static void
+dp_meter_instance_insert(struct dp_meter_instance *ti,
+                         struct dp_meter *meter)
+{
+    uint32_t hash;
+
+    hash = meter_hash(ti, meter->id);
+    ovsrcu_set(&ti->dp_meters[hash], meter);
+}
+
+static void
+dp_meter_instance_remove(struct dp_meter_instance *ti,
+                         struct dp_meter *meter)
+{
+    uint32_t hash;
+
+    hash = meter_hash(ti, meter->id);
+    ovsrcu_set(&ti->dp_meters[hash], NULL);
+}
+
+static void
+dp_netdev_meter_init(struct dp_meter_table *tbl)
+{
+    struct dp_meter_instance *ti;
+
+    ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
+    ovsrcu_set(&tbl->ti, ti);
+
+    ovs_mutex_init(&tbl->lock);
+    tbl->count = 0;
+}
+
+static void
+dp_netdev_meter_destroy(struct dp_meter_table *tbl)
+{
+    struct dp_meter_instance *ti;
+    int i;
+
+    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
+    for (i = 0; i < ti->n_meters; i++) {
+        struct dp_meter *meter;
+
+        meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]);
+        if (meter) {
+            ovsrcu_postpone(dp_meter_free, meter);
+        }
+    }
+
+    ovsrcu_postpone(free, ti);
+    ovs_mutex_destroy(&tbl->lock);
+}
+
+static struct dp_meter *
+dp_meter_lookup(struct dp_meter_table *meter_tbl, uint32_t meter_id)
+{
+    struct dp_meter_instance *ti;
+    struct dp_meter *meter;
+    uint32_t hash;
+
+    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
+    hash = meter_hash(ti, meter_id);
+
+    meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[hash]);
+    if (meter && meter->id == meter_id) {
+        return meter;
+    }
+
+    return NULL;
+}
+
+static void
+dp_meter_detach_free(struct dp_meter_table *meter_tbl, uint32_t meter_id)
+                     OVS_REQUIRES(meter_tbl->lock)
+{
+    struct dp_meter_instance *ti;
+    struct dp_meter *meter;
+
+    meter = dp_meter_lookup(meter_tbl, meter_id);
+    if (!meter) {
+        return;
+    }
+
+    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
+    dp_meter_instance_remove(ti, meter);
+    ovsrcu_postpone(dp_meter_free, meter);
+
+    meter_tbl->count--;
+    /* Shrink the meter array if necessary. */
+    if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
+        meter_tbl->count <= (ti->n_meters / 4)) {
+        int half_size = ti->n_meters / 2;
+        int i;
+
+        /* Avoid hash collision, don't move slots to other place.
+         * Make sure there are no references of meters in array
+         * which will be released.
+         */
+        for (i = half_size; i < ti->n_meters; i++) {
+            if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
+                return;
+            }
+        }
+
+        dp_meter_instance_realloc(meter_tbl, half_size);
+    }
+}
+
+static int
+dp_meter_attach(struct dp_meter_table *meter_tbl, struct dp_meter *meter)
+                OVS_REQUIRES(meter_tbl->lock)
+{
+    struct dp_meter_instance *ti;
+    uint32_t hash;
+
+    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
+    hash = meter_hash(ti, meter->id);
+
+    if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *,
+                                &ti->dp_meters[hash]))) {
+        VLOG_WARN("When installing meter to %u/%u slot, but it's used.\n",
+                  hash, ti->n_meters);
+        return EBUSY;
+    }
+
+    dp_meter_instance_insert(ti, meter);
+
+    meter_tbl->count++;
+    if (meter_tbl->count >= ti->n_meters) {
+        dp_meter_instance_realloc(meter_tbl, ti->n_meters *2);
+    }
+
+    return 0;
+}
+
 static void
 dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
                                struct ofputil_meter_features *features)
 {
-    features->max_meters = MAX_METERS;
+    features->max_meters = METER_ENTRY_MAX;
     features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
     features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
-    features->max_bands = MAX_BANDS;
+    features->max_bands = METER_BAND_MAX;
     features->max_color = 0;
 }
 
@@ -5742,14 +5909,13 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     uint32_t exceeded_rate[NETDEV_MAX_BURST];
     int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
 
-    if (meter_id >= MAX_METERS) {
+    if (meter_id >= METER_ENTRY_MAX) {
         return;
     }
 
-    meter_lock(dp, meter_id);
-    meter = dp->meters[meter_id];
+    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
     if (!meter) {
-        goto out;
+        return;
     }
 
     /* Initialize as negative values. */
@@ -5757,6 +5923,7 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     /* Initialize as zeroes. */
     memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
 
+    ovs_mutex_lock(&meter->lock);
     /* All packets will hit the meter at the same time. */
     long_delta_t = now / 1000 - meter->used / 1000; /* msec */
 
@@ -5874,8 +6041,8 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
             dp_packet_batch_refill(packets_, packet, j);
         }
     }
- out:
-    meter_unlock(dp, meter_id);
+
+    ovs_mutex_unlock(&meter->lock);
 }
 
 /* Meter set/get/del processing is still single-threaded. */
@@ -5884,11 +6051,12 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
                       struct ofputil_meter_config *config)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
     uint32_t mid = meter_id.uint32;
     struct dp_meter *meter;
-    int i;
+    int err, i;
 
-    if (mid >= MAX_METERS) {
+    if (mid >= METER_ENTRY_MAX) {
         return EFBIG; /* Meter_id out of range. */
     }
 
@@ -5896,7 +6064,7 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         return EBADF; /* Unsupported flags set */
     }
 
-    if (config->n_bands > MAX_BANDS) {
+    if (config->n_bands > METER_BAND_MAX) {
         return EINVAL;
     }
 
@@ -5917,6 +6085,8 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
     meter->n_bands = config->n_bands;
     meter->max_delta_t = 0;
     meter->used = time_usec();
+    meter->id = mid;
+    ovs_mutex_init(&meter->lock);
 
     /* set up bands */
     for (i = 0; i < config->n_bands; ++i) {
@@ -5942,12 +6112,22 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         }
     }
 
-    meter_lock(dp, mid);
-    dp_delete_meter(dp, mid); /* Free existing meter, if any */
-    dp->meters[mid] = meter;
-    meter_unlock(dp, mid);
+    ovs_mutex_lock(&meter_tbl->lock);
+
+    dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */
+    err = dp_meter_attach(meter_tbl, meter);
+    if (err) {
+        goto unlock_out;
+    }
+
+    ovs_mutex_unlock(&meter_tbl->lock);
 
     return 0;
+
+unlock_out:
+    ovs_mutex_unlock(&meter_tbl->lock);
+    dp_meter_free(meter);
+    return err;
 }
 
 static int
@@ -5955,23 +6135,24 @@  dpif_netdev_meter_get(const struct dpif *dpif,
                       ofproto_meter_id meter_id_,
                       struct ofputil_meter_stats *stats, uint16_t n_bands)
 {
-    const struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_netdev *dp = get_dp_netdev(dpif);
     uint32_t meter_id = meter_id_.uint32;
-    int retval = 0;
+    const struct dp_meter *meter;
 
-    if (meter_id >= MAX_METERS) {
+    if (meter_id >= METER_ENTRY_MAX) {
         return EFBIG;
     }
 
-    meter_lock(dp, meter_id);
-    const struct dp_meter *meter = dp->meters[meter_id];
+    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
     if (!meter) {
-        retval = ENOENT;
-        goto done;
+        return ENOENT;
     }
+
     if (stats) {
         int i = 0;
 
+        ovs_mutex_lock(&meter->lock);
+
         stats->packet_in_count = meter->packet_count;
         stats->byte_in_count = meter->byte_count;
 
@@ -5980,12 +6161,12 @@  dpif_netdev_meter_get(const struct dpif *dpif,
             stats->bands[i].byte_count = meter->bands[i].byte_count;
         }
 
+        ovs_mutex_unlock(&meter->lock);
+
         stats->n_bands = i;
     }
 
-done:
-    meter_unlock(dp, meter_id);
-    return retval;
+    return 0;
 }
 
 static int
@@ -5994,15 +6175,16 @@  dpif_netdev_meter_del(struct dpif *dpif,
                       struct ofputil_meter_stats *stats, uint16_t n_bands)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
     int error;
 
     error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
     if (!error) {
         uint32_t meter_id = meter_id_.uint32;
 
-        meter_lock(dp, meter_id);
-        dp_delete_meter(dp, meter_id);
-        meter_unlock(dp, meter_id);
+        ovs_mutex_lock(&meter_tbl->lock);
+        dp_meter_detach_free(meter_tbl, meter_id);
+        ovs_mutex_unlock(&meter_tbl->lock);
     }
     return error;
 }