diff mbox

[ovs-dev,ovs,V2,09/21] dpif-netlink: Dump netdevs flows on flow dump

Message ID 1482665989-791-10-git-send-email-paulb@mellanox.com
State Changes Requested
Headers show

Commit Message

Paul Blakey Dec. 25, 2016, 11:39 a.m. UTC
While dumping flows, dump flows that were offloaded to
netdev and parse them back to dpif flow.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
---
 lib/dpif-netlink.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)

Comments

Joe Stringer Jan. 5, 2017, 9:27 p.m. UTC | #1
On 25 December 2016 at 03:39, Paul Blakey <paulb@mellanox.com> wrote:
> While dumping flows, dump flows that were offloaded to
> netdev and parse them back to dpif flow.
>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>
> Reviewed-by: Roi Dayan <roid@mellanox.com>
> ---
>  lib/dpif-netlink.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 179 insertions(+)
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 36f2888..3d8940e 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -38,6 +38,7 @@
>  #include "flow.h"
>  #include "fat-rwlock.h"
>  #include "netdev.h"
> +#include "netdev-provider.h"
>  #include "netdev-linux.h"
>  #include "netdev-vport.h"
>  #include "netlink-conntrack.h"
> @@ -55,6 +56,7 @@
>  #include "unaligned.h"
>  #include "util.h"
>  #include "openvswitch/vlog.h"
> +#include "openvswitch/match.h"
>
>  VLOG_DEFINE_THIS_MODULE(dpif_netlink);
>  #ifdef _WIN32
> @@ -68,6 +70,8 @@ enum { MAX_PORTS = USHRT_MAX };
>   * missing if we have old headers. */
>  #define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */
>
> +#define FLOW_DUMP_MAX_BATCH 50
> +
>  struct dpif_netlink_dp {
>      /* Generic Netlink header. */
>      uint8_t cmd;
> @@ -1355,6 +1359,10 @@ struct dpif_netlink_flow_dump {
>      struct dpif_flow_dump up;
>      struct nl_dump nl_dump;
>      atomic_int status;
> +    struct netdev_flow_dump **netdev_dumps;
> +    int netdev_num;
> +    int netdev_given;
> +    struct ovs_mutex netdev_lock;

Could you add a brief comment above these variables that describes
their use? (It's also common in OVS code to mention that, eg,
netdev_lock protects the following elements. )

If there's a more descriptive name than "netdev_num", like
netdev_max_dumps or something then please use that instead. At a
glance, "given" and "num" don't provide particularly much context
about how they relate to each other or to the dump.

>  };
>
>  static struct dpif_netlink_flow_dump *
> @@ -1363,6 +1371,34 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
>      return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
>  }
>
> +static void start_netdev_dump(const struct dpif *dpif_,
> +                              struct dpif_netlink_flow_dump *dump) {
> +
> +    if (!netdev_flow_api_enabled) {
> +        dump->netdev_num = 0;
> +        return;
> +    }

Typically for style we still place all variable declarations at the
top of a function, in a christmas tree long lines to short lines,
before functional code like this.

> +
> +    struct netdev_list_element *element;
> +    struct ovs_list port_list;
> +    int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list);
> +    int i = 0;
> +
> +    dump->netdev_dumps =
> +        ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0;

Can this be sizeof(dump->netdev_dumps)?

> +    dump->netdev_num = ports;
> +    dump->netdev_given = 0;
> +
> +    LIST_FOR_EACH(element, node, &port_list) {
> +        dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev);
> +        dump->netdev_dumps[i]->port = element->port_no;
> +        i++;
> +    }

As a matter of style, it's easier to see that this loop is bounded by
'ports' (and that number is correct) if it's structured as

for (i = 0; i < ports; i++) {
element = get_next_node;
...
}

Also, it seems that even if the netdev doesn't support flow_dump, we
allocate a netdev_flow_dump and add it to the netdev_dumps here..
perhaps we could/should skip it for these netdevs instead?

> +    netdev_port_list_del(&port_list);
> +
> +    ovs_mutex_init(&dump->netdev_lock);

I don't see a corresponding ovs_mutex_destroy() call for this.

> +}
> +
>  static struct dpif_flow_dump *
>  dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>  {
> @@ -1387,6 +1423,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>      atomic_init(&dump->status, 0);
>      dump->up.terse = terse;
>
> +    start_netdev_dump(dpif_, dump);
> +
>      return &dump->up;
>  }
>
> @@ -1397,6 +1435,16 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
>      unsigned int nl_status = nl_dump_done(&dump->nl_dump);
>      int dump_status;
>
> +    if (netdev_flow_api_enabled) {
> +        for (int i = 0; i < dump->netdev_num; i++) {
> +            int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
> +            if (err != 0 && err != EOPNOTSUPP) {
> +                VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
> +            }
> +        }
> +        free(dump->netdev_dumps);
> +    }

You don't really need to check for netdev_flow_api_enabled here;
netdev_num will be 0 if it is disabled, so that for loop turns into a
no-op; then you could initialize dump->netdev_dumps to NULL in that
case and unconditionally free it. It's a bit simpler to read the code
if you don't have to think about whether or not hardware offloads are
enabled.

> +
>      /* No other thread has access to 'dump' at this point. */
>      atomic_read_relaxed(&dump->status, &dump_status);
>      free(dump);
> @@ -1410,6 +1458,11 @@ struct dpif_netlink_flow_dump_thread {
>      struct dpif_flow_stats stats;
>      struct ofpbuf nl_flows;     /* Always used to store flows. */
>      struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
> +    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
> +    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
> +    struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
> +    int netdev_cur_dump;
> +    bool netdev_done;

I wonder if it's worthwhile to reuse 'nl_flows' to store all of these
netlink-formatted key/mask/acts instead of having these keybufs? It
seems that it is currently unused for the first half of the
dpif_netlink_flow_dump while the flows are being dumped from the
netdev.

Regardless of the above question, I also question whether
FLOW_DUMP_MAX_BATCH is too big for dumping from the kernel. How many
tc flows will we really get from the kernel at once?

>  };
>
>  static struct dpif_netlink_flow_dump_thread *
> @@ -1429,6 +1482,8 @@ dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
>      thread->dump = dump;
>      ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
>      thread->nl_actions = NULL;
> +    thread->netdev_cur_dump = 0;
> +    thread->netdev_done = !(thread->netdev_cur_dump < dump->netdev_num);
>
>      return &thread->up;
>  }
> @@ -1466,6 +1521,90 @@ dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
>      dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
>  }
>
> +static void
> +dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
> +{
> +    struct dpif_netlink_flow_dump *dump = thread->dump;
> +
> +    ovs_mutex_lock(&dump->netdev_lock);
> +    /* if we haven't finished (dumped everything) */
> +    if (dump->netdev_given < dump->netdev_num) {
> +        /* if we are the first to find that given dump is finished
> +         * (for race condition, e.g 3 finish dump 0 at the same time) */

Why is there a race condition here if this is executed under netdev_lock?

> +        if (thread->netdev_cur_dump == dump->netdev_given) {
> +            thread->netdev_cur_dump = ++dump->netdev_given;
> +            /* did we just finish the last dump? done. */
> +            if (dump->netdev_given == dump->netdev_num) {
> +                thread->netdev_done = true;
> +            }
> +        } else {
> +            /* otherwise, we are behind, catch up */
> +            thread->netdev_cur_dump = dump->netdev_given;
> +        }
> +    } else {
> +        /* some other thread finished */
> +        thread->netdev_done = true;
> +    }
> +    ovs_mutex_unlock(&dump->netdev_lock);
> +}
> +
> +static struct odp_support netdev_flow_support = {
> +    .max_mpls_depth = SIZE_MAX,
> +    .recirc = false,
> +    .ct_state = false,
> +    .ct_zone = false,
> +    .ct_mark = false,
> +    .ct_label = false,
> +};
> +
> +static int
> +dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
> +                                       struct ofpbuf *key_buf,
> +                                       struct ofpbuf *mask_buf,
> +                                       struct nlattr *actions,
> +                                       struct dpif_flow_stats *stats,
> +                                       ovs_u128 *ufid,
> +                                       struct dpif_flow *flow,
> +                                       bool terse OVS_UNUSED)
> +{
> +
> +    struct odp_flow_key_parms odp_parms = {
> +        .flow = &match->flow,
> +        .mask = &match->wc.masks,
> +        .support = netdev_flow_support,

There's also 'key_buf' field in parms that may be needed.

> +    };
> +    size_t offset;
> +
> +    memset(flow, 0, sizeof *flow);
> +
> +    /* Key */
> +    offset = key_buf->size;
> +    flow->key = ofpbuf_tail(key_buf);
> +    odp_flow_key_from_flow(&odp_parms, key_buf);
> +    flow->key_len = key_buf->size - offset;
> +
> +    /* Mask */
> +    offset = mask_buf->size;
> +    flow->mask = ofpbuf_tail(mask_buf);
> +    odp_parms.key_buf = key_buf;
> +    odp_flow_key_from_mask(&odp_parms, mask_buf);
> +    flow->mask_len = mask_buf->size - offset;
> +
> +    /* Actions */
> +    flow->actions = nl_attr_get(actions);
> +    flow->actions_len = nl_attr_get_size(actions);
> +
> +    /* Stats */
> +    memcpy(&flow->stats, stats, sizeof *stats);
> +
> +    /* UFID */
> +    flow->ufid_present = true;
> +    flow->ufid = *ufid;
> +
> +    flow->pmd_id = PMD_ID_NULL;
> +    return 0;
> +}
> +
>  static int
>  dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
>                              struct dpif_flow *flows, int max_flows)
> @@ -1475,11 +1614,51 @@ dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
>      struct dpif_netlink_flow_dump *dump = thread->dump;
>      struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
>      int n_flows;
> +    int i = 0;
>
>      ofpbuf_delete(thread->nl_actions);
>      thread->nl_actions = NULL;
>
>      n_flows = 0;
> +
> +    while (!thread->netdev_done && n_flows < max_flows
> +           && i < FLOW_DUMP_MAX_BATCH) {
> +        struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
> +        struct odputil_keybuf *keybuf = &thread->keybuf[i];
> +        struct odputil_keybuf *actbuf = &thread->actbuf[i];
> +        struct ofpbuf key, mask, act;
> +        struct dpif_flow *f = &flows[n_flows];
> +        int cur = thread->netdev_cur_dump;
> +        struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
> +        struct match match;
> +        struct nlattr *actions;
> +        struct dpif_flow_stats stats;
> +        ovs_u128 ufid;
> +        bool has_next;
> +
> +        ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
> +        ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
> +        ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
> +        has_next = netdev_flow_dump_next(netdev_dump, &match,
> +                                        &actions, &stats,
> +                                        &ufid,
> +                                        &thread->nl_flows,
> +                                        &act);
> +        if (has_next) {
> +            dpif_netlink_netdev_match_to_dpif_flow(&match,
> +                                                   &key, &mask,
> +                                                   actions,
> +                                                   &stats,
> +                                                   &ufid,
> +                                                   f,
> +                                                   dump->up.terse);
> +            n_flows++;
> +            i++;

Seems like 'i' and 'n_flows' are trying to achieve the same objective.
Can we just drop 'i'?

> +        } else {
> +            dpif_netlink_advance_netdev_dump(thread);
> +        }
> +    }
> +
>      while (!n_flows
>             || (n_flows < max_flows && thread->nl_flows.size)) {
>          struct dpif_netlink_flow datapath_flow;
> --
> 1.8.3.1
>
Paul Blakey Jan. 10, 2017, 11:45 a.m. UTC | #2
On 05/01/2017 23:27, Joe Stringer wrote:
> On 25 December 2016 at 03:39, Paul Blakey <paulb@mellanox.com> wrote:
>> While dumping flows, dump flows that were offloaded to
>> netdev and parse them back to dpif flow.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Reviewed-by: Roi Dayan <roid@mellanox.com>
>> ---
>>   lib/dpif-netlink.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 179 insertions(+)
>>
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> index 36f2888..3d8940e 100644
>> --- a/lib/dpif-netlink.c
>> +++ b/lib/dpif-netlink.c
>> @@ -38,6 +38,7 @@
>>   #include "flow.h"
>>   #include "fat-rwlock.h"
>>   #include "netdev.h"
>> +#include "netdev-provider.h"
>>   #include "netdev-linux.h"
>>   #include "netdev-vport.h"
>>   #include "netlink-conntrack.h"
>> @@ -55,6 +56,7 @@
>>   #include "unaligned.h"
>>   #include "util.h"
>>   #include "openvswitch/vlog.h"
>> +#include "openvswitch/match.h"
>>
>>   VLOG_DEFINE_THIS_MODULE(dpif_netlink);
>>   #ifdef _WIN32
>> @@ -68,6 +70,8 @@ enum { MAX_PORTS = USHRT_MAX };
>>    * missing if we have old headers. */
>>   #define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */
>>
>> +#define FLOW_DUMP_MAX_BATCH 50
>> +
>>   struct dpif_netlink_dp {
>>       /* Generic Netlink header. */
>>       uint8_t cmd;
>> @@ -1355,6 +1359,10 @@ struct dpif_netlink_flow_dump {
>>       struct dpif_flow_dump up;
>>       struct nl_dump nl_dump;
>>       atomic_int status;
>> +    struct netdev_flow_dump **netdev_dumps;
>> +    int netdev_num;
>> +    int netdev_given;
>> +    struct ovs_mutex netdev_lock;
> Could you add a brief comment above these variables that describes
> their use? (It's also common in OVS code to mention that, eg,
> netdev_lock protects the following elements. )
>
> If there's a more descriptive name than "netdev_num", like
> netdev_max_dumps or something then please use that instead. At a
> glance, "given" and "num" don't provide particularly much context
> about how they relate to each other or to the dump.
sure, thanks.
>
>>   };
>>
>>   static struct dpif_netlink_flow_dump *
>> @@ -1363,6 +1371,34 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
>>       return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
>>   }
>>
>> +static void start_netdev_dump(const struct dpif *dpif_,
>> +                              struct dpif_netlink_flow_dump *dump) {
>> +
>> +    if (!netdev_flow_api_enabled) {
>> +        dump->netdev_num = 0;
>> +        return;
>> +    }
> Typically for style we still place all variable declarations at the
> top of a function, in a christmas tree long lines to short lines,
> before functional code like this.
>
>> +
>> +    struct netdev_list_element *element;
>> +    struct ovs_list port_list;
>> +    int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list);
>> +    int i = 0;
>> +
>> +    dump->netdev_dumps =
>> +        ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0;
> Can this be sizeof(dump->netdev_dumps)?
Do you mean sizeof(*dump->netdev_dumps) or 
sizeof(dump->netdev_dumps[0])? If so, yes.

>> +    dump->netdev_num = ports;
>> +    dump->netdev_given = 0;
>> +
>> +    LIST_FOR_EACH(element, node, &port_list) {
>> +        dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev);
>> +        dump->netdev_dumps[i]->port = element->port_no;
>> +        i++;
>> +    }
> As a matter of style, it's easier to see that this loop is bounded by
> 'ports' (and that number is correct) if it's structured as
>
> for (i = 0; i < ports; i++) {
> element = get_next_node;
> ...
> }
>
> Also, it seems that even if the netdev doesn't support flow_dump, we
> allocate a netdev_flow_dump and add it to the netdev_dumps here..
> perhaps we could/should skip it for these netdevs instead?
>
>> +    netdev_port_list_del(&port_list);
>> +
>> +    ovs_mutex_init(&dump->netdev_lock);
> I don't see a corresponding ovs_mutex_destroy() call for this.
Good catch, thanks.
>> +}
>> +
>>   static struct dpif_flow_dump *
>>   dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>>   {
>> @@ -1387,6 +1423,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>>       atomic_init(&dump->status, 0);
>>       dump->up.terse = terse;
>>
>> +    start_netdev_dump(dpif_, dump);
>> +
>>       return &dump->up;
>>   }
>>
>> @@ -1397,6 +1435,16 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
>>       unsigned int nl_status = nl_dump_done(&dump->nl_dump);
>>       int dump_status;
>>
>> +    if (netdev_flow_api_enabled) {
>> +        for (int i = 0; i < dump->netdev_num; i++) {
>> +            int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
>> +            if (err != 0 && err != EOPNOTSUPP) {
>> +                VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
>> +            }
>> +        }
>> +        free(dump->netdev_dumps);
>> +    }
> You don't really need to check for netdev_flow_api_enabled here;
> netdev_num will be 0 if it is disabled, so that for loop turns into a
> no-op; then you could initialize dump->netdev_dumps to NULL in that
> case and unconditionally free it. It's a bit simpler to read the code
> if you don't have to think about whether or not hardware offloads are
> enabled.
>
>> +
>>       /* No other thread has access to 'dump' at this point. */
>>       atomic_read_relaxed(&dump->status, &dump_status);
>>       free(dump);
>> @@ -1410,6 +1458,11 @@ struct dpif_netlink_flow_dump_thread {
>>       struct dpif_flow_stats stats;
>>       struct ofpbuf nl_flows;     /* Always used to store flows. */
>>       struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
>> +    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
>> +    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
>> +    struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
>> +    int netdev_cur_dump;
>> +    bool netdev_done;
> I wonder if it's worthwhile to reuse 'nl_flows' to store all of these
> netlink-formatted key/mask/acts instead of having these keybufs? It
> seems that it is currently unused for the first half of the
> dpif_netlink_flow_dump while the flows are being dumped from the
> netdev.
>
> Regardless of the above question, I also question whether
> FLOW_DUMP_MAX_BATCH is too big for dumping from the kernel. How many
> tc flows will we really get from the kernel at once?
>
>>   };
>>
>>   static struct dpif_netlink_flow_dump_thread *
>> @@ -1429,6 +1482,8 @@ dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
>>       thread->dump = dump;
>>       ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
>>       thread->nl_actions = NULL;
>> +    thread->netdev_cur_dump = 0;
>> +    thread->netdev_done = !(thread->netdev_cur_dump < dump->netdev_num);
>>
>>       return &thread->up;
>>   }
>> @@ -1466,6 +1521,90 @@ dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
>>       dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
>>   }
>>
>> +static void
>> +dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
>> +{
>> +    struct dpif_netlink_flow_dump *dump = thread->dump;
>> +
>> +    ovs_mutex_lock(&dump->netdev_lock);
>> +    /* if we haven't finished (dumped everything) */
>> +    if (dump->netdev_given < dump->netdev_num) {
>> +        /* if we are the first to find that given dump is finished
>> +         * (for race condition, e.g 3 finish dump 0 at the same time) */
> Why is there a race condition here if this is executed under netdev_lock?
The design is such that all threads work together on the dumps in 
order, from the first to the last (at first they all work on dump 0),
and when one thread finds that the given dump is finished, they all move 
to the next.
As the comment tried to explain, if 3 (or 2+) threads are working on the 
first dump, dump 0,
(thread->netdev_cur_dump == 0) and finish at the same time, they all 
call advance func.
Now the first one to take the lock advances the shared "given" dump, which 
signifies the highest dump we have given out
(all lower dumps have finished). The rest then enter and we check whether 
the dump each of them found to be
finished is behind the new one that was given; if so, they catch 
up, so now all of them will work on dump 1.

The race is that if two or more threads worked on the same dump and 
finished at the same time,
and we just increased netdev_given without checking (thread->cur == 
given) for each of them,
we would have increased "given" twice and skipped one dump.

>
>> +        if (thread->netdev_cur_dump == dump->netdev_given) {
>> +            thread->netdev_cur_dump = ++dump->netdev_given;
>> +            /* did we just finish the last dump? done. */
>> +            if (dump->netdev_given == dump->netdev_num) {
>> +                thread->netdev_done = true;
>> +            }
>> +        } else {
>> +            /* otherwise, we are behind, catch up */
>> +            thread->netdev_cur_dump = dump->netdev_given;
>> +        }
>> +    } else {
>> +        /* some other thread finished */
>> +        thread->netdev_done = true;
>> +    }
>> +    ovs_mutex_unlock(&dump->netdev_lock);
>> +}
>> +
>> +static struct odp_support netdev_flow_support = {
>> +    .max_mpls_depth = SIZE_MAX,
>> +    .recirc = false,
>> +    .ct_state = false,
>> +    .ct_zone = false,
>> +    .ct_mark = false,
>> +    .ct_label = false,
>> +};
>> +
>> +static int
>> +dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
>> +                                       struct ofpbuf *key_buf,
>> +                                       struct ofpbuf *mask_buf,
>> +                                       struct nlattr *actions,
>> +                                       struct dpif_flow_stats *stats,
>> +                                       ovs_u128 *ufid,
>> +                                       struct dpif_flow *flow,
>> +                                       bool terse OVS_UNUSED)
>> +{
>> +
>> +    struct odp_flow_key_parms odp_parms = {
>> +        .flow = &match->flow,
>> +        .mask = &match->wc.masks,
>> +        .support = netdev_flow_support,
> There's also 'key_buf' field in parms that may be needed.
>
>> +    };
>> +    size_t offset;
>> +
>> +    memset(flow, 0, sizeof *flow);
>> +
>> +    /* Key */
>> +    offset = key_buf->size;
>> +    flow->key = ofpbuf_tail(key_buf);
>> +    odp_flow_key_from_flow(&odp_parms, key_buf);
>> +    flow->key_len = key_buf->size - offset;
>> +
>> +    /* Mask */
>> +    offset = mask_buf->size;
>> +    flow->mask = ofpbuf_tail(mask_buf);
>> +    odp_parms.key_buf = key_buf;
>> +    odp_flow_key_from_mask(&odp_parms, mask_buf);
>> +    flow->mask_len = mask_buf->size - offset;
>> +
>> +    /* Actions */
>> +    flow->actions = nl_attr_get(actions);
>> +    flow->actions_len = nl_attr_get_size(actions);
>> +
>> +    /* Stats */
>> +    memcpy(&flow->stats, stats, sizeof *stats);
>> +
>> +    /* UFID */
>> +    flow->ufid_present = true;
>> +    flow->ufid = *ufid;
>> +
>> +    flow->pmd_id = PMD_ID_NULL;
>> +    return 0;
>> +}
>> +
>>   static int
>>   dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
>>                               struct dpif_flow *flows, int max_flows)
>> @@ -1475,11 +1614,51 @@ dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
>>       struct dpif_netlink_flow_dump *dump = thread->dump;
>>       struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
>>       int n_flows;
>> +    int i = 0;
>>
>>       ofpbuf_delete(thread->nl_actions);
>>       thread->nl_actions = NULL;
>>
>>       n_flows = 0;
>> +
>> +    while (!thread->netdev_done && n_flows < max_flows
>> +           && i < FLOW_DUMP_MAX_BATCH) {
>> +        struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
>> +        struct odputil_keybuf *keybuf = &thread->keybuf[i];
>> +        struct odputil_keybuf *actbuf = &thread->actbuf[i];
>> +        struct ofpbuf key, mask, act;
>> +        struct dpif_flow *f = &flows[n_flows];
>> +        int cur = thread->netdev_cur_dump;
>> +        struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
>> +        struct match match;
>> +        struct nlattr *actions;
>> +        struct dpif_flow_stats stats;
>> +        ovs_u128 ufid;
>> +        bool has_next;
>> +
>> +        ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
>> +        ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
>> +        ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
>> +        has_next = netdev_flow_dump_next(netdev_dump, &match,
>> +                                        &actions, &stats,
>> +                                        &ufid,
>> +                                        &thread->nl_flows,
>> +                                        &act);
>> +        if (has_next) {
>> +            dpif_netlink_netdev_match_to_dpif_flow(&match,
>> +                                                   &key, &mask,
>> +                                                   actions,
>> +                                                   &stats,
>> +                                                   &ufid,
>> +                                                   f,
>> +                                                   dump->up.terse);
>> +            n_flows++;
>> +            i++;
> Seems like 'i' and 'n_flows' are trying to achieve the same objective.
> Can we just drop 'i'?
>
>> +        } else {
>> +            dpif_netlink_advance_netdev_dump(thread);
>> +        }
>> +    }
>> +
>>       while (!n_flows
>>              || (n_flows < max_flows && thread->nl_flows.size)) {
>>           struct dpif_netlink_flow datapath_flow;
>> --
>> 1.8.3.1
>>
Joe Stringer Jan. 10, 2017, 9:41 p.m. UTC | #3
On 10 January 2017 at 03:45, Paul Blakey <paulb@mellanox.com> wrote:
>>> +    struct netdev_list_element *element;
>>> +    struct ovs_list port_list;
>>> +    int ports = netdev_hmap_port_get_list(dpif_->dpif_class,
>>> &port_list);
>>> +    int i = 0;
>>> +
>>> +    dump->netdev_dumps =
>>> +        ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0;
>>
>> Can this be sizeof(dump->netdev_dumps)?
>
> Do you mean sizeof(*dump-netdev_dumps), or sizeof(dump->netdev_dumps[0]), if
> so yes.

Yes that's what I meant.

<snip>

>>> @@ -1466,6 +1521,90 @@ dpif_netlink_flow_to_dpif_flow(struct dpif *dpif,
>>> struct dpif_flow *dpif_flow,
>>>       dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
>>>   }
>>>
>>> +static void
>>> +dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread
>>> *thread)
>>> +{
>>> +    struct dpif_netlink_flow_dump *dump = thread->dump;
>>> +
>>> +    ovs_mutex_lock(&dump->netdev_lock);
>>> +    /* if we haven't finished (dumped everything) */
>>> +    if (dump->netdev_given < dump->netdev_num) {
>>> +        /* if we are the first to find that given dump is finished
>>> +         * (for race condition, e.g 3 finish dump 0 at the same time) */
>>
>> Why is there a race condition here if this is executed under netdev_lock?
>
> The design is such that all threads are working together on the first dump
> to the last, in order. (at first they all on dump 0),
> and when one thread finds that the given dump is finished, they all move to
> the next.
> As the comment tried to explain, if 3 (or 2+) threads are working on the
> first dump, dump 0,
> (thread->netdev_cur_dump == 0) and finish at the same time, they all call
> advance func.
> Now the first one to get the lock advances the shared given dump, which
> signify which highest dump we have given
> (and all lower dumps have finished). The rest now enter and we check if the
> dump they have found to be
> finished is higher then the new one that was given, if not they catch up, so
> now all of them will work on dump 1.
>
> The race is that if 2 or more threads worked on the same dump and finished
> at the same time,
> if we just increased netdev_given without checking (thread->cur == given)
> for both of them,
> we would have increased given twice and skip one dump.

Thanks for the explanation. I think that the code would benefit from
having this written somewhere - for instance, above the
dpif_netlink_advance_netdev_dump() function.
Paul Blakey Jan. 22, 2017, 4:08 p.m. UTC | #4
On 05/01/2017 23:27, Joe Stringer wrote:
> On 25 December 2016 at 03:39, Paul Blakey <paulb@mellanox.com> wrote:
>> While dumping flows, dump flows that were offloaded to
>> netdev and parse them back to dpif flow.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Reviewed-by: Roi Dayan <roid@mellanox.com>
>> ---
>>  lib/dpif-netlink.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 179 insertions(+)
>>
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> index 36f2888..3d8940e 100644
>> --- a/lib/dpif-netlink.c
>> +++ b/lib/dpif-netlink.c
>> @@ -38,6 +38,7 @@
>>  #include "flow.h"
>>  #include "fat-rwlock.h"
>>  #include "netdev.h"
>> +#include "netdev-provider.h"
>>  #include "netdev-linux.h"
>>  #include "netdev-vport.h"
>>  #include "netlink-conntrack.h"
>> @@ -55,6 +56,7 @@
>>  #include "unaligned.h"
>>  #include "util.h"
>>  #include "openvswitch/vlog.h"
>> +#include "openvswitch/match.h"
>>
>>  VLOG_DEFINE_THIS_MODULE(dpif_netlink);
>>  #ifdef _WIN32
>> @@ -68,6 +70,8 @@ enum { MAX_PORTS = USHRT_MAX };
>>   * missing if we have old headers. */
>>  #define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */
>>
>> +#define FLOW_DUMP_MAX_BATCH 50
>> +
>>  struct dpif_netlink_dp {
>>      /* Generic Netlink header. */
>>      uint8_t cmd;
>> @@ -1355,6 +1359,10 @@ struct dpif_netlink_flow_dump {
>>      struct dpif_flow_dump up;
>>      struct nl_dump nl_dump;
>>      atomic_int status;
>> +    struct netdev_flow_dump **netdev_dumps;
>> +    int netdev_num;
>> +    int netdev_given;
>> +    struct ovs_mutex netdev_lock;
>
> Could you add a brief comment above these variables that describes
> their use? (It's also common in OVS code to mention that, eg,
> netdev_lock protects the following elements. )
>
> If there's a more descriptive name than "netdev_num", like
> netdev_max_dumps or something then please use that instead. At a
> glance, "given" and "num" don't provide particularly much context
> about how they relate to each other or to the dump.
>
>>  };
>>
>>  static struct dpif_netlink_flow_dump *
>> @@ -1363,6 +1371,34 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
>>      return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
>>  }
>>
>> +static void start_netdev_dump(const struct dpif *dpif_,
>> +                              struct dpif_netlink_flow_dump *dump) {
>> +
>> +    if (!netdev_flow_api_enabled) {
>> +        dump->netdev_num = 0;
>> +        return;
>> +    }
>
> Typically for style we still place all variable declarations at the
> top of a function, in a christmas tree long lines to short lines,
> before functional code like this.
>
>> +
>> +    struct netdev_list_element *element;
>> +    struct ovs_list port_list;
>> +    int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list);
>> +    int i = 0;
>> +
>> +    dump->netdev_dumps =
>> +        ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0;
>
> Can this be sizeof(dump->netdev_dumps)?
>
>> +    dump->netdev_num = ports;
>> +    dump->netdev_given = 0;
>> +
>> +    LIST_FOR_EACH(element, node, &port_list) {
>> +        dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev);
>> +        dump->netdev_dumps[i]->port = element->port_no;
>> +        i++;
>> +    }
>
> As a matter of style, it's easier to see that this loop is bounded by
> 'ports' (and that number is correct) if it's structured as
>
> for (i = 0; i < ports; i++) {
> element = get_next_node;
> ...
> }
>
> Also, it seems that even if the netdev doesn't support flow_dump, we
> allocate a netdev_flow_dump and add it to the netdev_dumps here..
> perhaps we could/should skip it for these netdevs instead?
>
>> +    netdev_port_list_del(&port_list);
>> +
>> +    ovs_mutex_init(&dump->netdev_lock);
>
> I don't see a corresponding ovs_mutex_destroy() call for this.
>
>> +}
>> +
>>  static struct dpif_flow_dump *
>>  dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>>  {
>> @@ -1387,6 +1423,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>>      atomic_init(&dump->status, 0);
>>      dump->up.terse = terse;
>>
>> +    start_netdev_dump(dpif_, dump);
>> +
>>      return &dump->up;
>>  }
>>
>> @@ -1397,6 +1435,16 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
>>      unsigned int nl_status = nl_dump_done(&dump->nl_dump);
>>      int dump_status;
>>
>> +    if (netdev_flow_api_enabled) {
>> +        for (int i = 0; i < dump->netdev_num; i++) {
>> +            int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
>> +            if (err != 0 && err != EOPNOTSUPP) {
>> +                VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
>> +            }
>> +        }
>> +        free(dump->netdev_dumps);
>> +    }
>
> You don't really need to check for netdev_flow_api_enabled here;
> netdev_num will be 0 if it is disabled, so that for loop turns into a
> no-op; then you could initialize dump->netdev_dumps to NULL in that
> case and unconditionally free it. It's a bit simpler to read the code
> if you don't have to think about whether or not hardware offloads are
> enabled.
>
>> +
>>      /* No other thread has access to 'dump' at this point. */
>>      atomic_read_relaxed(&dump->status, &dump_status);
>>      free(dump);
>> @@ -1410,6 +1458,11 @@ struct dpif_netlink_flow_dump_thread {
>>      struct dpif_flow_stats stats;
>>      struct ofpbuf nl_flows;     /* Always used to store flows. */
>>      struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
>> +    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
>> +    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
>> +    struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
>> +    int netdev_cur_dump;
>> +    bool netdev_done;
>
> I wonder if it's worthwhile to reuse 'nl_flows' to store all of these
> netlink-formatted key/mask/acts instead of having these keybufs? It
> seems that it is currently unused for the first half of the
> dpif_netlink_flow_dump while the flows are being dumped from the
> netdev.
>
> Regardless of the above question, I also question whether
> FLOW_DUMP_MAX_BATCH is too big for dumping from the kernel. How many
> tc flows will we really get from the kernel at once?

Hi,
Unlike dpif_netlink, which dumps nl_flows already in the correct 
netlink stream format and can just take pointers into it, we need 
temporary buffers to store the conversion from struct match to dpif_flow 
key/mask/actions netlink attributes.
So that's KEY_SIZE*2*DUMP_MAX (size*(key+mask)*max) right there, and 
the same for actions (though that buffer could probably be smaller). We 
can't reuse nl_flows for that because it is used by nl_dump_next to keep 
the buffer filled with multiple replies.


A flow described in kernel datapath netlink format and in tc format 
should be about the same size, but tc doesn't support getting only stats 
(terse). NL_DUMP_BUFSIZE should be enough for
~30 flows (20 bytes for struct tcmsg, 32 for macs & masks, 12 for 
eth_type and ip_proto, 12 per port, plus actions and stats... not all are 
present). But doesn't nl_dump_next refill it anyway (rather than just
stopping at the buffer size)?


<snip>
diff mbox

Patch

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 36f2888..3d8940e 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -38,6 +38,7 @@ 
 #include "flow.h"
 #include "fat-rwlock.h"
 #include "netdev.h"
+#include "netdev-provider.h"
 #include "netdev-linux.h"
 #include "netdev-vport.h"
 #include "netlink-conntrack.h"
@@ -55,6 +56,7 @@ 
 #include "unaligned.h"
 #include "util.h"
 #include "openvswitch/vlog.h"
+#include "openvswitch/match.h"
 
 VLOG_DEFINE_THIS_MODULE(dpif_netlink);
 #ifdef _WIN32
@@ -68,6 +70,8 @@  enum { MAX_PORTS = USHRT_MAX };
  * missing if we have old headers. */
 #define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */
 
+#define FLOW_DUMP_MAX_BATCH 50
+
 struct dpif_netlink_dp {
     /* Generic Netlink header. */
     uint8_t cmd;
@@ -1355,6 +1359,10 @@  struct dpif_netlink_flow_dump {
     struct dpif_flow_dump up;
     struct nl_dump nl_dump;
     atomic_int status;
+    struct netdev_flow_dump **netdev_dumps;
+    int netdev_num;
+    int netdev_given;
+    struct ovs_mutex netdev_lock;
 };
 
 static struct dpif_netlink_flow_dump *
@@ -1363,6 +1371,34 @@  dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
     return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
 }
 
+static void start_netdev_dump(const struct dpif *dpif_,
+                              struct dpif_netlink_flow_dump *dump) {
+
+    if (!netdev_flow_api_enabled) {
+        dump->netdev_num = 0;
+        return;
+    }
+
+    struct netdev_list_element *element;
+    struct ovs_list port_list;
+    int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list);
+    int i = 0;
+
+    dump->netdev_dumps =
+        ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0;
+    dump->netdev_num = ports;
+    dump->netdev_given = 0;
+
+    LIST_FOR_EACH(element, node, &port_list) {
+        dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev);
+        dump->netdev_dumps[i]->port = element->port_no;
+        i++;
+    }
+    netdev_port_list_del(&port_list);
+
+    ovs_mutex_init(&dump->netdev_lock);
+}
+
 static struct dpif_flow_dump *
 dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
 {
@@ -1387,6 +1423,8 @@  dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
     atomic_init(&dump->status, 0);
     dump->up.terse = terse;
 
+    start_netdev_dump(dpif_, dump);
+
     return &dump->up;
 }
 
@@ -1397,6 +1435,16 @@  dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
     unsigned int nl_status = nl_dump_done(&dump->nl_dump);
     int dump_status;
 
+    if (netdev_flow_api_enabled) {
+        for (int i = 0; i < dump->netdev_num; i++) {
+            int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
+            if (err != 0 && err != EOPNOTSUPP) {
+                VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
+            }
+        }
+        free(dump->netdev_dumps);
+    }
+
     /* No other thread has access to 'dump' at this point. */
     atomic_read_relaxed(&dump->status, &dump_status);
     free(dump);
@@ -1410,6 +1458,11 @@  struct dpif_netlink_flow_dump_thread {
     struct dpif_flow_stats stats;
     struct ofpbuf nl_flows;     /* Always used to store flows. */
     struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
+    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
+    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
+    struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
+    int netdev_cur_dump;
+    bool netdev_done;
 };
 
 static struct dpif_netlink_flow_dump_thread *
@@ -1429,6 +1482,8 @@  dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
     thread->dump = dump;
     ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
     thread->nl_actions = NULL;
+    thread->netdev_cur_dump = 0;
+    thread->netdev_done = !(thread->netdev_cur_dump < dump->netdev_num);
 
     return &thread->up;
 }
@@ -1466,6 +1521,90 @@  dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
     dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
 }
 
+static void
+dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
+{
+    struct dpif_netlink_flow_dump *dump = thread->dump;
+
+    ovs_mutex_lock(&dump->netdev_lock);
+    /* if we haven't finished (dumped everything) */
+    if (dump->netdev_given < dump->netdev_num) {
+        /* if we are the first to find that given dump is finished
+         * (for race condition, e.g 3 finish dump 0 at the same time) */
+        if (thread->netdev_cur_dump == dump->netdev_given) {
+            thread->netdev_cur_dump = ++dump->netdev_given;
+            /* did we just finish the last dump? done. */
+            if (dump->netdev_given == dump->netdev_num) {
+                thread->netdev_done = true;
+            }
+        } else {
+            /* otherwise, we are behind, catch up */
+            thread->netdev_cur_dump = dump->netdev_given;
+        }
+    } else {
+        /* some other thread finished */
+        thread->netdev_done = true;
+    }
+    ovs_mutex_unlock(&dump->netdev_lock);
+}
+
+static struct odp_support netdev_flow_support = {
+    .max_mpls_depth = SIZE_MAX,
+    .recirc = false,
+    .ct_state = false,
+    .ct_zone = false,
+    .ct_mark = false,
+    .ct_label = false,
+};
+
+static int
+dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
+                                       struct ofpbuf *key_buf,
+                                       struct ofpbuf *mask_buf,
+                                       struct nlattr *actions,
+                                       struct dpif_flow_stats *stats,
+                                       ovs_u128 *ufid,
+                                       struct dpif_flow *flow,
+                                       bool terse OVS_UNUSED)
+{
+
+    struct odp_flow_key_parms odp_parms = {
+        .flow = &match->flow,
+        .mask = &match->wc.masks,
+        .support = netdev_flow_support,
+    };
+    size_t offset;
+
+    memset(flow, 0, sizeof *flow);
+
+    /* Key */
+    offset = key_buf->size;
+    flow->key = ofpbuf_tail(key_buf);
+    odp_flow_key_from_flow(&odp_parms, key_buf);
+    flow->key_len = key_buf->size - offset;
+
+    /* Mask */
+    offset = mask_buf->size;
+    flow->mask = ofpbuf_tail(mask_buf);
+    odp_parms.key_buf = key_buf;
+    odp_flow_key_from_mask(&odp_parms, mask_buf);
+    flow->mask_len = mask_buf->size - offset;
+
+    /* Actions */
+    flow->actions = nl_attr_get(actions);
+    flow->actions_len = nl_attr_get_size(actions);
+
+    /* Stats */
+    memcpy(&flow->stats, stats, sizeof *stats);
+
+    /* UFID */
+    flow->ufid_present = true;
+    flow->ufid = *ufid;
+
+    flow->pmd_id = PMD_ID_NULL;
+    return 0;
+}
+
 static int
 dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
                             struct dpif_flow *flows, int max_flows)
@@ -1475,11 +1614,51 @@  dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
     struct dpif_netlink_flow_dump *dump = thread->dump;
     struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
     int n_flows;
+    int i = 0;
 
     ofpbuf_delete(thread->nl_actions);
     thread->nl_actions = NULL;
 
     n_flows = 0;
+
+    while (!thread->netdev_done && n_flows < max_flows
+           && i < FLOW_DUMP_MAX_BATCH) {
+        struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
+        struct odputil_keybuf *keybuf = &thread->keybuf[i];
+        struct odputil_keybuf *actbuf = &thread->actbuf[i];
+        struct ofpbuf key, mask, act;
+        struct dpif_flow *f = &flows[n_flows];
+        int cur = thread->netdev_cur_dump;
+        struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
+        struct match match;
+        struct nlattr *actions;
+        struct dpif_flow_stats stats;
+        ovs_u128 ufid;
+        bool has_next;
+
+        ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
+        ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
+        ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
+        has_next = netdev_flow_dump_next(netdev_dump, &match,
+                                        &actions, &stats,
+                                        &ufid,
+                                        &thread->nl_flows,
+                                        &act);
+        if (has_next) {
+            dpif_netlink_netdev_match_to_dpif_flow(&match,
+                                                   &key, &mask,
+                                                   actions,
+                                                   &stats,
+                                                   &ufid,
+                                                   f,
+                                                   dump->up.terse);
+            n_flows++;
+            i++;
+        } else {
+            dpif_netlink_advance_netdev_dump(thread);
+        }
+    }
+
     while (!n_flows
            || (n_flows < max_flows && thread->nl_flows.size)) {
         struct dpif_netlink_flow datapath_flow;