Message ID | 1571135440-24313-6-git-send-email-xiangxia.m.yue@gmail.com |
---|---|
State | Changes Requested |
Delegated to: | David Miller |
Headers | show |
Series | optimize openvswitch flow looking up | expand |
On Wed, Oct 16, 2019 at 5:54 AM <xiangxia.m.yue@gmail.com> wrote: > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com> > > The full looking up on flow table traverses all mask array. > If mask-array is too large, the number of invalid flow-mask > increase, performance will be drop. > > One bad case, for example: M means flow-mask is valid and NULL > of flow-mask means deleted. > > +-------------------------------------------+ > | M | NULL | ... | NULL | M| > +-------------------------------------------+ > > In that case, without this patch, openvswitch will traverses all > mask array, because there will be one flow-mask in the tail. This > patch changes the way of flow-mask inserting and deleting, and the > mask array will be keep as below: there is not a NULL hole. In the > fast path, we can "break" "for" (not "continue") in flow_lookup > when we get a NULL flow-mask. > > "break" > v > +-------------------------------------------+ > | M | M | NULL |... | NULL | NULL| > +-------------------------------------------+ > > This patch don't optimize slow or control path, still using ma->max > to traverse. Slow path: > * tbl_mask_array_realloc > * ovs_flow_tbl_lookup_exact > * flow_mask_find Hi Tonghao, Does this improve performance? After all, the original code simply checks whether the mask is NULL, then goes to the next mask. However, with your patch, doesn't this invalidate the mask cache entries which point to the "M" you swap to the front? See my comments inline. 
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com> > Tested-by: Greg Rose <gvrose8192@gmail.com> > --- > net/openvswitch/flow_table.c | 101 ++++++++++++++++++++++--------------------- > 1 file changed, 52 insertions(+), 49 deletions(-) > > diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c > index 8d4f50d..a10d421 100644 > --- a/net/openvswitch/flow_table.c > +++ b/net/openvswitch/flow_table.c > @@ -538,7 +538,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, > > mask = rcu_dereference_ovsl(ma->masks[i]); > if (!mask) > - continue; > + break; > > flow = masked_flow_lookup(ti, key, mask, n_mask_hit); > if (flow) { /* Found */ > @@ -695,7 +695,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, > int ovs_flow_tbl_num_masks(const struct flow_table *table) > { > struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); > - return ma->count; > + return READ_ONCE(ma->count); > } > > static struct table_instance *table_instance_expand(struct table_instance *ti, > @@ -704,21 +704,33 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, > return table_instance_rehash(ti, ti->n_buckets * 2, ufid); > } > > -static void tbl_mask_array_delete_mask(struct mask_array *ma, > - struct sw_flow_mask *mask) > +static void tbl_mask_array_del_mask(struct flow_table *tbl, > + struct sw_flow_mask *mask) > { > - int i; > + struct mask_array *ma = ovsl_dereference(tbl->mask_array); > + int i, ma_count = READ_ONCE(ma->count); > > /* Remove the deleted mask pointers from the array */ > - for (i = 0; i < ma->max; i++) { > - if (mask == ovsl_dereference(ma->masks[i])) { > - RCU_INIT_POINTER(ma->masks[i], NULL); > - ma->count--; > - kfree_rcu(mask, rcu); > - return; > - } > + for (i = 0; i < ma_count; i++) { > + if (mask == ovsl_dereference(ma->masks[i])) > + goto found; > } > + > BUG(); > + return; > + > +found: > + WRITE_ONCE(ma->count, ma_count -1); > + > + rcu_assign_pointer(ma->masks[i], 
ma->masks[ma_count -1]); > + RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); So when you swap the ma->masks[ma_count -1], the mask cache entry who's 'mask_index == ma_count' become all invalid? Regards, William <snip>
On Sat, Oct 19, 2019 at 7:27 AM William Tu <u9012063@gmail.com> wrote: > > On Wed, Oct 16, 2019 at 5:54 AM <xiangxia.m.yue@gmail.com> wrote: > > > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com> > > > > The full looking up on flow table traverses all mask array. > > If mask-array is too large, the number of invalid flow-mask > > increase, performance will be drop. > > > > One bad case, for example: M means flow-mask is valid and NULL > > of flow-mask means deleted. > > > > +-------------------------------------------+ > > | M | NULL | ... | NULL | M| > > +-------------------------------------------+ > > > > In that case, without this patch, openvswitch will traverses all > > mask array, because there will be one flow-mask in the tail. This > > patch changes the way of flow-mask inserting and deleting, and the > > mask array will be keep as below: there is not a NULL hole. In the > > fast path, we can "break" "for" (not "continue") in flow_lookup > > when we get a NULL flow-mask. > > > > "break" > > v > > +-------------------------------------------+ > > | M | M | NULL |... | NULL | NULL| > > +-------------------------------------------+ > > > > This patch don't optimize slow or control path, still using ma->max > > to traverse. Slow path: > > * tbl_mask_array_realloc > > * ovs_flow_tbl_lookup_exact > > * flow_mask_find > > > Hi Tonghao, > > Does this improve performance? After all, the original code simply > check whether the mask is NULL, then goto next mask. 
I tested the performance, but I disabled the mask cache, and used the dpdk-pktgen to generate packets: The test ovs flow: ovs-dpctl add-dp system@ovs-system ovs-dpctl add-if system@ovs-system eth6 ovs-dpctl add-if system@ovs-system eth7 for m in $(seq 1 100 | xargs printf '%.2x\n'); do ovs-dpctl add-flow ovs-system "in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)" 2 done ovs-dpctl add-flow ovs-system "in_port(1),eth(dst=98:03:9b:6e:4a:f5/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)" 2 ovs-dpctl add-flow ovs-system "in_port(2),eth(dst=98:03:9b:6e:4a:f4/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)" 1 for m in $(seq 101 160 | xargs printf '%.2x\n'); do ovs-dpctl add-flow ovs-system "in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)" 2 done for m in $(seq 1 100 | xargs printf '%.2x\n'); do ovs-dpctl del-flow ovs-system "in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)" done Without this patch: 982481pps (64B) With this patch: 1112495 pps (64B), about a 13% improvement > However, with your patch, isn't this invalidated the mask cache entry which > point to the "M" you swap to the front? See my commands inline. 
> > > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com> > > Tested-by: Greg Rose <gvrose8192@gmail.com> > > --- > > net/openvswitch/flow_table.c | 101 ++++++++++++++++++++++--------------------- > > 1 file changed, 52 insertions(+), 49 deletions(-) > > > > diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c > > index 8d4f50d..a10d421 100644 > > --- a/net/openvswitch/flow_table.c > > +++ b/net/openvswitch/flow_table.c > > @@ -538,7 +538,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, > > > > mask = rcu_dereference_ovsl(ma->masks[i]); > > if (!mask) > > - continue; > > + break; > > > > flow = masked_flow_lookup(ti, key, mask, n_mask_hit); > > if (flow) { /* Found */ > > @@ -695,7 +695,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, > > int ovs_flow_tbl_num_masks(const struct flow_table *table) > > { > > struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); > > - return ma->count; > > + return READ_ONCE(ma->count); > > } > > > > static struct table_instance *table_instance_expand(struct table_instance *ti, > > @@ -704,21 +704,33 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, > > return table_instance_rehash(ti, ti->n_buckets * 2, ufid); > > } > > > > -static void tbl_mask_array_delete_mask(struct mask_array *ma, > > - struct sw_flow_mask *mask) > > +static void tbl_mask_array_del_mask(struct flow_table *tbl, > > + struct sw_flow_mask *mask) > > { > > - int i; > > + struct mask_array *ma = ovsl_dereference(tbl->mask_array); > > + int i, ma_count = READ_ONCE(ma->count); > > > > /* Remove the deleted mask pointers from the array */ > > - for (i = 0; i < ma->max; i++) { > > - if (mask == ovsl_dereference(ma->masks[i])) { > > - RCU_INIT_POINTER(ma->masks[i], NULL); > > - ma->count--; > > - kfree_rcu(mask, rcu); > > - return; > > - } > > + for (i = 0; i < ma_count; i++) { > > + if (mask == ovsl_dereference(ma->masks[i])) > > + goto found; > > } > > + > > BUG(); 
> > + return; > > + > > +found: > > + WRITE_ONCE(ma->count, ma_count -1); > > + > > + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); > > + RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); > > So when you swap the ma->masks[ma_count -1], the mask cache entry > who's 'mask_index == ma_count' become all invalid? Yes, a little tricky. > Regards, > William > > <snip>
> > Hi Tonghao, > > > > Does this improve performance? After all, the original code simply > > check whether the mask is NULL, then goto next mask. > I tested the performance, but I disable the mask cache, and use the > dpdk-pktgen to generate packets: > The test ovs flow: > ovs-dpctl add-dp system@ovs-system > ovs-dpctl add-if system@ovs-system eth6 > ovs-dpctl add-if system@ovs-system eth7 > > for m in $(seq 1 100 | xargs printf '%.2x\n'); do > ovs-dpctl add-flow ovs-system > "in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)" > 2 > done > > ovs-dpctl add-flow ovs-system > "in_port(1),eth(dst=98:03:9b:6e:4a:f5/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)" > 2 > ovs-dpctl add-flow ovs-system > "in_port(2),eth(dst=98:03:9b:6e:4a:f4/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)" > 1 > > for m in $(seq 101 160 | xargs printf '%.2x\n'); do > ovs-dpctl add-flow ovs-system > "in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)" > 2 > done > > for m in $(seq 1 100 | xargs printf '%.2x\n'); do > ovs-dpctl del-flow ovs-system > "in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)" > done > > Without this patch: 982481pps (64B) > With this patch: 1112495 pps (64B), about 13% improve > Hi Tonghao, Thanks for doing the measurement. Based on the result (skipping 100 NULL mask lookups for a 13% improvement), and with the additional overhead of mask cache entries being invalidated while refilling these 100 gaps, I'd argue that this patch is not necessary. But let's wait for others' comments. Regards, William > > However, with your patch, isn't this invalidated the mask cache entry which > > point to the "M" you swap to the front? See my commands inline. 
> > > > > > > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com> > > > Tested-by: Greg Rose <gvrose8192@gmail.com> > > > --- <snip> > > > static struct table_instance *table_instance_expand(struct table_instance *ti, > > > @@ -704,21 +704,33 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, > > > return table_instance_rehash(ti, ti->n_buckets * 2, ufid); > > > } > > > > > > -static void tbl_mask_array_delete_mask(struct mask_array *ma, > > > - struct sw_flow_mask *mask) > > > +static void tbl_mask_array_del_mask(struct flow_table *tbl, > > > + struct sw_flow_mask *mask) > > > { > > > - int i; > > > + struct mask_array *ma = ovsl_dereference(tbl->mask_array); > > > + int i, ma_count = READ_ONCE(ma->count); > > > > > > /* Remove the deleted mask pointers from the array */ > > > - for (i = 0; i < ma->max; i++) { > > > - if (mask == ovsl_dereference(ma->masks[i])) { > > > - RCU_INIT_POINTER(ma->masks[i], NULL); > > > - ma->count--; > > > - kfree_rcu(mask, rcu); > > > - return; > > > - } > > > + for (i = 0; i < ma_count; i++) { > > > + if (mask == ovsl_dereference(ma->masks[i])) > > > + goto found; > > > } > > > + > > > BUG(); > > > + return; > > > + > > > +found: > > > + WRITE_ONCE(ma->count, ma_count -1); > > > + > > > + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); > > > + RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); > > > > So when you swap the ma->masks[ma_count -1], the mask cache entry > > who's 'mask_index == ma_count' become all invalid? > Yes, a little tricky. > > Regards, > > William > >
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 8d4f50d..a10d421 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -538,7 +538,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, mask = rcu_dereference_ovsl(ma->masks[i]); if (!mask) - continue; + break; flow = masked_flow_lookup(ti, key, mask, n_mask_hit); if (flow) { /* Found */ @@ -695,7 +695,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, int ovs_flow_tbl_num_masks(const struct flow_table *table) { struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); - return ma->count; + return READ_ONCE(ma->count); } static struct table_instance *table_instance_expand(struct table_instance *ti, @@ -704,21 +704,33 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } -static void tbl_mask_array_delete_mask(struct mask_array *ma, - struct sw_flow_mask *mask) +static void tbl_mask_array_del_mask(struct flow_table *tbl, + struct sw_flow_mask *mask) { - int i; + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int i, ma_count = READ_ONCE(ma->count); /* Remove the deleted mask pointers from the array */ - for (i = 0; i < ma->max; i++) { - if (mask == ovsl_dereference(ma->masks[i])) { - RCU_INIT_POINTER(ma->masks[i], NULL); - ma->count--; - kfree_rcu(mask, rcu); - return; - } + for (i = 0; i < ma_count; i++) { + if (mask == ovsl_dereference(ma->masks[i])) + goto found; } + BUG(); + return; + +found: + WRITE_ONCE(ma->count, ma_count -1); + + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); + RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); + + kfree_rcu(mask, rcu); + + /* Shrink the mask array if necessary. */ + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && + ma_count <= (ma->max / 3)) + tbl_mask_array_realloc(tbl, ma->max / 2); } /* Remove 'mask' from the mask list, if it is not needed any more. 
*/ @@ -732,17 +744,8 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) BUG_ON(!mask->ref_count); mask->ref_count--; - if (!mask->ref_count) { - struct mask_array *ma; - - ma = ovsl_dereference(tbl->mask_array); - tbl_mask_array_delete_mask(ma, mask); - - /* Shrink the mask array if necessary. */ - if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && - ma->count <= (ma->max / 3)) - tbl_mask_array_realloc(tbl, ma->max / 2); - } + if (!mask->ref_count) + tbl_mask_array_del_mask(tbl, mask); } } @@ -806,6 +809,29 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, return NULL; } +static int tbl_mask_array_add_mask(struct flow_table *tbl, + struct sw_flow_mask *new) +{ + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int err, ma_count = READ_ONCE(ma->count); + + if (ma_count >= ma->max) { + err = tbl_mask_array_realloc(tbl, ma->max + + MASK_ARRAY_SIZE_MIN); + if (err) + return err; + + ma = ovsl_dereference(tbl->mask_array); + } + + BUG_ON(ovsl_dereference(ma->masks[ma_count])); + + rcu_assign_pointer(ma->masks[ma_count], new); + WRITE_ONCE(ma->count, ma_count +1); + + return 0; +} + /* Add 'mask' into the mask list, if it is not already there. */ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, const struct sw_flow_mask *new) @@ -814,9 +840,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, mask = flow_mask_find(tbl, new); if (!mask) { - struct mask_array *ma; - int i; - /* Allocate a new mask if none exsits. */ mask = mask_alloc(); if (!mask) @@ -825,29 +848,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, mask->range = new->range; /* Add mask to mask-list. 
*/ - ma = ovsl_dereference(tbl->mask_array); - if (ma->count >= ma->max) { - int err; - - err = tbl_mask_array_realloc(tbl, ma->max + - MASK_ARRAY_SIZE_MIN); - if (err) { - kfree(mask); - return err; - } - - ma = ovsl_dereference(tbl->mask_array); - } - - for (i = 0; i < ma->max; i++) { - const struct sw_flow_mask *t; - - t = ovsl_dereference(ma->masks[i]); - if (!t) { - rcu_assign_pointer(ma->masks[i], mask); - ma->count++; - break; - } + if (tbl_mask_array_add_mask(tbl, mask)) { + kfree(mask); + return -ENOMEM; } } else { BUG_ON(!mask->ref_count);