Message ID | 20200507064953.460275-1-numans@ovn.org |
---|---|
State | Accepted |
Headers | show |
Series | [ovs-dev,ovn,v3] Support selection fields in load balancer. | expand |
Acked-by: Mark Michelson <mmichels@redhat.com> On 5/7/20 2:49 AM, numans@ovn.org wrote: > From: Numan Siddique <numans@ovn.org> > > This patch add a new column 'selection_fields' in Load_Balancer > table in NB DB. CMS can define a set of packet headers to use > while selecting a backend. If this column is set, OVN will add the > flow in group table with selection method as 'hash' with the set fields. > Otherwise it will use the default 'dp_hash' selection method. > > If a load balancer is configured with the selection_fields as > selection_fields : [ip_dst, ip_src, tp_dst, tp_src] > > then with this patch, the modified ct_lb action will look like > - ct_lb(backends=IP1:P1,IP2:P1; hash_fields="ip_dst,ip_src,tp_dst,tp_src"); > > And the OF flow will look like > - group_id=2,type=select,selection_method=hash, > fields(ip_src,ip_dst,tcp_src,tcp_dst),bucket=bucket_id:0,weight:100,actions=ct(.... > > Tested-by: Maciej Józefczyk <mjozefcz@redhat.com> > Acked-by: Maciej Józefczyk <mjozefcz@redhat.com> > Acked-by: Han Zhou <hzhou@ovn.org> > Signed-off-by: Numan Siddique <numans@ovn.org> > --- > v2 -> v3 > ----- > * Updated the NEWS item as per Han's suggestion. > > v1 -> v2 > * The series has only 1 patch now as the first patch is merged. > * Addressed review comments from Mark and Han. > * Added test cases. > * Added documentation. > > NEWS | 6 ++ > include/ovn/actions.h | 1 + > lib/actions.c | 45 +++++++++-- > northd/ovn-northd.c | 30 ++++++-- > ovn-nb.ovsschema | 10 ++- > ovn-nb.xml | 27 +++++++ > tests/ovn-northd.at | 24 +++--- > tests/ovn.at | 49 +++++++----- > tests/system-ovn.at | 172 +++++++++++++++++++++++++++++++++++++++--- > 9 files changed, 311 insertions(+), 53 deletions(-) > > diff --git a/NEWS b/NEWS > index c2b7945df..fea196d34 100644 > --- a/NEWS > +++ b/NEWS > @@ -14,6 +14,12 @@ OVN v20.03.0 - xx xxx xxxx > - Added support for ECMP routes in OVN router. > - Added IPv6 Prefix Delegation support in OVN. > - OVN now uses OpenFlow 1.5. > + - Added support to choose selection methods - dp_hash or > + hash (with specified hash fields) for OVN load balancer > + backend selection. This is incompatible with older versions. > + Care should be taken while upgrading as the existing > + load balancer traffic will be affected if ovn-controllers > + are not stopped before uprading northd services. > > - OVN Interconnection: > * Support for L3 interconnection of multiple OVN deployments with tunnels > diff --git a/include/ovn/actions.h b/include/ovn/actions.h > index 42d45a74c..4a54abe17 100644 > --- a/include/ovn/actions.h > +++ b/include/ovn/actions.h > @@ -259,6 +259,7 @@ struct ovnact_ct_lb { > struct ovnact_ct_lb_dst *dsts; > size_t n_dsts; > uint8_t ltable; /* Logical table ID of next table. */ > + char *hash_fields; > }; > > struct ovnact_select_dst { > diff --git a/lib/actions.c b/lib/actions.c > index 021a376b4..c50615177 100644 > --- a/lib/actions.c > +++ b/lib/actions.c > @@ -951,9 +951,18 @@ parse_ct_lb_action(struct action_context *ctx) > struct ovnact_ct_lb_dst *dsts = NULL; > size_t allocated_dsts = 0; > size_t n_dsts = 0; > + char *hash_fields = NULL; > > - if (lexer_match(ctx->lexer, LEX_T_LPAREN)) { > - while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { > + if (lexer_match(ctx->lexer, LEX_T_LPAREN) && > + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { > + if (!lexer_match_id(ctx->lexer, "backends") || > + !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { > + lexer_syntax_error(ctx->lexer, "expecting backends"); > + return; > + } > + > + while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && > + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { > struct ovnact_ct_lb_dst dst; > if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) { > /* IPv6 address and port */ > @@ -1020,12 +1029,27 @@ parse_ct_lb_action(struct action_context *ctx) > } > dsts[n_dsts++] = dst; > } > + > + if (lexer_match_id(ctx->lexer, "hash_fields")) { > + if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || > + ctx->lexer->token.type != LEX_T_STRING || > + lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { > + lexer_syntax_error(ctx->lexer, "invalid hash_fields"); > + free(dsts); > + return; > + } > + > + hash_fields = xstrdup(ctx->lexer->token.s); > + lexer_get(ctx->lexer); > + lexer_get(ctx->lexer); > + } > } > > struct ovnact_ct_lb *cl = ovnact_put_CT_LB(ctx->ovnacts); > cl->ltable = ctx->pp->cur_ltable + 1; > cl->dsts = dsts; > cl->n_dsts = n_dsts; > + cl->hash_fields = hash_fields; > } > > static void > @@ -1033,10 +1057,10 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s) > { > ds_put_cstr(s, "ct_lb"); > if (cl->n_dsts) { > - ds_put_char(s, '('); > + ds_put_cstr(s, "(backends="); > for (size_t i = 0; i < cl->n_dsts; i++) { > if (i) { > - ds_put_cstr(s, ", "); > + ds_put_char(s, ','); > } > > const struct ovnact_ct_lb_dst *dst = &cl->dsts[i]; > @@ -1056,7 +1080,13 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s) > } > } > ds_put_char(s, ')'); > + > + if (cl->hash_fields) { > + ds_chomp(s, ')'); > + ds_put_format(s, "; hash_fields=\"%s\")", cl->hash_fields); > + } > } > + > ds_put_char(s, ';'); > } > > @@ -1103,7 +1133,11 @@ encode_CT_LB(const struct ovnact_ct_lb *cl, > : MFF_LOG_DNAT_ZONE - MFF_REG0; > > struct ds ds = DS_EMPTY_INITIALIZER; > - ds_put_format(&ds, "type=select,selection_method=dp_hash"); > + ds_put_format(&ds, "type=select,selection_method=%s", > + cl->hash_fields ? "hash": "dp_hash"); > + if (cl->hash_fields) { > + ds_put_format(&ds, ",fields(%s)", cl->hash_fields); > + } > > BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0); > BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS); > @@ -1145,6 +1179,7 @@ static void > ovnact_ct_lb_free(struct ovnact_ct_lb *ct_lb) > { > free(ct_lb->dsts); > + free(ct_lb->hash_fields); > } > > static void > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index 3324d20f4..dd07f45d9 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -3112,7 +3112,7 @@ struct ovn_lb { > struct hmap_node hmap_node; > > const struct nbrec_load_balancer *nlb; /* May be NULL. */ > - > + char *selection_fields; > struct lb_vip *vips; > size_t n_vips; > }; > @@ -3340,6 +3340,15 @@ ovn_lb_create(struct northd_context *ctx, struct hmap *lbs, > n_vips++; > } > > + if (lb->nlb->n_selection_fields) { > + struct ds sel_fields = DS_EMPTY_INITIALIZER; > + for (size_t i = 0; i < lb->nlb->n_selection_fields; i++) { > + ds_put_format(&sel_fields, "%s,", lb->nlb->selection_fields[i]); > + } > + ds_chomp(&sel_fields, ','); > + lb->selection_fields = ds_steal_cstr(&sel_fields); > + } > + > return lb; > } > > @@ -3358,13 +3367,15 @@ ovn_lb_destroy(struct ovn_lb *lb) > free(lb->vips[i].backends); > } > free(lb->vips); > + free(lb->selection_fields); > } > > static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip, > - struct ds *action) > + struct ds *action, > + char *selection_fields) > { > if (lb_vip->health_check) { > - ds_put_cstr(action, "ct_lb("); > + ds_put_cstr(action, "ct_lb(backends="); > > size_t n_active_backends = 0; > for (size_t k = 0; k < lb_vip->n_backends; k++) { > @@ -3388,7 +3399,13 @@ static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip, > ds_put_cstr(action, ");"); > } > } else { > - ds_put_format(action, "ct_lb(%s);", lb_vip->backend_ips); > + ds_put_format(action, "ct_lb(backends=%s);", lb_vip->backend_ips); > + } > + > + if (selection_fields && selection_fields[0]) { > + ds_chomp(action, ';'); > + ds_chomp(action, ')'); > + ds_put_format(action, "; hash_fields=\"%s\");", selection_fields); > } > } > > @@ -5664,7 +5681,7 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, struct ovn_lb *lb) > > /* New connections in Ingress table. */ > struct ds action = DS_EMPTY_INITIALIZER; > - build_lb_vip_ct_lb_actions(lb_vip, &action); > + build_lb_vip_ct_lb_actions(lb_vip, &action, lb->selection_fields); > > struct ds match = DS_EMPTY_INITIALIZER; > ds_put_format(&match, "ct.new && %s.dst == %s", ip_match, lb_vip->vip); > @@ -9315,7 +9332,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, > for (size_t j = 0; j < lb->n_vips; j++) { > struct lb_vip *lb_vip = &lb->vips[j]; > ds_clear(&actions); > - build_lb_vip_ct_lb_actions(lb_vip, &actions); > + build_lb_vip_ct_lb_actions(lb_vip, &actions, > + lb->selection_fields); > > if (!sset_contains(&all_ips, lb_vip->vip)) { > sset_add(&all_ips, lb_vip->vip); > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema > index b2a046571..a06972aa0 100644 > --- a/ovn-nb.ovsschema > +++ b/ovn-nb.ovsschema > @@ -1,7 +1,7 @@ > { > "name": "OVN_Northbound", > - "version": "5.22.0", > - "cksum": "384164739 25476", > + "version": "5.23.0", > + "cksum": "111023208 25806", > "tables": { > "NB_Global": { > "columns": { > @@ -179,6 +179,12 @@ > "ip_port_mappings": { > "type": {"key": "string", "value": "string", > "min": 0, "max": "unlimited"}}, > + "selection_fields": { > + "type": {"key": {"type": "string", > + "enum": ["set", > + ["eth_src", "eth_dst", "ip_src", "ip_dst", > + "tp_src", "tp_dst"]]}, > + "min": 0, "max": "unlimited"}}, > "external_ids": { > "type": {"key": "string", "value": "string", > "min": 0, "max": "unlimited"}}}, > diff --git a/ovn-nb.xml b/ovn-nb.xml > index af15c550a..95ee4c9e6 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -1513,6 +1513,33 @@ > </p> > </column> > > + <column name="selection_fields"> > + <p> > + OVN native load balancers are supported using the OpenFlow groups > + of type <code>select</code>. OVS supports two selection methods: > + <code>dp_hash</code> and <code>hash (with optional fields > + specified)</code> in selecting the buckets of a group. > + Please see the OVS documentation (man ovs-ofctl) > + for more details on the selection methods. Each endpoint IP (and port > + if set) is mapped to a bucket in the group flow. > + </p> > + > + <p> > + CMS can choose the <code>hash</code> selection method by setting the > + selection fields in this column. <code>ovs-vswitchd</code> uses the > + specified fields in generating the hash. > + </p> > + > + <p> > + <code>dp_hash</code> selection method uses the assistance of > + datapath to calculate the hash and it is expected to be > + faster than <code>hash</code> selection method. So CMS should take > + this into consideration before using the <code>hash</code> method. > + Please consult the OVS documentation and OVS sources for the > + implementation details. > + </p> > + </column> > + > <group title="Common Columns"> > <column name="external_ids"> > See <em>External IDs</em> at the beginning of this document. > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index a0109ae94..57c9eedd5 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -1183,7 +1183,7 @@ ovn-nbctl --wait=sb ls-lb-add sw0 lb1 > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) > ]) > > # Delete the Load_Balancer_Health_Check > @@ -1192,7 +1192,7 @@ OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list service_monitor | wc -l`]) > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) > ]) > > # Create the Load_Balancer_Health_Check again. > @@ -1205,7 +1205,7 @@ service_monitor | sed '/^$/d' | wc -l`]) > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) > ]) > > # Get the uuid of both the service_monitor > @@ -1221,7 +1221,7 @@ OVS_WAIT_UNTIL([ > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) > ]) > > # Set the service monitor for sw0-p1 to offline > @@ -1251,7 +1251,7 @@ OVS_WAIT_UNTIL([ > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) > ]) > > # Set the service monitor for sw1-p1 to error > @@ -1263,7 +1263,7 @@ OVS_WAIT_UNTIL([ > ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" \ > | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) > ]) > > # Add one more vip to lb1 > @@ -1293,8 +1293,8 @@ service_monitor port=1000 | sed '/^$/d' | wc -l`]) > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000);) > ]) > > # Set the service monitor for sw1-p1 to online > @@ -1306,16 +1306,16 @@ OVS_WAIT_UNTIL([ > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000,20.0.0.3:80);) > ]) > > # Associate lb1 to sw1 > ovn-nbctl --wait=sb ls-lb-add sw1 lb1 > ovn-sbctl dump-flows sw1 | grep ct_lb | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000,20.0.0.3:80);) > ]) > > # Now create lb2 same as lb1 but udp protocol. > diff --git a/tests/ovn.at b/tests/ovn.at > index f168b147f..e49042eeb 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -970,29 +970,42 @@ ct_lb(); > encodes as ct(table=19,zone=NXM_NX_REG13[0..15],nat) > has prereqs ip > ct_lb(192.168.1.2:80, 192.168.1.3:80); > + Syntax error at `192.168.1.2' expecting backends. > +ct_lb(backends=192.168.1.2:80,192.168.1.3:80); > encodes as group:1 > uses group: id(1), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15])) > has prereqs ip > -ct_lb(192.168.1.2, 192.168.1.3, ); > - formats as ct_lb(192.168.1.2, 192.168.1.3); > +ct_lb(backends=192.168.1.2, 192.168.1.3, ); > + formats as ct_lb(backends=192.168.1.2,192.168.1.3); > encodes as group:2 > uses group: id(2), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15])) > has prereqs ip > -ct_lb(fd0f::2, fd0f::3, ); > - formats as ct_lb(fd0f::2, fd0f::3); > +ct_lb(backends=fd0f::2, fd0f::3, ); > + formats as ct_lb(backends=fd0f::2,fd0f::3); > encodes as group:3 > uses group: id(3), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) > has prereqs ip > > -ct_lb(192.168.1.2:); > +ct_lb(backends=192.168.1.2:); > Syntax error at `)' expecting port number. > -ct_lb(192.168.1.2:123456); > +ct_lb(backends=192.168.1.2:123456); > Syntax error at `123456' expecting port number. > -ct_lb(foo); > +ct_lb(backends=foo); > Syntax error at `foo' expecting IP address. > -ct_lb([192.168.1.2]); > +ct_lb(backends=[192.168.1.2]); > Syntax error at `192.168.1.2' expecting IPv6 address. > > +ct_lb(backends=192.168.1.2:80,192.168.1.3:80; hash_fields=eth_src,eth_dst,ip_src); > + Syntax error at `eth_src' invalid hash_fields. > +ct_lb(backends=192.168.1.2:80,192.168.1.3:80; hash_fields="eth_src,eth_dst,ip_src"); > + encodes as group:4 > + uses group: id(4), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15])) > + has prereqs ip > +ct_lb(backends=fd0f::2,fd0f::3; hash_fields="eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst"); > + encodes as group:5 > + uses group: id(5), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) > + has prereqs ip > + > # ct_next > ct_next; > encodes as ct(table=19,zone=NXM_NX_REG13[0..15]) > @@ -1520,13 +1533,13 @@ handle_svc_check(reg0); > # select > reg9[16..31] = select(1=50, 2=100, 3, ); > formats as reg9[16..31] = select(1=50, 2=100, 3=100); > - encodes as group:4 > - uses group: id(4), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19)) > + encodes as group:6 > + uses group: id(6), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19)) > > reg0 = select(1, 2); > formats as reg0 = select(1=100, 2=100); > - encodes as group:5 > - uses group: id(5), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19)) > + encodes as group:7 > + uses group: id(7), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19)) > > reg0 = select(1=, 2); > Syntax error at `,' expecting weight. > @@ -1542,12 +1555,12 @@ reg0[0..14] = select(1, 2, 3); > cannot use 15-bit field reg0[0..14] for "select", which requires at least 16 bits. > > fwd_group(liveness="true", childports="eth0", "lsp1"); > - encodes as group:6 > - uses group: id(6), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) > + encodes as group:8 > + uses group: id(8), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) > > fwd_group(childports="eth0", "lsp1"); > - encodes as group:7 > - uses group: id(7), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) > + encodes as group:9 > + uses group: id(9), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) > > fwd_group(childports=eth0); > Syntax error at `eth0' expecting logical switch port. > @@ -18002,12 +18015,12 @@ service_monitor | sed '/^$/d' | wc -l`]) > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) > ]) > > ovn-sbctl dump-flows lr0 | grep ct_lb | grep priority=120 > lflows.txt > AT_CHECK([cat lflows.txt], [0], [dnl > - table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) > + table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) > ]) > > # get the svc monitor mac. > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > index 117f1e835..9a5ef1ec3 100644 > --- a/tests/system-ovn.at > +++ b/tests/system-ovn.at > @@ -1095,15 +1095,15 @@ ovn-nbctl lsp-add bar bar3 \ > -- lsp-set-addresses bar3 "f0:00:0f:01:02:05 172.16.1.4" > > # Config OVN load-balancer with a VIP. > -uuid=`ovn-nbctl create load_balancer vips:30.0.0.1="172.16.1.2,172.16.1.3,172.16.1.4"` > -ovn-nbctl set logical_switch foo load_balancer=$uuid > +ovn-nbctl lb-add lb1 30.0.0.1 "172.16.1.2,172.16.1.3,172.16.1.4" > +ovn-nbctl ls-lb-add foo lb1 > > # Create another load-balancer with another VIP. > -uuid=`ovn-nbctl create load_balancer vips:30.0.0.3="172.16.1.2,172.16.1.3,172.16.1.4"` > -ovn-nbctl add logical_switch foo load_balancer $uuid > +lb2_uuid=`ovn-nbctl create load_balancer name=lb2 vips:30.0.0.3="172.16.1.2,172.16.1.3,172.16.1.4"` > +ovn-nbctl ls-lb-add foo lb2 > > # Config OVN load-balancer with another VIP (this time with ports). > -ovn-nbctl set load_balancer $uuid vips:'"30.0.0.2:8000"'='"172.16.1.2:80,172.16.1.3:80,172.16.1.4:80"' > +ovn-nbctl set load_balancer $lb2_uuid vips:'"30.0.0.2:8000"'='"172.16.1.2:80,172.16.1.3:80,172.16.1.4:80"' > > # Wait for ovn-controller to catch up. > ovn-nbctl --wait=hv sync > @@ -1157,6 +1157,82 @@ tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(s > tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.4,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > ]) > > +# Configure selection_fields. > +ovn-nbctl set load_balancer $lb2_uuid selection_fields="ip_src,ip_dst,tp_src,tp_dst" > +OVS_WAIT_UNTIL([ > + test $(ovs-ofctl dump-groups br-int | \ > + grep "selection_method=hash,fields(ip_src,ip_dst,tcp_src,tcp_dst)" -c) -eq 2 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +dnl Test load-balancing that includes L4 ports in NAT. > +for i in `seq 1 20`; do > + echo Request $i > + NS_CHECK_EXEC([foo1], [wget 30.0.0.2:8000 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) > +done > + > +dnl Each server should have at least one connection. > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.2) | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.3,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.4,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +echo "foo" > foo > +for i in `seq 1 20`; do > + echo Request $i > + ip netns exec foo1 nc -p 30000 30.0.0.2 8000 < foo > +done > + > +dnl Only one backend should be chosen. > +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 -c) -eq 1]) > + > +ovn-nbctl set load_balancer $lb2_uuid selection_fields="ip_src" > +OVS_WAIT_UNTIL([ > + test $(ovs-ofctl dump-groups br-int | \ > + grep "selection_method=hash,fields=ip_src" -c) -eq 2 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > +for i in `seq 1 20`; do > + echo Request $i > + ip netns exec foo1 nc 30.0.0.2 8000 < foo > +done > + > +dnl Only one backend should be chosen as eth_src and ip_src is fixed. > +bar1_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1.2 -c) > +bar2_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1.3 -c) > +bar3_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1.4 -c) > + > +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1 -c) -ne 0]) > + > +if [[ "$bar1_ct" == "20" ]]; then > + AT_CHECK([test $bar1_ct -eq 20]) > + AT_CHECK([test $bar2_ct -eq 0]) > + AT_CHECK([test $bar3_ct -eq 0]) > +else > + AT_CHECK([test $bar1_ct -eq 0]) > +fi > + > +if [[ "$bar2_ct" == "20" ]]; then > + AT_CHECK([test $bar1_ct -eq 20]) > + AT_CHECK([test $bar2_ct -eq 0]) > + AT_CHECK([test $bar3_ct -eq 0]) > +else > + AT_CHECK([test $bar2_ct -eq 0]) > +fi > + > +if [[ "$bar3_ct" == "20" ]]; then > + AT_CHECK([test $bar1_ct -eq 20]) > + AT_CHECK([test $bar2_ct -eq 0]) > + AT_CHECK([test $bar3_ct -eq 0]) > +else > + AT_CHECK([test $bar3_ct -eq 0]) > +fi > > OVS_APP_EXIT_AND_WAIT([ovn-controller]) > > @@ -1246,11 +1322,11 @@ uuid=`ovn-nbctl create load_balancer vips:\"fd03::1\"=\"fd02::2,fd02::3,fd02::4 > ovn-nbctl set logical_switch foo load_balancer=$uuid > > # Create another load-balancer with another VIP. > -uuid=`ovn-nbctl create load_balancer vips:\"fd03::3\"=\"fd02::2,fd02::3,fd02::4\"` > -ovn-nbctl add logical_switch foo load_balancer $uuid > +lb2_uuid=`ovn-nbctl create load_balancer vips:\"fd03::3\"=\"fd02::2,fd02::3,fd02::4\"` > +ovn-nbctl add logical_switch foo load_balancer $lb2_uuid > > # Config OVN load-balancer with another VIP (this time with ports). > -ovn-nbctl set load_balancer $uuid vips:'"[[fd03::2]]:8000"'='"@<:@fd02::2@:>@:80,@<:@fd02::3@:>@:80,@<:@fd02::4@:>@:80"' > +ovn-nbctl set load_balancer $lb2_uuid vips:'"[[fd03::2]]:8000"'='"@<:@fd02::2@:>@:80,@<:@fd02::3@:>@:80,@<:@fd02::4@:>@:80"' > > # Wait for ovn-controller to catch up. > ovn-nbctl --wait=hv sync > @@ -1304,7 +1380,83 @@ tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd > tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::4,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > ]) > > +# Configure selection_fields. > +ovn-nbctl set load_balancer $lb2_uuid selection_fields="ip_src,ip_dst,tp_src,tp_dst" > +OVS_WAIT_UNTIL([ > + test $(ovs-ofctl dump-groups br-int | \ > + grep "selection_method=hash,fields(ip_src,ip_dst,tcp_src,tcp_dst)" -c) -eq 2 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +dnl Test load-balancing that includes L4 ports in NAT. > +for i in `seq 1 20`; do > + echo Request $i > + NS_CHECK_EXEC([foo1], [wget http://[[fd03::2]]:8000 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) > +done > + > +dnl Each server should have at least one connection. > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd03::2) | grep -v fe80 | \ > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > +tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::2,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::3,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::4,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > + > +echo "foo" > foo > +for i in `seq 1 20`; do > + echo Request $i > + ip netns exec foo1 nc -6 -p 30000 fd03::2 8000 < foo > +done > + > +# Only one backend should be chosen. Since the source port is fixed, > +# there should be only one conntrack entry. > +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep fd03::2 -c) -eq 1]) > + > +ovn-nbctl set load_balancer $lb2_uuid selection_fields="eth_src,ip_src" > +OVS_WAIT_UNTIL([ > + test $(ovs-ofctl dump-groups br-int | \ > + grep "selection_method=hash,fields(eth_src,ip_src)" -c) -eq 2 > +]) > + > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > +for i in `seq 1 20`; do > + echo Request $i > + ip netns exec foo1 nc -6 fd03::2 8000 < foo > +done > > +dnl Only one backend should be chosen as eth_src and ip_src is fixed. > +bar1_ct=$(ovs-appctl dpctl/dump-conntrack | grep fd03::2 | grep fd02::2 -c) > +bar2_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep fd02::3 -c) > +bar3_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep fd02::4 -c) > + > +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep fd03::2 | grep fd02 -c) -ne 0]) > + > +if [[ "$bar1_ct" == "20" ]]; then > + AT_CHECK([test $bar1_ct -eq 20]) > + AT_CHECK([test $bar2_ct -eq 0]) > + AT_CHECK([test $bar3_ct -eq 0]) > +else > + AT_CHECK([test $bar1_ct -eq 0]) > +fi > + > +if [[ "$bar2_ct" == "20" ]]; then > + AT_CHECK([test $bar1_ct -eq 20]) > + AT_CHECK([test $bar2_ct -eq 0]) > + AT_CHECK([test $bar3_ct -eq 0]) > +else > + AT_CHECK([test $bar2_ct -eq 0]) > +fi > + > +if [[ "$bar3_ct" == "20" ]]; then > + AT_CHECK([test $bar1_ct -eq 20]) > + AT_CHECK([test $bar2_ct -eq 0]) > + AT_CHECK([test $bar3_ct -eq 0]) > +else > + AT_CHECK([test $bar3_ct -eq 0]) > +fi > OVS_APP_EXIT_AND_WAIT([ovn-controller]) > > as ovn-sb > @@ -3448,7 +3600,7 @@ service_monitor | sed '/^$/d' | grep online | wc -l`]) > > OVS_WAIT_UNTIL( > [ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 | grep "ip4.dst == 10.0.0.10" > lflows.txt > - test 1 = `cat lflows.txt | grep "ct_lb(10.0.0.3:80,20.0.0.3:80)" | wc -l`] > + test 1 = `cat lflows.txt | grep "ct_lb(backends=10.0.0.3:80,20.0.0.3:80)" | wc -l`] > ) > > # From sw0-p2 send traffic to vip - 10.0.0.10 > @@ -3474,7 +3626,7 @@ service_monitor logical_port=sw0-p1 | sed '/^$/d' | grep offline | wc -l`]) > > OVS_WAIT_UNTIL( > [ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 | grep "ip4.dst == 10.0.0.10" > lflows.txt > - test 1 = `cat lflows.txt | grep "ct_lb(20.0.0.3:80)" | wc -l`] > + test 1 = `cat lflows.txt | grep "ct_lb(backends=20.0.0.3:80)" | wc -l`] > ) > > ovs-appctl dpctl/flush-conntrack >
On Thu, May 7, 2020 at 5:58 PM Mark Michelson <mmichels@redhat.com> wrote: > Acked-by: Mark Michelson <mmichels@redhat.com> > > Thanks Mark, Han and Maciej for the reviews. I applied this patch to master. Numan > On 5/7/20 2:49 AM, numans@ovn.org wrote: > > From: Numan Siddique <numans@ovn.org> > > > > This patch add a new column 'selection_fields' in Load_Balancer > > table in NB DB. CMS can define a set of packet headers to use > > while selecting a backend. If this column is set, OVN will add the > > flow in group table with selection method as 'hash' with the set fields. > > Otherwise it will use the default 'dp_hash' selection method. > > > > If a load balancer is configured with the selection_fields as > > selection_fields : [ip_dst, ip_src, tp_dst, tp_src] > > > > then with this patch, the modified ct_lb action will look like > > - ct_lb(backends=IP1:P1,IP2:P1; > hash_fields="ip_dst,ip_src,tp_dst,tp_src"); > > > > And the OF flow will look like > > - group_id=2,type=select,selection_method=hash, > > > fields(ip_src,ip_dst,tcp_src,tcp_dst),bucket=bucket_id:0,weight:100,actions=ct(.... > > > > Tested-by: Maciej Józefczyk <mjozefcz@redhat.com> > > Acked-by: Maciej Józefczyk <mjozefcz@redhat.com> > > Acked-by: Han Zhou <hzhou@ovn.org> > > Signed-off-by: Numan Siddique <numans@ovn.org> > > --- > > v2 -> v3 > > ----- > > * Updated the NEWS item as per Han's suggestion. > > > > v1 -> v2 > > * The series has only 1 patch now as the first patch is merged. > > * Addressed review comments from Mark and Han. > > * Added test cases. > > * Added documentation. > > > > NEWS | 6 ++ > > include/ovn/actions.h | 1 + > > lib/actions.c | 45 +++++++++-- > > northd/ovn-northd.c | 30 ++++++-- > > ovn-nb.ovsschema | 10 ++- > > ovn-nb.xml | 27 +++++++ > > tests/ovn-northd.at | 24 +++--- > > tests/ovn.at | 49 +++++++----- > > tests/system-ovn.at | 172 +++++++++++++++++++++++++++++++++++++++--- > > 9 files changed, 311 insertions(+), 53 deletions(-) > > > > diff --git a/NEWS b/NEWS > > index c2b7945df..fea196d34 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -14,6 +14,12 @@ OVN v20.03.0 - xx xxx xxxx > > - Added support for ECMP routes in OVN router. > > - Added IPv6 Prefix Delegation support in OVN. > > - OVN now uses OpenFlow 1.5. > > + - Added support to choose selection methods - dp_hash or > > + hash (with specified hash fields) for OVN load balancer > > + backend selection. This is incompatible with older versions. > > + Care should be taken while upgrading as the existing > > + load balancer traffic will be affected if ovn-controllers > > + are not stopped before uprading northd services. > > > > - OVN Interconnection: > > * Support for L3 interconnection of multiple OVN deployments with > tunnels > > diff --git a/include/ovn/actions.h b/include/ovn/actions.h > > index 42d45a74c..4a54abe17 100644 > > --- a/include/ovn/actions.h > > +++ b/include/ovn/actions.h > > @@ -259,6 +259,7 @@ struct ovnact_ct_lb { > > struct ovnact_ct_lb_dst *dsts; > > size_t n_dsts; > > uint8_t ltable; /* Logical table ID of next table. */ > > + char *hash_fields; > > }; > > > > struct ovnact_select_dst { > > diff --git a/lib/actions.c b/lib/actions.c > > index 021a376b4..c50615177 100644 > > --- a/lib/actions.c > > +++ b/lib/actions.c > > @@ -951,9 +951,18 @@ parse_ct_lb_action(struct action_context *ctx) > > struct ovnact_ct_lb_dst *dsts = NULL; > > size_t allocated_dsts = 0; > > size_t n_dsts = 0; > > + char *hash_fields = NULL; > > > > - if (lexer_match(ctx->lexer, LEX_T_LPAREN)) { > > - while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { > > + if (lexer_match(ctx->lexer, LEX_T_LPAREN) && > > + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { > > + if (!lexer_match_id(ctx->lexer, "backends") || > > + !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { > > + lexer_syntax_error(ctx->lexer, "expecting backends"); > > + return; > > + } > > + > > + while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && > > + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { > > struct ovnact_ct_lb_dst dst; > > if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) { > > /* IPv6 address and port */ > > @@ -1020,12 +1029,27 @@ parse_ct_lb_action(struct action_context *ctx) > > } > > dsts[n_dsts++] = dst; > > } > > + > > + if (lexer_match_id(ctx->lexer, "hash_fields")) { > > + if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || > > + ctx->lexer->token.type != LEX_T_STRING || > > + lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { > > + lexer_syntax_error(ctx->lexer, "invalid hash_fields"); > > + free(dsts); > > + return; > > + } > > + > > + hash_fields = xstrdup(ctx->lexer->token.s); > > + lexer_get(ctx->lexer); > > + lexer_get(ctx->lexer); > > + } > > } > > > > struct ovnact_ct_lb *cl = ovnact_put_CT_LB(ctx->ovnacts); > > cl->ltable = ctx->pp->cur_ltable + 1; > > cl->dsts = dsts; > > cl->n_dsts = n_dsts; > > + cl->hash_fields = hash_fields; > > } > > > > static void > > @@ -1033,10 +1057,10 @@ format_CT_LB(const struct ovnact_ct_lb *cl, > struct ds *s) > > { > > ds_put_cstr(s, "ct_lb"); > > if (cl->n_dsts) { > > - ds_put_char(s, '('); > > + ds_put_cstr(s, "(backends="); > > for (size_t i = 0; i < cl->n_dsts; i++) { > > if (i) { > > - ds_put_cstr(s, ", "); > > + ds_put_char(s, ','); > > } > > > > const struct ovnact_ct_lb_dst *dst = &cl->dsts[i]; > > @@ -1056,7 +1080,13 @@ format_CT_LB(const struct ovnact_ct_lb *cl, > struct ds *s) > > } > > } > > ds_put_char(s, ')'); > > + > > + if (cl->hash_fields) { > > + ds_chomp(s, ')'); > > + ds_put_format(s, "; hash_fields=\"%s\")", cl->hash_fields); > > + } > > } > > + > > ds_put_char(s, ';'); > > } > > > > @@ -1103,7 +1133,11 @@ encode_CT_LB(const struct ovnact_ct_lb *cl, > > : MFF_LOG_DNAT_ZONE - MFF_REG0; > > > > struct ds ds = DS_EMPTY_INITIALIZER; > > - ds_put_format(&ds, "type=select,selection_method=dp_hash"); > > + ds_put_format(&ds, "type=select,selection_method=%s", > > + cl->hash_fields ? "hash": "dp_hash"); > > + if (cl->hash_fields) { > > + ds_put_format(&ds, ",fields(%s)", cl->hash_fields); > > + } > > > > BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0); > > BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS); > > @@ -1145,6 +1179,7 @@ static void > > ovnact_ct_lb_free(struct ovnact_ct_lb *ct_lb) > > { > > free(ct_lb->dsts); > > + free(ct_lb->hash_fields); > > } > > > > static void > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > > index 3324d20f4..dd07f45d9 100644 > > --- a/northd/ovn-northd.c > > +++ b/northd/ovn-northd.c > > @@ -3112,7 +3112,7 @@ struct ovn_lb { > > struct hmap_node hmap_node; > > > > const struct nbrec_load_balancer *nlb; /* May be NULL. */ > > - > > + char *selection_fields; > > struct lb_vip *vips; > > size_t n_vips; > > }; > > @@ -3340,6 +3340,15 @@ ovn_lb_create(struct northd_context *ctx, struct > hmap *lbs, > > n_vips++; > > } > > > > + if (lb->nlb->n_selection_fields) { > > + struct ds sel_fields = DS_EMPTY_INITIALIZER; > > + for (size_t i = 0; i < lb->nlb->n_selection_fields; i++) { > > + ds_put_format(&sel_fields, "%s,", > lb->nlb->selection_fields[i]); > > + } > > + ds_chomp(&sel_fields, ','); > > + lb->selection_fields = ds_steal_cstr(&sel_fields); > > + } > > + > > return lb; > > } > > > > @@ -3358,13 +3367,15 @@ ovn_lb_destroy(struct ovn_lb *lb) > > free(lb->vips[i].backends); > > } > > free(lb->vips); > > + free(lb->selection_fields); > > } > > > > static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip, > > - struct ds *action) > > + struct ds *action, > > + char *selection_fields) > > { > > if (lb_vip->health_check) { > > - ds_put_cstr(action, "ct_lb("); > > + ds_put_cstr(action, "ct_lb(backends="); > > > > size_t n_active_backends = 0; > > for (size_t k = 0; k < lb_vip->n_backends; k++) { > > @@ -3388,7 +3399,13 @@ static void build_lb_vip_ct_lb_actions(struct > lb_vip *lb_vip, > > ds_put_cstr(action, ");"); > > } > > } else { > > - ds_put_format(action, "ct_lb(%s);", lb_vip->backend_ips); > > + ds_put_format(action, "ct_lb(backends=%s);", > lb_vip->backend_ips); > > + } > > + > > + if (selection_fields && selection_fields[0]) { > > + ds_chomp(action, ';'); > > + ds_chomp(action, ')'); > > + ds_put_format(action, "; hash_fields=\"%s\");", > selection_fields); > > } > > } > > > > @@ -5664,7 +5681,7 @@ build_lb_rules(struct ovn_datapath *od, struct > hmap *lflows, struct ovn_lb *lb) > > > > /* New connections in Ingress table. */ > > struct ds action = DS_EMPTY_INITIALIZER; > > - build_lb_vip_ct_lb_actions(lb_vip, &action); > > + build_lb_vip_ct_lb_actions(lb_vip, &action, > lb->selection_fields); > > > > struct ds match = DS_EMPTY_INITIALIZER; > > ds_put_format(&match, "ct.new && %s.dst == %s", ip_match, > lb_vip->vip); > > @@ -9315,7 +9332,8 @@ build_lrouter_flows(struct hmap *datapaths, struct > hmap *ports, > > for (size_t j = 0; j < lb->n_vips; j++) { > > struct lb_vip *lb_vip = &lb->vips[j]; > > ds_clear(&actions); > > - build_lb_vip_ct_lb_actions(lb_vip, &actions); > > + build_lb_vip_ct_lb_actions(lb_vip, &actions, > > + lb->selection_fields); > > > > if (!sset_contains(&all_ips, lb_vip->vip)) { > > sset_add(&all_ips, lb_vip->vip); > > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema > > index b2a046571..a06972aa0 100644 > > --- a/ovn-nb.ovsschema > > +++ b/ovn-nb.ovsschema > > @@ -1,7 +1,7 @@ > > { > > "name": "OVN_Northbound", > > - "version": "5.22.0", > > - "cksum": "384164739 25476", > > + "version": "5.23.0", > > + "cksum": "111023208 25806", > > "tables": { > > "NB_Global": { > > "columns": { > > @@ -179,6 +179,12 @@ > > "ip_port_mappings": { > > "type": {"key": "string", "value": "string", > > "min": 0, "max": "unlimited"}}, > > + "selection_fields": { > > + "type": {"key": {"type": "string", > > + "enum": ["set", > > + ["eth_src", "eth_dst", "ip_src", > "ip_dst", > > + "tp_src", "tp_dst"]]}, > > + "min": 0, "max": "unlimited"}}, > > "external_ids": { > > "type": {"key": "string", "value": "string", > > "min": 0, "max": "unlimited"}}}, > > diff --git a/ovn-nb.xml b/ovn-nb.xml > > index af15c550a..95ee4c9e6 100644 > > --- a/ovn-nb.xml > > +++ b/ovn-nb.xml > > @@ -1513,6 +1513,33 @@ > > </p> > > </column> > > > > + <column name="selection_fields"> > > + <p> > > + OVN native load balancers are supported using the OpenFlow > groups > > + of type <code>select</code>. OVS supports two selection methods: > > + <code>dp_hash</code> and <code>hash (with optional fields > > + specified)</code> in selecting the buckets of a group. > > + Please see the OVS documentation (man ovs-ofctl) > > + for more details on the selection methods. Each endpoint IP > (and port > > + if set) is mapped to a bucket in the group flow. > > + </p> > > + > > + <p> > > + CMS can choose the <code>hash</code> selection method by > setting the > > + selection fields in this column. <code>ovs-vswitchd</code> uses > the > > + specified fields in generating the hash. > > + </p> > > + > > + <p> > > + <code>dp_hash</code> selection method uses the assistance of > > + datapath to calculate the hash and it is expected to be > > + faster than <code>hash</code> selection method. So CMS should > take > > + this into consideration before using the <code>hash</code> > method. > > + Please consult the OVS documentation and OVS sources for the > > + implementation details. > > + </p> > > + </column> > > + > > <group title="Common Columns"> > > <column name="external_ids"> > > See <em>External IDs</em> at the beginning of this document. > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > > index a0109ae94..57c9eedd5 100644 > > --- a/tests/ovn-northd.at > > +++ b/tests/ovn-northd.at > > @@ -1183,7 +1183,7 @@ ovn-nbctl --wait=sb ls-lb-add sw0 lb1 > > > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80 > ,20.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80 > ,20.0.0.3:80);) > > ]) > > > > # Delete the Load_Balancer_Health_Check > > @@ -1192,7 +1192,7 @@ OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list > service_monitor | wc -l`]) > > > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80 > ,20.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80 > ,20.0.0.3:80);) > > ]) > > > > # Create the Load_Balancer_Health_Check again. > > @@ -1205,7 +1205,7 @@ service_monitor | sed '/^$/d' | wc -l`]) > > > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80 > ,20.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80 > ,20.0.0.3:80);) > > ]) > > > > # Get the uuid of both the service_monitor > > @@ -1221,7 +1221,7 @@ OVS_WAIT_UNTIL([ > > > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80 > );) > > ]) > > > > # Set the service monitor for sw0-p1 to offline > > @@ -1251,7 +1251,7 @@ OVS_WAIT_UNTIL([ > > > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80 > ,20.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80 > ,20.0.0.3:80);) > > ]) > > > > # Set the service monitor for sw1-p1 to error > > @@ -1263,7 +1263,7 @@ OVS_WAIT_UNTIL([ > > ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst == > 80" \ > > | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80 > );) > > ]) > > > > # Add one more vip to lb1 > > @@ -1293,8 +1293,8 @@ service_monitor port=1000 | sed '/^$/d' | wc -l`]) > > > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80 > );) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.40 && tcp.dst == 1000), > action=(ct_lb(backends=10.0.0.3:1000);) > > ]) > > > > # Set the service monitor for sw1-p1 to online > > @@ -1306,16 +1306,16 @@ OVS_WAIT_UNTIL([ > > > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80 > ,20.0.0.3:80);) > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000 > ,20.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80 > ,20.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends= > 10.0.0.3:1000,20.0.0.3:80);) > > ]) > > > > # Associate lb1 to sw1 > > ovn-nbctl --wait=sb ls-lb-add sw1 lb1 > > ovn-sbctl dump-flows sw1 | grep ct_lb | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80 > ,20.0.0.3:80);) > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000 > ,20.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80 > ,20.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends= > 10.0.0.3:1000,20.0.0.3:80);) > > ]) > > > > # Now create lb2 same as lb1 but udp protocol. > > diff --git a/tests/ovn.at b/tests/ovn.at > > index f168b147f..e49042eeb 100644 > > --- a/tests/ovn.at > > +++ b/tests/ovn.at > > @@ -970,29 +970,42 @@ ct_lb(); > > encodes as ct(table=19,zone=NXM_NX_REG13[0..15],nat) > > has prereqs ip > > ct_lb(192.168.1.2:80, 192.168.1.3:80); > > + Syntax error at `192.168.1.2' expecting backends. > > +ct_lb(backends=192.168.1.2:80,192.168.1.3:80); > > encodes as group:1 > > uses group: id(1), > name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst= > 192.168.1.2:80 > ),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst= > 192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15])) > > has prereqs ip > > -ct_lb(192.168.1.2, 192.168.1.3, ); > > - formats as ct_lb(192.168.1.2, 192.168.1.3); > > +ct_lb(backends=192.168.1.2, 192.168.1.3, ); > > + formats as ct_lb(backends=192.168.1.2,192.168.1.3); > > encodes as group:2 > > uses group: id(2), > name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15])) > > has prereqs ip > > -ct_lb(fd0f::2, fd0f::3, ); > > - formats as ct_lb(fd0f::2, fd0f::3); > > +ct_lb(backends=fd0f::2, fd0f::3, ); > > + formats as ct_lb(backends=fd0f::2,fd0f::3); > > encodes as group:3 > > uses group: id(3), > name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) > > has prereqs ip > > > > -ct_lb(192.168.1.2:); > > +ct_lb(backends=192.168.1.2:); > > Syntax error at `)' expecting port number. > > -ct_lb(192.168.1.2:123456); > > +ct_lb(backends=192.168.1.2:123456); > > Syntax error at `123456' expecting port number. > > -ct_lb(foo); > > +ct_lb(backends=foo); > > Syntax error at `foo' expecting IP address. > > -ct_lb([192.168.1.2]); > > +ct_lb(backends=[192.168.1.2]); > > Syntax error at `192.168.1.2' expecting IPv6 address. > > > > +ct_lb(backends=192.168.1.2:80,192.168.1.3:80; > hash_fields=eth_src,eth_dst,ip_src); > > + Syntax error at `eth_src' invalid hash_fields. > > +ct_lb(backends=192.168.1.2:80,192.168.1.3:80; > hash_fields="eth_src,eth_dst,ip_src"); > > + encodes as group:4 > > + uses group: id(4), > name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst= > 192.168.1.2:80 > ),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst= > 192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15])) > > + has prereqs ip > > +ct_lb(backends=fd0f::2,fd0f::3; > hash_fields="eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst"); > > + encodes as group:5 > > + uses group: id(5), > name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) > > + has prereqs ip > > + > > # ct_next > > ct_next; > > encodes as ct(table=19,zone=NXM_NX_REG13[0..15]) > > @@ -1520,13 +1533,13 @@ handle_svc_check(reg0); > > # select > > reg9[16..31] = select(1=50, 2=100, 3, ); > > formats as reg9[16..31] = select(1=50, 2=100, 3=100); > > - encodes as group:4 > > - uses group: id(4), > name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19)) > > + encodes as group:6 > > + uses group: id(6), > name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19)) > > > > reg0 = select(1, 2); > > formats as reg0 = select(1=100, 2=100); > > - encodes as group:5 > > - uses group: id(5), > name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19)) > > + encodes as group:7 > > + uses group: id(7), > name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19)) > > > > reg0 = select(1=, 2); > > Syntax error at `,' expecting weight. > > @@ -1542,12 +1555,12 @@ reg0[0..14] = select(1, 2, 3); > > cannot use 15-bit field reg0[0..14] for "select", which requires > at least 16 bits. > > > > fwd_group(liveness="true", childports="eth0", "lsp1"); > > - encodes as group:6 > > - uses group: id(6), > name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) > > + encodes as group:8 > > + uses group: id(8), > name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) > > > > fwd_group(childports="eth0", "lsp1"); > > - encodes as group:7 > > - uses group: id(7), > name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) > > + encodes as group:9 > > + uses group: id(9), > name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) > > > > fwd_group(childports=eth0); > > Syntax error at `eth0' expecting logical switch port. > > @@ -18002,12 +18015,12 @@ service_monitor | sed '/^$/d' | wc -l`]) > > > > ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80 > ,20.0.0.3:80);) > > + table=10(ls_in_stateful ), priority=120 , match=(ct.new && > ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80 > ,20.0.0.3:80);) > > ]) > > > > ovn-sbctl dump-flows lr0 | grep ct_lb | grep priority=120 > lflows.txt > > AT_CHECK([cat lflows.txt], [0], [dnl > > - table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && > ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && > is_chassis_resident("cr-lr0-public")), action=(ct_lb(10.0.0.3:80 > ,20.0.0.3:80);) > > + table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && > ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && > is_chassis_resident("cr-lr0-public")), action=(ct_lb(backends=10.0.0.3:80 > ,20.0.0.3:80);) > > ]) > > > > # get the svc monitor mac. > > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > > index 117f1e835..9a5ef1ec3 100644 > > --- a/tests/system-ovn.at > > +++ b/tests/system-ovn.at > > @@ -1095,15 +1095,15 @@ ovn-nbctl lsp-add bar bar3 \ > > -- lsp-set-addresses bar3 "f0:00:0f:01:02:05 172.16.1.4" > > > > # Config OVN load-balancer with a VIP. > > -uuid=`ovn-nbctl create load_balancer > vips:30.0.0.1="172.16.1.2,172.16.1.3,172.16.1.4"` > > -ovn-nbctl set logical_switch foo load_balancer=$uuid > > +ovn-nbctl lb-add lb1 30.0.0.1 "172.16.1.2,172.16.1.3,172.16.1.4" > > +ovn-nbctl ls-lb-add foo lb1 > > > > # Create another load-balancer with another VIP. > > -uuid=`ovn-nbctl create load_balancer > vips:30.0.0.3="172.16.1.2,172.16.1.3,172.16.1.4"` > > -ovn-nbctl add logical_switch foo load_balancer $uuid > > +lb2_uuid=`ovn-nbctl create load_balancer name=lb2 > vips:30.0.0.3="172.16.1.2,172.16.1.3,172.16.1.4"` > > +ovn-nbctl ls-lb-add foo lb2 > > > > # Config OVN load-balancer with another VIP (this time with ports). > > -ovn-nbctl set load_balancer $uuid vips:'"30.0.0.2:8000"'='" > 172.16.1.2:80,172.16.1.3:80,172.16.1.4:80"' > > +ovn-nbctl set load_balancer $lb2_uuid vips:'"30.0.0.2:8000"'='" > 172.16.1.2:80,172.16.1.3:80,172.16.1.4:80"' > > > > # Wait for ovn-controller to catch up. > > ovn-nbctl --wait=hv sync > > @@ -1157,6 +1157,82 @@ > tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(s > > > tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.4,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > > ]) > > > > +# Configure selection_fields. > > +ovn-nbctl set load_balancer $lb2_uuid > selection_fields="ip_src,ip_dst,tp_src,tp_dst" > > +OVS_WAIT_UNTIL([ > > + test $(ovs-ofctl dump-groups br-int | \ > > + grep "selection_method=hash,fields(ip_src,ip_dst,tcp_src,tcp_dst)" > -c) -eq 2 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +dnl Test load-balancing that includes L4 ports in NAT. > > +for i in `seq 1 20`; do > > + echo Request $i > > + NS_CHECK_EXEC([foo1], [wget 30.0.0.2:8000 -t 5 -T 1 > --retry-connrefused -v -o wget$i.log]) > > +done > > + > > +dnl Each server should have at least one connection. > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.2) | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > > > +tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > > > +tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.3,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > > > +tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.4,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +echo "foo" > foo > > +for i in `seq 1 20`; do > > + echo Request $i > > + ip netns exec foo1 nc -p 30000 30.0.0.2 8000 < foo > > +done > > + > > +dnl Only one backend should be chosen. > > +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 -c) > -eq 1]) > > + > > +ovn-nbctl set load_balancer $lb2_uuid selection_fields="ip_src" > > +OVS_WAIT_UNTIL([ > > + test $(ovs-ofctl dump-groups br-int | \ > > + grep "selection_method=hash,fields=ip_src" -c) -eq 2 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > +for i in `seq 1 20`; do > > + echo Request $i > > + ip netns exec foo1 nc 30.0.0.2 8000 < foo > > +done > > + > > +dnl Only one backend should be chosen as eth_src and ip_src is fixed. > > +bar1_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep > 172.16.1.2 -c) > > +bar2_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep > 172.16.1.3 -c) > > +bar3_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep > 172.16.1.4 -c) > > + > > +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep > 172.16.1 -c) -ne 0]) > > + > > +if [[ "$bar1_ct" == "20" ]]; then > > + AT_CHECK([test $bar1_ct -eq 20]) > > + AT_CHECK([test $bar2_ct -eq 0]) > > + AT_CHECK([test $bar3_ct -eq 0]) > > +else > > + AT_CHECK([test $bar1_ct -eq 0]) > > +fi > > + > > +if [[ "$bar2_ct" == "20" ]]; then > > + AT_CHECK([test $bar1_ct -eq 20]) > > + AT_CHECK([test $bar2_ct -eq 0]) > > + AT_CHECK([test $bar3_ct -eq 0]) > > +else > > + AT_CHECK([test $bar2_ct -eq 0]) > > +fi > > + > > +if [[ "$bar3_ct" == "20" ]]; then > > + AT_CHECK([test $bar1_ct -eq 20]) > > + AT_CHECK([test $bar2_ct -eq 0]) > > + AT_CHECK([test $bar3_ct -eq 0]) > > +else > > + AT_CHECK([test $bar3_ct -eq 0]) > > +fi > > > > OVS_APP_EXIT_AND_WAIT([ovn-controller]) > > > > @@ -1246,11 +1322,11 @@ uuid=`ovn-nbctl create load_balancer > vips:\"fd03::1\"=\"fd02::2,fd02::3,fd02::4 > > ovn-nbctl set logical_switch foo load_balancer=$uuid > > > > # Create another load-balancer with another VIP. > > -uuid=`ovn-nbctl create load_balancer > vips:\"fd03::3\"=\"fd02::2,fd02::3,fd02::4\"` > > -ovn-nbctl add logical_switch foo load_balancer $uuid > > +lb2_uuid=`ovn-nbctl create load_balancer > vips:\"fd03::3\"=\"fd02::2,fd02::3,fd02::4\"` > > +ovn-nbctl add logical_switch foo load_balancer $lb2_uuid > > > > # Config OVN load-balancer with another VIP (this time with ports). > > -ovn-nbctl set load_balancer $uuid > vips:'"[[fd03::2]]:8000"'='"@<:@fd02::2@:>@:80,@<:@fd02::3@ > :>@:80,@<:@fd02::4@:>@:80"' > > +ovn-nbctl set load_balancer $lb2_uuid > vips:'"[[fd03::2]]:8000"'='"@<:@fd02::2@:>@:80,@<:@fd02::3@ > :>@:80,@<:@fd02::4@:>@:80"' > > > > # Wait for ovn-controller to catch up. > > ovn-nbctl --wait=hv sync > > @@ -1304,7 +1380,83 @@ > tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd > > > tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::4,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > > ]) > > > > +# Configure selection_fields. > > +ovn-nbctl set load_balancer $lb2_uuid > selection_fields="ip_src,ip_dst,tp_src,tp_dst" > > +OVS_WAIT_UNTIL([ > > + test $(ovs-ofctl dump-groups br-int | \ > > + grep "selection_method=hash,fields(ip_src,ip_dst,tcp_src,tcp_dst)" > -c) -eq 2 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +dnl Test load-balancing that includes L4 ports in NAT. > > +for i in `seq 1 20`; do > > + echo Request $i > > + NS_CHECK_EXEC([foo1], [wget http://[[fd03::2]]:8000 -t 5 -T 1 > --retry-connrefused -v -o wget$i.log]) > > +done > > + > > +dnl Each server should have at least one connection. > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd03::2) | grep > -v fe80 | \ > > +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl > > > +tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::2,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > > > +tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::3,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > > > +tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::4,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > + > > +echo "foo" > foo > > +for i in `seq 1 20`; do > > + echo Request $i > > + ip netns exec foo1 nc -6 -p 30000 fd03::2 8000 < foo > > +done > > + > > +# Only one backend should be chosen. Since the source port is fixed, > > +# there should be only one conntrack entry. > > +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep fd03::2 -c) -eq > 1]) > > + > > +ovn-nbctl set load_balancer $lb2_uuid selection_fields="eth_src,ip_src" > > +OVS_WAIT_UNTIL([ > > + test $(ovs-ofctl dump-groups br-int | \ > > + grep "selection_method=hash,fields(eth_src,ip_src)" -c) -eq 2 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) > > +for i in `seq 1 20`; do > > + echo Request $i > > + ip netns exec foo1 nc -6 fd03::2 8000 < foo > > +done > > > > +dnl Only one backend should be chosen as eth_src and ip_src is fixed. > > +bar1_ct=$(ovs-appctl dpctl/dump-conntrack | grep fd03::2 | grep fd02::2 > -c) > > +bar2_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep > fd02::3 -c) > > +bar3_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep > fd02::4 -c) > > + > > +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep fd03::2 | grep > fd02 -c) -ne 0]) > > + > > +if [[ "$bar1_ct" == "20" ]]; then > > + AT_CHECK([test $bar1_ct -eq 20]) > > + AT_CHECK([test $bar2_ct -eq 0]) > > + AT_CHECK([test $bar3_ct -eq 0]) > > +else > > + AT_CHECK([test $bar1_ct -eq 0]) > > +fi > > + > > +if [[ "$bar2_ct" == "20" ]]; then > > + AT_CHECK([test $bar1_ct -eq 20]) > > + AT_CHECK([test $bar2_ct -eq 0]) > > + AT_CHECK([test $bar3_ct -eq 0]) > > +else > > + AT_CHECK([test $bar2_ct -eq 0]) > > +fi > > + > > +if [[ "$bar3_ct" == "20" ]]; then > > + AT_CHECK([test $bar1_ct -eq 20]) > > + AT_CHECK([test $bar2_ct -eq 0]) > > + AT_CHECK([test $bar3_ct -eq 0]) > > +else > > + AT_CHECK([test $bar3_ct -eq 0]) > > +fi > > OVS_APP_EXIT_AND_WAIT([ovn-controller]) > > > > as ovn-sb > > @@ -3448,7 +3600,7 @@ service_monitor | sed '/^$/d' | grep online | wc > -l`]) > > > > OVS_WAIT_UNTIL( > > [ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 | grep > "ip4.dst == 10.0.0.10" > lflows.txt > > - test 1 = `cat lflows.txt | grep "ct_lb(10.0.0.3:80,20.0.0.3:80)" > | wc -l`] > > + test 1 = `cat lflows.txt | grep "ct_lb(backends=10.0.0.3:80, > 20.0.0.3:80)" | wc -l`] > > ) > > > > # From sw0-p2 send traffic to vip - 10.0.0.10 > > @@ -3474,7 +3626,7 @@ service_monitor logical_port=sw0-p1 | sed '/^$/d' > | grep offline | wc -l`]) > > > > OVS_WAIT_UNTIL( > > [ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 | grep > "ip4.dst == 10.0.0.10" > lflows.txt > > - test 1 = `cat lflows.txt | grep "ct_lb(20.0.0.3:80)" | wc -l`] > > + test 1 = `cat lflows.txt | grep "ct_lb(backends=20.0.0.3:80)" | > wc -l`] > > ) > > > > ovs-appctl dpctl/flush-conntrack > > > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
diff --git a/NEWS b/NEWS index c2b7945df..fea196d34 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,12 @@ OVN v20.03.0 - xx xxx xxxx - Added support for ECMP routes in OVN router. - Added IPv6 Prefix Delegation support in OVN. - OVN now uses OpenFlow 1.5. + - Added support to choose selection methods - dp_hash or + hash (with specified hash fields) for OVN load balancer + backend selection. This is incompatible with older versions. + Care should be taken while upgrading as the existing + load balancer traffic will be affected if ovn-controllers + are not stopped before uprading northd services. - OVN Interconnection: * Support for L3 interconnection of multiple OVN deployments with tunnels diff --git a/include/ovn/actions.h b/include/ovn/actions.h index 42d45a74c..4a54abe17 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -259,6 +259,7 @@ struct ovnact_ct_lb { struct ovnact_ct_lb_dst *dsts; size_t n_dsts; uint8_t ltable; /* Logical table ID of next table. */ + char *hash_fields; }; struct ovnact_select_dst { diff --git a/lib/actions.c b/lib/actions.c index 021a376b4..c50615177 100644 --- a/lib/actions.c +++ b/lib/actions.c @@ -951,9 +951,18 @@ parse_ct_lb_action(struct action_context *ctx) struct ovnact_ct_lb_dst *dsts = NULL; size_t allocated_dsts = 0; size_t n_dsts = 0; + char *hash_fields = NULL; - if (lexer_match(ctx->lexer, LEX_T_LPAREN)) { - while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { + if (lexer_match(ctx->lexer, LEX_T_LPAREN) && + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { + if (!lexer_match_id(ctx->lexer, "backends") || + !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { + lexer_syntax_error(ctx->lexer, "expecting backends"); + return; + } + + while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { struct ovnact_ct_lb_dst dst; if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) { /* IPv6 address and port */ @@ -1020,12 +1029,27 @@ parse_ct_lb_action(struct action_context *ctx) } dsts[n_dsts++] = dst; } + + if (lexer_match_id(ctx->lexer, "hash_fields")) { + if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || + ctx->lexer->token.type != LEX_T_STRING || + lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { + lexer_syntax_error(ctx->lexer, "invalid hash_fields"); + free(dsts); + return; + } + + hash_fields = xstrdup(ctx->lexer->token.s); + lexer_get(ctx->lexer); + lexer_get(ctx->lexer); + } } struct ovnact_ct_lb *cl = ovnact_put_CT_LB(ctx->ovnacts); cl->ltable = ctx->pp->cur_ltable + 1; cl->dsts = dsts; cl->n_dsts = n_dsts; + cl->hash_fields = hash_fields; } static void @@ -1033,10 +1057,10 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s) { ds_put_cstr(s, "ct_lb"); if (cl->n_dsts) { - ds_put_char(s, '('); + ds_put_cstr(s, "(backends="); for (size_t i = 0; i < cl->n_dsts; i++) { if (i) { - ds_put_cstr(s, ", "); + ds_put_char(s, ','); } const struct ovnact_ct_lb_dst *dst = &cl->dsts[i]; @@ -1056,7 +1080,13 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s) } } ds_put_char(s, ')'); + + if (cl->hash_fields) { + ds_chomp(s, ')'); + ds_put_format(s, "; hash_fields=\"%s\")", cl->hash_fields); + } } + ds_put_char(s, ';'); } @@ -1103,7 +1133,11 @@ encode_CT_LB(const struct ovnact_ct_lb *cl, : MFF_LOG_DNAT_ZONE - MFF_REG0; struct ds ds = DS_EMPTY_INITIALIZER; - ds_put_format(&ds, "type=select,selection_method=dp_hash"); + ds_put_format(&ds, "type=select,selection_method=%s", + cl->hash_fields ? "hash": "dp_hash"); + if (cl->hash_fields) { + ds_put_format(&ds, ",fields(%s)", cl->hash_fields); + } BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0); BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS); @@ -1145,6 +1179,7 @@ static void ovnact_ct_lb_free(struct ovnact_ct_lb *ct_lb) { free(ct_lb->dsts); + free(ct_lb->hash_fields); } static void diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 3324d20f4..dd07f45d9 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -3112,7 +3112,7 @@ struct ovn_lb { struct hmap_node hmap_node; const struct nbrec_load_balancer *nlb; /* May be NULL. */ - + char *selection_fields; struct lb_vip *vips; size_t n_vips; }; @@ -3340,6 +3340,15 @@ ovn_lb_create(struct northd_context *ctx, struct hmap *lbs, n_vips++; } + if (lb->nlb->n_selection_fields) { + struct ds sel_fields = DS_EMPTY_INITIALIZER; + for (size_t i = 0; i < lb->nlb->n_selection_fields; i++) { + ds_put_format(&sel_fields, "%s,", lb->nlb->selection_fields[i]); + } + ds_chomp(&sel_fields, ','); + lb->selection_fields = ds_steal_cstr(&sel_fields); + } + return lb; } @@ -3358,13 +3367,15 @@ ovn_lb_destroy(struct ovn_lb *lb) free(lb->vips[i].backends); } free(lb->vips); + free(lb->selection_fields); } static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip, - struct ds *action) + struct ds *action, + char *selection_fields) { if (lb_vip->health_check) { - ds_put_cstr(action, "ct_lb("); + ds_put_cstr(action, "ct_lb(backends="); size_t n_active_backends = 0; for (size_t k = 0; k < lb_vip->n_backends; k++) { @@ -3388,7 +3399,13 @@ static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip, ds_put_cstr(action, ");"); } } else { - ds_put_format(action, "ct_lb(%s);", lb_vip->backend_ips); + ds_put_format(action, "ct_lb(backends=%s);", lb_vip->backend_ips); + } + + if (selection_fields && selection_fields[0]) { + ds_chomp(action, ';'); + ds_chomp(action, ')'); + ds_put_format(action, "; hash_fields=\"%s\");", selection_fields); } } @@ -5664,7 +5681,7 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, struct ovn_lb *lb) /* New connections in Ingress table. */ struct ds action = DS_EMPTY_INITIALIZER; - build_lb_vip_ct_lb_actions(lb_vip, &action); + build_lb_vip_ct_lb_actions(lb_vip, &action, lb->selection_fields); struct ds match = DS_EMPTY_INITIALIZER; ds_put_format(&match, "ct.new && %s.dst == %s", ip_match, lb_vip->vip); @@ -9315,7 +9332,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, for (size_t j = 0; j < lb->n_vips; j++) { struct lb_vip *lb_vip = &lb->vips[j]; ds_clear(&actions); - build_lb_vip_ct_lb_actions(lb_vip, &actions); + build_lb_vip_ct_lb_actions(lb_vip, &actions, + lb->selection_fields); if (!sset_contains(&all_ips, lb_vip->vip)) { sset_add(&all_ips, lb_vip->vip); diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema index b2a046571..a06972aa0 100644 --- a/ovn-nb.ovsschema +++ b/ovn-nb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Northbound", - "version": "5.22.0", - "cksum": "384164739 25476", + "version": "5.23.0", + "cksum": "111023208 25806", "tables": { "NB_Global": { "columns": { @@ -179,6 +179,12 @@ "ip_port_mappings": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, + "selection_fields": { + "type": {"key": {"type": "string", + "enum": ["set", + ["eth_src", "eth_dst", "ip_src", "ip_dst", + "tp_src", "tp_dst"]]}, + "min": 0, "max": "unlimited"}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, diff --git a/ovn-nb.xml b/ovn-nb.xml index af15c550a..95ee4c9e6 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -1513,6 +1513,33 @@ </p> </column> + <column name="selection_fields"> + <p> + OVN native load balancers are supported using the OpenFlow groups + of type <code>select</code>. OVS supports two selection methods: + <code>dp_hash</code> and <code>hash (with optional fields + specified)</code> in selecting the buckets of a group. + Please see the OVS documentation (man ovs-ofctl) + for more details on the selection methods. Each endpoint IP (and port + if set) is mapped to a bucket in the group flow. + </p> + + <p> + CMS can choose the <code>hash</code> selection method by setting the + selection fields in this column. <code>ovs-vswitchd</code> uses the + specified fields in generating the hash. + </p> + + <p> + <code>dp_hash</code> selection method uses the assistance of + datapath to calculate the hash and it is expected to be + faster than <code>hash</code> selection method. So CMS should take + this into consideration before using the <code>hash</code> method. + Please consult the OVS documentation and OVS sources for the + implementation details. + </p> + </column> + <group title="Common Columns"> <column name="external_ids"> See <em>External IDs</em> at the beginning of this document. diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index a0109ae94..57c9eedd5 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -1183,7 +1183,7 @@ ovn-nbctl --wait=sb ls-lb-add sw0 lb1 ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ]) # Delete the Load_Balancer_Health_Check @@ -1192,7 +1192,7 @@ OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list service_monitor | wc -l`]) ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ]) # Create the Load_Balancer_Health_Check again. @@ -1205,7 +1205,7 @@ service_monitor | sed '/^$/d' | wc -l`]) ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ]) # Get the uuid of both the service_monitor @@ -1221,7 +1221,7 @@ OVS_WAIT_UNTIL([ ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) ]) # Set the service monitor for sw0-p1 to offline @@ -1251,7 +1251,7 @@ OVS_WAIT_UNTIL([ ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ]) # Set the service monitor for sw1-p1 to error @@ -1263,7 +1263,7 @@ OVS_WAIT_UNTIL([ ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" \ | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) ]) # Add one more vip to lb1 @@ -1293,8 +1293,8 @@ service_monitor port=1000 | sed '/^$/d' | wc -l`]) ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);) - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000);) ]) # Set the service monitor for sw1-p1 to online @@ -1306,16 +1306,16 @@ OVS_WAIT_UNTIL([ ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000,20.0.0.3:80);) ]) # Associate lb1 to sw1 ovn-nbctl --wait=sb ls-lb-add sw1 lb1 ovn-sbctl dump-flows sw1 | grep ct_lb | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000,20.0.0.3:80);) ]) # Now create lb2 same as lb1 but udp protocol. diff --git a/tests/ovn.at b/tests/ovn.at index f168b147f..e49042eeb 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -970,29 +970,42 @@ ct_lb(); encodes as ct(table=19,zone=NXM_NX_REG13[0..15],nat) has prereqs ip ct_lb(192.168.1.2:80, 192.168.1.3:80); + Syntax error at `192.168.1.2' expecting backends. +ct_lb(backends=192.168.1.2:80,192.168.1.3:80); encodes as group:1 uses group: id(1), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15])) has prereqs ip -ct_lb(192.168.1.2, 192.168.1.3, ); - formats as ct_lb(192.168.1.2, 192.168.1.3); +ct_lb(backends=192.168.1.2, 192.168.1.3, ); + formats as ct_lb(backends=192.168.1.2,192.168.1.3); encodes as group:2 uses group: id(2), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15])) has prereqs ip -ct_lb(fd0f::2, fd0f::3, ); - formats as ct_lb(fd0f::2, fd0f::3); +ct_lb(backends=fd0f::2, fd0f::3, ); + formats as ct_lb(backends=fd0f::2,fd0f::3); encodes as group:3 uses group: id(3), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) has prereqs ip -ct_lb(192.168.1.2:); +ct_lb(backends=192.168.1.2:); Syntax error at `)' expecting port number. -ct_lb(192.168.1.2:123456); +ct_lb(backends=192.168.1.2:123456); Syntax error at `123456' expecting port number. -ct_lb(foo); +ct_lb(backends=foo); Syntax error at `foo' expecting IP address. -ct_lb([192.168.1.2]); +ct_lb(backends=[192.168.1.2]); Syntax error at `192.168.1.2' expecting IPv6 address. +ct_lb(backends=192.168.1.2:80,192.168.1.3:80; hash_fields=eth_src,eth_dst,ip_src); + Syntax error at `eth_src' invalid hash_fields. +ct_lb(backends=192.168.1.2:80,192.168.1.3:80; hash_fields="eth_src,eth_dst,ip_src"); + encodes as group:4 + uses group: id(4), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15])) + has prereqs ip +ct_lb(backends=fd0f::2,fd0f::3; hash_fields="eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst"); + encodes as group:5 + uses group: id(5), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15])) + has prereqs ip + # ct_next ct_next; encodes as ct(table=19,zone=NXM_NX_REG13[0..15]) @@ -1520,13 +1533,13 @@ handle_svc_check(reg0); # select reg9[16..31] = select(1=50, 2=100, 3, ); formats as reg9[16..31] = select(1=50, 2=100, 3=100); - encodes as group:4 - uses group: id(4), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19)) + encodes as group:6 + uses group: id(6), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19)) reg0 = select(1, 2); formats as reg0 = select(1=100, 2=100); - encodes as group:5 - uses group: id(5), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19)) + encodes as group:7 + uses group: id(7), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19)) reg0 = select(1=, 2); Syntax error at `,' expecting weight. @@ -1542,12 +1555,12 @@ reg0[0..14] = select(1, 2, 3); cannot use 15-bit field reg0[0..14] for "select", which requires at least 16 bits. fwd_group(liveness="true", childports="eth0", "lsp1"); - encodes as group:6 - uses group: id(6), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) + encodes as group:8 + uses group: id(8), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) fwd_group(childports="eth0", "lsp1"); - encodes as group:7 - uses group: id(7), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) + encodes as group:9 + uses group: id(9), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64)) fwd_group(childports=eth0); Syntax error at `eth0' expecting logical switch port. @@ -18002,12 +18015,12 @@ service_monitor | sed '/^$/d' | wc -l`]) ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) + table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ]) ovn-sbctl dump-flows lr0 | grep ct_lb | grep priority=120 > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);) + table=6 (lr_in_dnat ), priority=120 , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(ct_lb(backends=10.0.0.3:80,20.0.0.3:80);) ]) # get the svc monitor mac. diff --git a/tests/system-ovn.at b/tests/system-ovn.at index 117f1e835..9a5ef1ec3 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -1095,15 +1095,15 @@ ovn-nbctl lsp-add bar bar3 \ -- lsp-set-addresses bar3 "f0:00:0f:01:02:05 172.16.1.4" # Config OVN load-balancer with a VIP. -uuid=`ovn-nbctl create load_balancer vips:30.0.0.1="172.16.1.2,172.16.1.3,172.16.1.4"` -ovn-nbctl set logical_switch foo load_balancer=$uuid +ovn-nbctl lb-add lb1 30.0.0.1 "172.16.1.2,172.16.1.3,172.16.1.4" +ovn-nbctl ls-lb-add foo lb1 # Create another load-balancer with another VIP. -uuid=`ovn-nbctl create load_balancer vips:30.0.0.3="172.16.1.2,172.16.1.3,172.16.1.4"` -ovn-nbctl add logical_switch foo load_balancer $uuid +lb2_uuid=`ovn-nbctl create load_balancer name=lb2 vips:30.0.0.3="172.16.1.2,172.16.1.3,172.16.1.4"` +ovn-nbctl ls-lb-add foo lb2 # Config OVN load-balancer with another VIP (this time with ports). -ovn-nbctl set load_balancer $uuid vips:'"30.0.0.2:8000"'='"172.16.1.2:80,172.16.1.3:80,172.16.1.4:80"' +ovn-nbctl set load_balancer $lb2_uuid vips:'"30.0.0.2:8000"'='"172.16.1.2:80,172.16.1.3:80,172.16.1.4:80"' # Wait for ovn-controller to catch up. ovn-nbctl --wait=hv sync @@ -1157,6 +1157,82 @@ tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(s tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.4,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) ]) +# Configure selection_fields. +ovn-nbctl set load_balancer $lb2_uuid selection_fields="ip_src,ip_dst,tp_src,tp_dst" +OVS_WAIT_UNTIL([ + test $(ovs-ofctl dump-groups br-int | \ + grep "selection_method=hash,fields(ip_src,ip_dst,tcp_src,tcp_dst)" -c) -eq 2 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +dnl Test load-balancing that includes L4 ports in NAT. +for i in `seq 1 20`; do + echo Request $i + NS_CHECK_EXEC([foo1], [wget 30.0.0.2:8000 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) +done + +dnl Each server should have at least one connection. +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.2) | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.2,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) +tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.3,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) +tcp,orig=(src=192.168.1.2,dst=30.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=172.16.1.4,dst=192.168.1.2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +echo "foo" > foo +for i in `seq 1 20`; do + echo Request $i + ip netns exec foo1 nc -p 30000 30.0.0.2 8000 < foo +done + +dnl Only one backend should be chosen. +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 -c) -eq 1]) + +ovn-nbctl set load_balancer $lb2_uuid selection_fields="ip_src" +OVS_WAIT_UNTIL([ + test $(ovs-ofctl dump-groups br-int | \ + grep "selection_method=hash,fields=ip_src" -c) -eq 2 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +for i in `seq 1 20`; do + echo Request $i + ip netns exec foo1 nc 30.0.0.2 8000 < foo +done + +dnl Only one backend should be chosen as eth_src and ip_src is fixed. +bar1_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1.2 -c) +bar2_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1.3 -c) +bar3_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1.4 -c) + +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep 172.16.1 -c) -ne 0]) + +if [[ "$bar1_ct" == "20" ]]; then + AT_CHECK([test $bar1_ct -eq 20]) + AT_CHECK([test $bar2_ct -eq 0]) + AT_CHECK([test $bar3_ct -eq 0]) +else + AT_CHECK([test $bar1_ct -eq 0]) +fi + +if [[ "$bar2_ct" == "20" ]]; then + AT_CHECK([test $bar1_ct -eq 20]) + AT_CHECK([test $bar2_ct -eq 0]) + AT_CHECK([test $bar3_ct -eq 0]) +else + AT_CHECK([test $bar2_ct -eq 0]) +fi + +if [[ "$bar3_ct" == "20" ]]; then + AT_CHECK([test $bar1_ct -eq 20]) + AT_CHECK([test $bar2_ct -eq 0]) + AT_CHECK([test $bar3_ct -eq 0]) +else + AT_CHECK([test $bar3_ct -eq 0]) +fi OVS_APP_EXIT_AND_WAIT([ovn-controller]) @@ -1246,11 +1322,11 @@ uuid=`ovn-nbctl create load_balancer vips:\"fd03::1\"=\"fd02::2,fd02::3,fd02::4 ovn-nbctl set logical_switch foo load_balancer=$uuid # Create another load-balancer with another VIP. -uuid=`ovn-nbctl create load_balancer vips:\"fd03::3\"=\"fd02::2,fd02::3,fd02::4\"` -ovn-nbctl add logical_switch foo load_balancer $uuid +lb2_uuid=`ovn-nbctl create load_balancer vips:\"fd03::3\"=\"fd02::2,fd02::3,fd02::4\"` +ovn-nbctl add logical_switch foo load_balancer $lb2_uuid # Config OVN load-balancer with another VIP (this time with ports). -ovn-nbctl set load_balancer $uuid vips:'"[[fd03::2]]:8000"'='"@<:@fd02::2@:>@:80,@<:@fd02::3@:>@:80,@<:@fd02::4@:>@:80"' +ovn-nbctl set load_balancer $lb2_uuid vips:'"[[fd03::2]]:8000"'='"@<:@fd02::2@:>@:80,@<:@fd02::3@:>@:80,@<:@fd02::4@:>@:80"' # Wait for ovn-controller to catch up. ovn-nbctl --wait=hv sync @@ -1304,7 +1380,83 @@ tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::4,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) ]) +# Configure selection_fields. +ovn-nbctl set load_balancer $lb2_uuid selection_fields="ip_src,ip_dst,tp_src,tp_dst" +OVS_WAIT_UNTIL([ + test $(ovs-ofctl dump-groups br-int | \ + grep "selection_method=hash,fields(ip_src,ip_dst,tcp_src,tcp_dst)" -c) -eq 2 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +dnl Test load-balancing that includes L4 ports in NAT. +for i in `seq 1 20`; do + echo Request $i + NS_CHECK_EXEC([foo1], [wget http://[[fd03::2]]:8000 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) +done + +dnl Each server should have at least one connection. +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd03::2) | grep -v fe80 | \ +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl +tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::2,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) +tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::3,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) +tcp,orig=(src=fd01::2,dst=fd03::2,sport=<cleared>,dport=<cleared>),reply=(src=fd02::4,dst=fd01::2,sport=<cleared>,dport=<cleared>),zone=<cleared>,protoinfo=(state=<cleared>) +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + +echo "foo" > foo +for i in `seq 1 20`; do + echo Request $i + ip netns exec foo1 nc -6 -p 30000 fd03::2 8000 < foo +done + +# Only one backend should be chosen. Since the source port is fixed, +# there should be only one conntrack entry. +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep fd03::2 -c) -eq 1]) + +ovn-nbctl set load_balancer $lb2_uuid selection_fields="eth_src,ip_src" +OVS_WAIT_UNTIL([ + test $(ovs-ofctl dump-groups br-int | \ + grep "selection_method=hash,fields(eth_src,ip_src)" -c) -eq 2 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +for i in `seq 1 20`; do + echo Request $i + ip netns exec foo1 nc -6 fd03::2 8000 < foo +done +dnl Only one backend should be chosen as eth_src and ip_src is fixed. +bar1_ct=$(ovs-appctl dpctl/dump-conntrack | grep fd03::2 | grep fd02::2 -c) +bar2_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep fd02::3 -c) +bar3_ct=$(ovs-appctl dpctl/dump-conntrack | grep 30.0.0.2 | grep fd02::4 -c) + +AT_CHECK([test $(ovs-appctl dpctl/dump-conntrack | grep fd03::2 | grep fd02 -c) -ne 0]) + +if [[ "$bar1_ct" == "20" ]]; then + AT_CHECK([test $bar1_ct -eq 20]) + AT_CHECK([test $bar2_ct -eq 0]) + AT_CHECK([test $bar3_ct -eq 0]) +else + AT_CHECK([test $bar1_ct -eq 0]) +fi + +if [[ "$bar2_ct" == "20" ]]; then + AT_CHECK([test $bar1_ct -eq 20]) + AT_CHECK([test $bar2_ct -eq 0]) + AT_CHECK([test $bar3_ct -eq 0]) +else + AT_CHECK([test $bar2_ct -eq 0]) +fi + +if [[ "$bar3_ct" == "20" ]]; then + AT_CHECK([test $bar1_ct -eq 20]) + AT_CHECK([test $bar2_ct -eq 0]) + AT_CHECK([test $bar3_ct -eq 0]) +else + AT_CHECK([test $bar3_ct -eq 0]) +fi OVS_APP_EXIT_AND_WAIT([ovn-controller]) as ovn-sb @@ -3448,7 +3600,7 @@ service_monitor | sed '/^$/d' | grep online | wc -l`]) OVS_WAIT_UNTIL( [ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 | grep "ip4.dst == 10.0.0.10" > lflows.txt - test 1 = `cat lflows.txt | grep "ct_lb(10.0.0.3:80,20.0.0.3:80)" | wc -l`] + test 1 = `cat lflows.txt | grep "ct_lb(backends=10.0.0.3:80,20.0.0.3:80)" | wc -l`] ) # From sw0-p2 send traffic to vip - 10.0.0.10 @@ -3474,7 +3626,7 @@ service_monitor logical_port=sw0-p1 | sed '/^$/d' | grep offline | wc -l`]) OVS_WAIT_UNTIL( [ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 | grep "ip4.dst == 10.0.0.10" > lflows.txt - test 1 = `cat lflows.txt | grep "ct_lb(20.0.0.3:80)" | wc -l`] + test 1 = `cat lflows.txt | grep "ct_lb(backends=20.0.0.3:80)" | wc -l`] ) ovs-appctl dpctl/flush-conntrack