diff mbox series

[ovs-dev,ovn,2/2] Support selection fields in load balancer.

Message ID 20200430172028.1978594-1-numans@ovn.org
State Superseded
Headers show
Series [ovs-dev,ovn,1/2] controller: Use OpenFlow version 1.5 | expand

Commit Message

Numan Siddique April 30, 2020, 5:20 p.m. UTC
From: Numan Siddique <numans@ovn.org>

This patch add a new column 'selection_fields' in Load_Balancer
table in NB DB. CMS can define a set of packet headers to use
while selecting a backend. If this column is set, OVN will add the
flow in group table with selection method as 'hash' with the set fields.
Otherwise it will use the default 'dp_hash' selection method.

If a load balancer is configured with the selection_fields as
selection_fields    : [ip_dst, ip_src, tp_dst, tp_src]

then with this patch, the modified ct_lb action will look like
 - ct_lb(backends=IP1:P1-IP2:P1, hash_fields="ip_dst,ip_src,tp_dst,tp_src");

And the OF flow will look like
 - group_id=2,type=select,selection_method=hash,
   fields(ip_src,ip_dst,tcp_src,tcp_dst),bucket=bucket_id:0,weight:100,actions=ct(....

Signed-off-by: Numan Siddique <numans@ovn.org>
---
 include/ovn/actions.h |  1 +
 lib/actions.c         | 45 +++++++++++++++++++++++++++++++++-----
 northd/ovn-northd.c   | 47 ++++++++++++++++++++++++++++++++-------
 ovn-nb.ovsschema      | 10 +++++++--
 ovn-nb.xml            |  7 ++++++
 tests/ovn-northd.at   | 24 ++++++++++----------
 tests/ovn.at          | 51 ++++++++++++++++++++++++++++---------------
 7 files changed, 139 insertions(+), 46 deletions(-)

Comments

0-day Robot April 30, 2020, 6:05 p.m. UTC | #1
Bleep bloop.  Greetings Numan Siddique, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 95 characters long (recommended limit is 79)
#137 FILE: lib/actions.c:1135:
    ds_put_format(&ds, "type=select,selection_method=%s", cl->hash_fields ? "hash": "dp_hash");

Lines checked: 512, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
Mark Michelson May 1, 2020, 8:17 p.m. UTC | #2
I think you should follow this up with some tests.

On 4/30/20 1:20 PM, numans@ovn.org wrote:
> From: Numan Siddique <numans@ovn.org>
> 
> This patch add a new column 'selection_fields' in Load_Balancer
> table in NB DB. CMS can define a set of packet headers to use
> while selecting a backend. If this column is set, OVN will add the
> flow in group table with selection method as 'hash' with the set fields.
> Otherwise it will use the default 'dp_hash' selection method.
> 
> If a load balancer is configured with the selection_fields as
> selection_fields    : [ip_dst, ip_src, tp_dst, tp_src]
> 
> then with this patch, the modified ct_lb action will look like
>   - ct_lb(backends=IP1:P1-IP2:P1, hash_fields="ip_dst,ip_src,tp_dst,tp_src");

Hm, I'm trying not to bikeshed here, but the syntax of using '-' as the 
separator for IP:port is unfortunate. It makes it seem like it 
represents a range of backends instead of distinct backends. Could it 
maybe be changed to:

ct_lb(backends=IP1:P1,IP2:P2; hash_fields="ip_dst,ip_src,tp_dst,tp_src");

?

See below for a couple other minor critiques.

> 
> And the OF flow will look like
>   - group_id=2,type=select,selection_method=hash,
>     fields(ip_src,ip_dst,tcp_src,tcp_dst),bucket=bucket_id:0,weight:100,actions=ct(....
> 
> Signed-off-by: Numan Siddique <numans@ovn.org>
> ---
>   include/ovn/actions.h |  1 +
>   lib/actions.c         | 45 +++++++++++++++++++++++++++++++++-----
>   northd/ovn-northd.c   | 47 ++++++++++++++++++++++++++++++++-------
>   ovn-nb.ovsschema      | 10 +++++++--
>   ovn-nb.xml            |  7 ++++++
>   tests/ovn-northd.at   | 24 ++++++++++----------
>   tests/ovn.at          | 51 ++++++++++++++++++++++++++++---------------
>   7 files changed, 139 insertions(+), 46 deletions(-)
> 
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 42d45a74c..4a54abe17 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -259,6 +259,7 @@ struct ovnact_ct_lb {
>       struct ovnact_ct_lb_dst *dsts;
>       size_t n_dsts;
>       uint8_t ltable;             /* Logical table ID of next table. */
> +    char *hash_fields;
>   };
>   
>   struct ovnact_select_dst {
> diff --git a/lib/actions.c b/lib/actions.c
> index 021a376b4..6d840a73f 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -951,9 +951,18 @@ parse_ct_lb_action(struct action_context *ctx)
>       struct ovnact_ct_lb_dst *dsts = NULL;
>       size_t allocated_dsts = 0;
>       size_t n_dsts = 0;
> +    char *hash_fields = NULL;
>   
> -    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> -        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> +    if (lexer_match(ctx->lexer, LEX_T_LPAREN) &&
> +        !lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> +        if (!lexer_match_id(ctx->lexer, "backends") ||
> +            !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
> +            lexer_syntax_error(ctx->lexer, "expecting backends");
> +            return;
> +        }
> +
> +        while (!lexer_match(ctx->lexer, LEX_T_COMMA) &&
> +               !lexer_match(ctx->lexer, LEX_T_RPAREN)) {
>               struct ovnact_ct_lb_dst dst;
>               if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) {
>                   /* IPv6 address and port */
> @@ -1012,7 +1021,7 @@ parse_ct_lb_action(struct action_context *ctx)
>                       }
>                   }
>               }
> -            lexer_match(ctx->lexer, LEX_T_COMMA);
> +            lexer_match(ctx->lexer, LEX_T_HYPHEN);
>   
>               /* Append to dsts. */
>               if (n_dsts >= allocated_dsts) {
> @@ -1020,12 +1029,27 @@ parse_ct_lb_action(struct action_context *ctx)
>               }
>               dsts[n_dsts++] = dst;
>           }
> +
> +        if (lexer_match_id(ctx->lexer, "hash_fields")) {
> +            if (!lexer_match(ctx->lexer, LEX_T_EQUALS) ||
> +                ctx->lexer->token.type != LEX_T_STRING ||
> +                lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) {
> +                lexer_syntax_error(ctx->lexer, "invalid hash_fields");
> +                free(dsts);
> +                return;
> +            }
> +
> +            hash_fields = xstrdup(ctx->lexer->token.s);
> +            lexer_get(ctx->lexer);
> +            lexer_get(ctx->lexer);
> +        }
>       }
>   
>       struct ovnact_ct_lb *cl = ovnact_put_CT_LB(ctx->ovnacts);
>       cl->ltable = ctx->pp->cur_ltable + 1;
>       cl->dsts = dsts;
>       cl->n_dsts = n_dsts;
> +    cl->hash_fields = hash_fields;
>   }
>   
>   static void
> @@ -1033,10 +1057,10 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s)
>   {
>       ds_put_cstr(s, "ct_lb");
>       if (cl->n_dsts) {
> -        ds_put_char(s, '(');
> +        ds_put_cstr(s, "(backends=");
>           for (size_t i = 0; i < cl->n_dsts; i++) {
>               if (i) {
> -                ds_put_cstr(s, ", ");
> +                ds_put_char(s, '-');
>               }
>   
>               const struct ovnact_ct_lb_dst *dst = &cl->dsts[i];
> @@ -1057,6 +1081,11 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s)
>           }
>           ds_put_char(s, ')');
>       }
> +
> +    if (cl->hash_fields) {
> +        ds_chomp(s, ')');
> +        ds_put_format(s, ", hash_fields=\"%s\")", cl->hash_fields);
> +    }
>       ds_put_char(s, ';');
>   }
>   
> @@ -1103,7 +1132,10 @@ encode_CT_LB(const struct ovnact_ct_lb *cl,
>                               : MFF_LOG_DNAT_ZONE - MFF_REG0;
>   
>       struct ds ds = DS_EMPTY_INITIALIZER;
> -    ds_put_format(&ds, "type=select,selection_method=dp_hash");
> +    ds_put_format(&ds, "type=select,selection_method=%s", cl->hash_fields ? "hash": "dp_hash");
> +    if (cl->hash_fields) {
> +        ds_put_format(&ds, ",fields(%s)", cl->hash_fields);
> +    }
>   
>       BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0);
>       BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS);
> @@ -1145,6 +1177,7 @@ static void
>   ovnact_ct_lb_free(struct ovnact_ct_lb *ct_lb)
>   {
>       free(ct_lb->dsts);
> +    free(ct_lb->hash_fields);
>   }
>   
>   static void
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index e31794c4b..e18dd03df 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -3101,7 +3101,7 @@ struct ovn_lb {
>       struct hmap_node hmap_node;
>   
>       const struct nbrec_load_balancer *nlb; /* May be NULL. */
> -
> +    char *selection_fields;
>       struct lb_vip *vips;
>       size_t n_vips;
>   };
> @@ -3214,6 +3214,18 @@ ovn_lb_create(struct northd_context *ctx, struct hmap *lbs,
>           lb->vips[n_vips].addr_family = addr_family;
>           lb->vips[n_vips].backend_ips = xstrdup(node->value);
>   
> +        char *ps1 = lb->vips[n_vips].backend_ips;
> +        char *ps2 = ps1;
> +        /* Replace all occurances of ',' with '-'. */
> +        while (ps2) {
> +            ps2 = strstr(ps1, ",");

A minor improvement here would be to use strchr() instead of strstr().

Optionally, if you don't mind doing an assignment in a while statement, 
you could write this as:

while ((ps2 = strchr(ps1, ','))) {
     *ps2 = '-';
     ps2++;
     ps1 = ps2;
}

> +            if (ps2) {
> +                *ps2 = '-';
> +                ps2++;
> +                ps1 = ps2;
> +            }
> +        }
> +
>           struct nbrec_load_balancer_health_check *lb_health_check = NULL;
>           if (nbrec_lb->protocol && !strcmp(nbrec_lb->protocol, "sctp")) {
>               if (nbrec_lb->n_health_check > 0) {
> @@ -3329,6 +3341,16 @@ ovn_lb_create(struct northd_context *ctx, struct hmap *lbs,
>           n_vips++;
>       }
>   
> +    if (lb->nlb->n_selection_fields) {
> +        struct ds sel_fields = DS_EMPTY_INITIALIZER;
> +        for (size_t i = 0; i < lb->nlb->n_selection_fields; i++) {
> +            ds_put_format(&sel_fields, "%s,", lb->nlb->selection_fields[i]);
> +        }
> +        ds_chomp(&sel_fields, ',');
> +        lb->selection_fields = ds_steal_cstr(&sel_fields);
> +        ds_destroy(&sel_fields);

ds_destroy() is unnecessary since you call ds_steal_cstr()

> +    }
> +
>       return lb;
>   }
>   
> @@ -3347,13 +3369,15 @@ ovn_lb_destroy(struct ovn_lb *lb)
>           free(lb->vips[i].backends);
>       }
>       free(lb->vips);
> +    free(lb->selection_fields);
>   }
>   
>   static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip,
> -                                       struct ds *action)
> +                                       struct ds *action,
> +                                       char *selection_fields)
>   {
>       if (lb_vip->health_check) {
> -        ds_put_cstr(action, "ct_lb(");
> +        ds_put_cstr(action, "ct_lb(backends=");
>   
>           size_t n_active_backends = 0;
>           for (size_t k = 0; k < lb_vip->n_backends; k++) {
> @@ -3365,7 +3389,7 @@ static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip,
>               }
>   
>               n_active_backends++;
> -            ds_put_format(action, "%s:%"PRIu16",",
> +            ds_put_format(action, "%s:%"PRIu16"-",
>               backend->ip, backend->port);
>           }
>   
> @@ -3373,11 +3397,17 @@ static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip,
>               ds_clear(action);
>               ds_put_cstr(action, "drop;");
>           } else {
> -            ds_chomp(action, ',');
> +            ds_chomp(action, '-');
>               ds_put_cstr(action, ");");
>           }
>       } else {
> -        ds_put_format(action, "ct_lb(%s);", lb_vip->backend_ips);
> +        ds_put_format(action, "ct_lb(backends=%s);", lb_vip->backend_ips);
> +    }
> +
> +    if (selection_fields && selection_fields[0]) {
> +        ds_chomp(action, ';');
> +        ds_chomp(action, ')');
> +        ds_put_format(action, ", hash_fields=\"%s\");", selection_fields);
>       }
>   }
>   
> @@ -5650,7 +5680,7 @@ build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, struct ovn_lb *lb)
>   
>           /* New connections in Ingress table. */
>           struct ds action = DS_EMPTY_INITIALIZER;
> -        build_lb_vip_ct_lb_actions(lb_vip, &action);
> +        build_lb_vip_ct_lb_actions(lb_vip, &action, lb->selection_fields);
>   
>           struct ds match = DS_EMPTY_INITIALIZER;
>           ds_put_format(&match, "ct.new && %s.dst == %s", ip_match, lb_vip->vip);
> @@ -9301,7 +9331,8 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>               for (size_t j = 0; j < lb->n_vips; j++) {
>                   struct lb_vip *lb_vip = &lb->vips[j];
>                   ds_clear(&actions);
> -                build_lb_vip_ct_lb_actions(lb_vip, &actions);
> +                build_lb_vip_ct_lb_actions(lb_vip, &actions,
> +                                           lb->selection_fields);
>   
>                   if (!sset_contains(&all_ips, lb_vip->vip)) {
>                       sset_add(&all_ips, lb_vip->vip);
> diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> index b2a046571..a06972aa0 100644
> --- a/ovn-nb.ovsschema
> +++ b/ovn-nb.ovsschema
> @@ -1,7 +1,7 @@
>   {
>       "name": "OVN_Northbound",
> -    "version": "5.22.0",
> -    "cksum": "384164739 25476",
> +    "version": "5.23.0",
> +    "cksum": "111023208 25806",
>       "tables": {
>           "NB_Global": {
>               "columns": {
> @@ -179,6 +179,12 @@
>                   "ip_port_mappings": {
>                       "type": {"key": "string", "value": "string",
>                                "min": 0, "max": "unlimited"}},
> +                "selection_fields": {
> +                    "type": {"key": {"type": "string",
> +                             "enum": ["set",
> +                                ["eth_src", "eth_dst", "ip_src", "ip_dst",
> +                                 "tp_src", "tp_dst"]]},
> +                             "min": 0, "max": "unlimited"}},
>                   "external_ids": {
>                       "type": {"key": "string", "value": "string",
>                                "min": 0, "max": "unlimited"}}},
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index af15c550a..243a25bce 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -1513,6 +1513,13 @@
>         </p>
>       </column>
>   
> +    <column name="selection_fields">
> +      Set of packet header fields to use for selecting a backend to
> +      load balance the VIP. This can be used if CMS desires that the
> +      backend selection is consistent and deterministic for a given
> +      set of packet header fields.
> +    </column>
> +
>       <group title="Common Columns">
>         <column name="external_ids">
>           See <em>External IDs</em> at the beginning of this document.
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 8cc3f7002..c665db89b 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -1183,7 +1183,7 @@ ovn-nbctl --wait=sb ls-lb-add sw0 lb1
>   
>   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
>   ])
>   
>   # Delete the Load_Balancer_Health_Check
> @@ -1192,7 +1192,7 @@ OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list service_monitor |  wc -l`])
>   
>   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
>   ])
>   
>   # Create the Load_Balancer_Health_Check again.
> @@ -1205,7 +1205,7 @@ service_monitor | sed '/^$/d' | wc -l`])
>   
>   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
>   ])
>   
>   # Get the uuid of both the service_monitor
> @@ -1221,7 +1221,7 @@ OVS_WAIT_UNTIL([
>   
>   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);)
>   ])
>   
>   # Set the service monitor for sw0-p1 to offline
> @@ -1251,7 +1251,7 @@ OVS_WAIT_UNTIL([
>   
>   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
>   ])
>   
>   # Set the service monitor for sw1-p1 to error
> @@ -1263,7 +1263,7 @@ OVS_WAIT_UNTIL([
>   ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" \
>   | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);)
>   ])
>   
>   # Add one more vip to lb1
> @@ -1293,8 +1293,8 @@ service_monitor port=1000 | sed '/^$/d' | wc -l`])
>   
>   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000);)
>   ])
>   
>   # Set the service monitor for sw1-p1 to online
> @@ -1306,16 +1306,16 @@ OVS_WAIT_UNTIL([
>   
>   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000-20.0.0.3:80);)
>   ])
>   
>   # Associate lb1 to sw1
>   ovn-nbctl --wait=sb ls-lb-add sw1 lb1
>   ovn-sbctl dump-flows sw1 | grep ct_lb | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000-20.0.0.3:80);)
>   ])
>   
>   # Now create lb2 same as lb1 but udp protocol.
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 7befc8224..729bc1853 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -970,29 +970,44 @@ ct_lb();
>       encodes as ct(table=19,zone=NXM_NX_REG13[0..15],nat)
>       has prereqs ip
>   ct_lb(192.168.1.2:80, 192.168.1.3:80);
> +    Syntax error at `192.168.1.2' expecting backends.
> +ct_lb(backends=192.168.1.2:80, 192.168.1.3:80);
> +    Syntax error at `192.168.1.3' expecting `;'.
> +ct_lb(backends=192.168.1.2:80-192.168.1.3:80);
>       encodes as group:1
>       uses group: id(1), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15]))
>       has prereqs ip
> -ct_lb(192.168.1.2, 192.168.1.3, );
> -    formats as ct_lb(192.168.1.2, 192.168.1.3);
> +ct_lb(backends=192.168.1.2- 192.168.1.3- );
> +    formats as ct_lb(backends=192.168.1.2-192.168.1.3);
>       encodes as group:2
>       uses group: id(2), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15]))
>       has prereqs ip
> -ct_lb(fd0f::2, fd0f::3, );
> -    formats as ct_lb(fd0f::2, fd0f::3);
> +ct_lb(backends=fd0f::2- fd0f::3- );
> +    formats as ct_lb(backends=fd0f::2-fd0f::3);
>       encodes as group:3
>       uses group: id(3), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15]))
>       has prereqs ip
>   
> -ct_lb(192.168.1.2:);
> +ct_lb(backends=192.168.1.2:);
>       Syntax error at `)' expecting port number.
> -ct_lb(192.168.1.2:123456);
> +ct_lb(backends=192.168.1.2:123456);
>       Syntax error at `123456' expecting port number.
> -ct_lb(foo);
> +ct_lb(backends=foo);
>       Syntax error at `foo' expecting IP address.
> -ct_lb([192.168.1.2]);
> +ct_lb(backends=[192.168.1.2]);
>       Syntax error at `192.168.1.2' expecting IPv6 address.
>   
> +ct_lb(backends=192.168.1.2:80-192.168.1.3:80, hash_fields=eth_src,eth_dst,ip_src);
> +    Syntax error at `eth_src' invalid hash_fields.
> +ct_lb(backends=192.168.1.2:80-192.168.1.3:80, hash_fields="eth_src,eth_dst,ip_src");
> +    encodes as group:4
> +    uses group: id(4), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15]))
> +    has prereqs ip
> +ct_lb(backends=fd0f::2-fd0f::3, hash_fields="eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst");
> +    encodes as group:5
> +    uses group: id(5), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15]))
> +    has prereqs ip
> +
>   # ct_next
>   ct_next;
>       encodes as ct(table=19,zone=NXM_NX_REG13[0..15])
> @@ -1520,13 +1535,13 @@ handle_svc_check(reg0);
>   # select
>   reg9[16..31] = select(1=50, 2=100, 3, );
>       formats as reg9[16..31] = select(1=50, 2=100, 3=100);
> -    encodes as group:4
> -    uses group: id(4), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19))
> +    encodes as group:6
> +    uses group: id(6), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19))
>   
>   reg0 = select(1, 2);
>       formats as reg0 = select(1=100, 2=100);
> -    encodes as group:5
> -    uses group: id(5), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19))
> +    encodes as group:7
> +    uses group: id(7), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19))
>   
>   reg0 = select(1=, 2);
>       Syntax error at `,' expecting weight.
> @@ -1542,12 +1557,12 @@ reg0[0..14] = select(1, 2, 3);
>       cannot use 15-bit field reg0[0..14] for "select", which requires at least 16 bits.
>   
>   fwd_group(liveness="true", childports="eth0", "lsp1");
> -    encodes as group:6
> -    uses group: id(6), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> +    encodes as group:8
> +    uses group: id(8), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
>   
>   fwd_group(childports="eth0", "lsp1");
> -    encodes as group:7
> -    uses group: id(7), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> +    encodes as group:9
> +    uses group: id(9), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
>   
>   fwd_group(childports=eth0);
>       Syntax error at `eth0' expecting logical switch port.
> @@ -18000,12 +18015,12 @@ service_monitor | sed '/^$/d' | wc -l`])
>   
>   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
>   ])
>   
>   ovn-sbctl dump-flows lr0 | grep ct_lb | grep priority=120 > lflows.txt
>   AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=6 (lr_in_dnat         ), priority=120  , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
> +  table=6 (lr_in_dnat         ), priority=120  , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
>   ])
>   
>   # get the svc monitor mac.
>
Han Zhou May 4, 2020, 3:51 a.m. UTC | #3
On Thu, Apr 30, 2020 at 10:20 AM <numans@ovn.org> wrote:
>
> From: Numan Siddique <numans@ovn.org>
>
> This patch add a new column 'selection_fields' in Load_Balancer
> table in NB DB. CMS can define a set of packet headers to use
> while selecting a backend. If this column is set, OVN will add the
> flow in group table with selection method as 'hash' with the set fields.
> Otherwise it will use the default 'dp_hash' selection method.
>
> If a load balancer is configured with the selection_fields as
> selection_fields    : [ip_dst, ip_src, tp_dst, tp_src]
>
> then with this patch, the modified ct_lb action will look like
>  - ct_lb(backends=IP1:P1-IP2:P1,
hash_fields="ip_dst,ip_src,tp_dst,tp_src");
>
The change is not backward compatible, so it is better warned in the NEWS
for upgrading considerations.

> And the OF flow will look like
>  - group_id=2,type=select,selection_method=hash,
>
 fields(ip_src,ip_dst,tcp_src,tcp_dst),bucket=bucket_id:0,weight:100,actions=ct(....
>
> Signed-off-by: Numan Siddique <numans@ovn.org>
> ---
>  include/ovn/actions.h |  1 +
>  lib/actions.c         | 45 +++++++++++++++++++++++++++++++++-----
>  northd/ovn-northd.c   | 47 ++++++++++++++++++++++++++++++++-------
>  ovn-nb.ovsschema      | 10 +++++++--
>  ovn-nb.xml            |  7 ++++++
>  tests/ovn-northd.at   | 24 ++++++++++----------
>  tests/ovn.at          | 51 ++++++++++++++++++++++++++++---------------
>  7 files changed, 139 insertions(+), 46 deletions(-)
>
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 42d45a74c..4a54abe17 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -259,6 +259,7 @@ struct ovnact_ct_lb {
>      struct ovnact_ct_lb_dst *dsts;
>      size_t n_dsts;
>      uint8_t ltable;             /* Logical table ID of next table. */
> +    char *hash_fields;
>  };
>
>  struct ovnact_select_dst {
> diff --git a/lib/actions.c b/lib/actions.c
> index 021a376b4..6d840a73f 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -951,9 +951,18 @@ parse_ct_lb_action(struct action_context *ctx)
>      struct ovnact_ct_lb_dst *dsts = NULL;
>      size_t allocated_dsts = 0;
>      size_t n_dsts = 0;
> +    char *hash_fields = NULL;
>
> -    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> -        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> +    if (lexer_match(ctx->lexer, LEX_T_LPAREN) &&
> +        !lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> +        if (!lexer_match_id(ctx->lexer, "backends") ||
> +            !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
> +            lexer_syntax_error(ctx->lexer, "expecting backends");
> +            return;
> +        }
> +
> +        while (!lexer_match(ctx->lexer, LEX_T_COMMA) &&
> +               !lexer_match(ctx->lexer, LEX_T_RPAREN)) {
>              struct ovnact_ct_lb_dst dst;
>              if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) {
>                  /* IPv6 address and port */
> @@ -1012,7 +1021,7 @@ parse_ct_lb_action(struct action_context *ctx)
>                      }
>                  }
>              }
> -            lexer_match(ctx->lexer, LEX_T_COMMA);
> +            lexer_match(ctx->lexer, LEX_T_HYPHEN);
>
>              /* Append to dsts. */
>              if (n_dsts >= allocated_dsts) {
> @@ -1020,12 +1029,27 @@ parse_ct_lb_action(struct action_context *ctx)
>              }
>              dsts[n_dsts++] = dst;
>          }
> +
> +        if (lexer_match_id(ctx->lexer, "hash_fields")) {
> +            if (!lexer_match(ctx->lexer, LEX_T_EQUALS) ||
> +                ctx->lexer->token.type != LEX_T_STRING ||
> +                lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) {
> +                lexer_syntax_error(ctx->lexer, "invalid hash_fields");
> +                free(dsts);
> +                return;
> +            }
> +
> +            hash_fields = xstrdup(ctx->lexer->token.s);
> +            lexer_get(ctx->lexer);
> +            lexer_get(ctx->lexer);
> +        }
>      }
>
>      struct ovnact_ct_lb *cl = ovnact_put_CT_LB(ctx->ovnacts);
>      cl->ltable = ctx->pp->cur_ltable + 1;
>      cl->dsts = dsts;
>      cl->n_dsts = n_dsts;
> +    cl->hash_fields = hash_fields;
>  }
>
>  static void
> @@ -1033,10 +1057,10 @@ format_CT_LB(const struct ovnact_ct_lb *cl,
struct ds *s)
>  {
>      ds_put_cstr(s, "ct_lb");
>      if (cl->n_dsts) {
> -        ds_put_char(s, '(');
> +        ds_put_cstr(s, "(backends=");
>          for (size_t i = 0; i < cl->n_dsts; i++) {
>              if (i) {
> -                ds_put_cstr(s, ", ");
> +                ds_put_char(s, '-');
>              }
>
>              const struct ovnact_ct_lb_dst *dst = &cl->dsts[i];
> @@ -1057,6 +1081,11 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct
ds *s)
>          }
>          ds_put_char(s, ')');
>      }
> +
> +    if (cl->hash_fields) {
> +        ds_chomp(s, ')');
> +        ds_put_format(s, ", hash_fields=\"%s\")", cl->hash_fields);
> +    }
>      ds_put_char(s, ';');
>  }
>
> @@ -1103,7 +1132,10 @@ encode_CT_LB(const struct ovnact_ct_lb *cl,
>                              : MFF_LOG_DNAT_ZONE - MFF_REG0;
>
>      struct ds ds = DS_EMPTY_INITIALIZER;
> -    ds_put_format(&ds, "type=select,selection_method=dp_hash");
> +    ds_put_format(&ds, "type=select,selection_method=%s",
cl->hash_fields ? "hash": "dp_hash");
> +    if (cl->hash_fields) {
> +        ds_put_format(&ds, ",fields(%s)", cl->hash_fields);
> +    }
>
>      BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0);
>      BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS);
> @@ -1145,6 +1177,7 @@ static void
>  ovnact_ct_lb_free(struct ovnact_ct_lb *ct_lb)
>  {
>      free(ct_lb->dsts);
> +    free(ct_lb->hash_fields);
>  }
>
>  static void
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index e31794c4b..e18dd03df 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -3101,7 +3101,7 @@ struct ovn_lb {
>      struct hmap_node hmap_node;
>
>      const struct nbrec_load_balancer *nlb; /* May be NULL. */
> -
> +    char *selection_fields;
>      struct lb_vip *vips;
>      size_t n_vips;
>  };
> @@ -3214,6 +3214,18 @@ ovn_lb_create(struct northd_context *ctx, struct
hmap *lbs,
>          lb->vips[n_vips].addr_family = addr_family;
>          lb->vips[n_vips].backend_ips = xstrdup(node->value);
>
> +        char *ps1 = lb->vips[n_vips].backend_ips;
> +        char *ps2 = ps1;
> +        /* Replace all occurances of ',' with '-'. */
> +        while (ps2) {
> +            ps2 = strstr(ps1, ",");
> +            if (ps2) {
> +                *ps2 = '-';
> +                ps2++;
> +                ps1 = ps2;
> +            }
> +        }
> +
>          struct nbrec_load_balancer_health_check *lb_health_check = NULL;
>          if (nbrec_lb->protocol && !strcmp(nbrec_lb->protocol, "sctp")) {
>              if (nbrec_lb->n_health_check > 0) {
> @@ -3329,6 +3341,16 @@ ovn_lb_create(struct northd_context *ctx, struct
hmap *lbs,
>          n_vips++;
>      }
>
> +    if (lb->nlb->n_selection_fields) {
> +        struct ds sel_fields = DS_EMPTY_INITIALIZER;
> +        for (size_t i = 0; i < lb->nlb->n_selection_fields; i++) {
> +            ds_put_format(&sel_fields, "%s,",
lb->nlb->selection_fields[i]);
> +        }
> +        ds_chomp(&sel_fields, ',');
> +        lb->selection_fields = ds_steal_cstr(&sel_fields);
> +        ds_destroy(&sel_fields);
> +    }
> +
>      return lb;
>  }
>
> @@ -3347,13 +3369,15 @@ ovn_lb_destroy(struct ovn_lb *lb)
>          free(lb->vips[i].backends);
>      }
>      free(lb->vips);
> +    free(lb->selection_fields);
>  }
>
>  static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip,
> -                                       struct ds *action)
> +                                       struct ds *action,
> +                                       char *selection_fields)
>  {
>      if (lb_vip->health_check) {
> -        ds_put_cstr(action, "ct_lb(");
> +        ds_put_cstr(action, "ct_lb(backends=");
>
>          size_t n_active_backends = 0;
>          for (size_t k = 0; k < lb_vip->n_backends; k++) {
> @@ -3365,7 +3389,7 @@ static void build_lb_vip_ct_lb_actions(struct
lb_vip *lb_vip,
>              }
>
>              n_active_backends++;
> -            ds_put_format(action, "%s:%"PRIu16",",
> +            ds_put_format(action, "%s:%"PRIu16"-",
>              backend->ip, backend->port);
>          }
>
> @@ -3373,11 +3397,17 @@ static void build_lb_vip_ct_lb_actions(struct
lb_vip *lb_vip,
>              ds_clear(action);
>              ds_put_cstr(action, "drop;");
>          } else {
> -            ds_chomp(action, ',');
> +            ds_chomp(action, '-');
>              ds_put_cstr(action, ");");
>          }
>      } else {
> -        ds_put_format(action, "ct_lb(%s);", lb_vip->backend_ips);
> +        ds_put_format(action, "ct_lb(backends=%s);",
lb_vip->backend_ips);
> +    }
> +
> +    if (selection_fields && selection_fields[0]) {
> +        ds_chomp(action, ';');
> +        ds_chomp(action, ')');
> +        ds_put_format(action, ", hash_fields=\"%s\");",
selection_fields);
>      }
>  }
>
> @@ -5650,7 +5680,7 @@ build_lb_rules(struct ovn_datapath *od, struct hmap
*lflows, struct ovn_lb *lb)
>
>          /* New connections in Ingress table. */
>          struct ds action = DS_EMPTY_INITIALIZER;
> -        build_lb_vip_ct_lb_actions(lb_vip, &action);
> +        build_lb_vip_ct_lb_actions(lb_vip, &action,
lb->selection_fields);
>
>          struct ds match = DS_EMPTY_INITIALIZER;
>          ds_put_format(&match, "ct.new && %s.dst == %s", ip_match,
lb_vip->vip);
> @@ -9301,7 +9331,8 @@ build_lrouter_flows(struct hmap *datapaths, struct
hmap *ports,
>              for (size_t j = 0; j < lb->n_vips; j++) {
>                  struct lb_vip *lb_vip = &lb->vips[j];
>                  ds_clear(&actions);
> -                build_lb_vip_ct_lb_actions(lb_vip, &actions);
> +                build_lb_vip_ct_lb_actions(lb_vip, &actions,
> +                                           lb->selection_fields);
>
>                  if (!sset_contains(&all_ips, lb_vip->vip)) {
>                      sset_add(&all_ips, lb_vip->vip);
> diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> index b2a046571..a06972aa0 100644
> --- a/ovn-nb.ovsschema
> +++ b/ovn-nb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Northbound",
> -    "version": "5.22.0",
> -    "cksum": "384164739 25476",
> +    "version": "5.23.0",
> +    "cksum": "111023208 25806",
>      "tables": {
>          "NB_Global": {
>              "columns": {
> @@ -179,6 +179,12 @@
>                  "ip_port_mappings": {
>                      "type": {"key": "string", "value": "string",
>                               "min": 0, "max": "unlimited"}},
> +                "selection_fields": {
> +                    "type": {"key": {"type": "string",
> +                             "enum": ["set",
> +                                ["eth_src", "eth_dst", "ip_src",
"ip_dst",
> +                                 "tp_src", "tp_dst"]]},
> +                             "min": 0, "max": "unlimited"}},
>                  "external_ids": {
>                      "type": {"key": "string", "value": "string",
>                               "min": 0, "max": "unlimited"}}},
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index af15c550a..243a25bce 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -1513,6 +1513,13 @@
>        </p>
>      </column>
>
> +    <column name="selection_fields">
> +      Set of packet header fields to use for selecting a backend to
> +      load balance the VIP. This can be used if CMS desires that the
> +      backend selection is consistent and deterministic for a given
> +      set of packet header fields.
> +    </column>
> +

This description may or may not be true, depending on dp_hash
implementation of different OVS versions and it can change in the future,
so I think it is better to document more details about what OVS group
selection method is used (dp_hash v.s. hash), and direct user to read the
OVS document to understand the implications more accurately. Performance
implications should also be mentioned here, but it is also better to let
user check OVS documentation for details.

>      <group title="Common Columns">
>        <column name="external_ids">
>          See <em>External IDs</em> at the beginning of this document.
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 8cc3f7002..c665db89b 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -1183,7 +1183,7 @@ ovn-nbctl --wait=sb ls-lb-add sw0 lb1
>
>  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
-20.0.0.3:80);)
>  ])
>
>  # Delete the Load_Balancer_Health_Check
> @@ -1192,7 +1192,7 @@ OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list
service_monitor |  wc -l`])
>
>  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
-20.0.0.3:80);)
>  ])
>
>  # Create the Load_Balancer_Health_Check again.
> @@ -1205,7 +1205,7 @@ service_monitor | sed '/^$/d' | wc -l`])
>
>  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
-20.0.0.3:80);)
>  ])
>
>  # Get the uuid of both the service_monitor
> @@ -1221,7 +1221,7 @@ OVS_WAIT_UNTIL([
>
>  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
);)
>  ])
>
>  # Set the service monitor for sw0-p1 to offline
> @@ -1251,7 +1251,7 @@ OVS_WAIT_UNTIL([
>
>  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
-20.0.0.3:80);)
>  ])
>
>  # Set the service monitor for sw1-p1 to error
> @@ -1263,7 +1263,7 @@ OVS_WAIT_UNTIL([
>  ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" \
>  | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
);)
>  ])
>
>  # Add one more vip to lb1
> @@ -1293,8 +1293,8 @@ service_monitor port=1000 | sed '/^$/d' | wc -l`])
>
>  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.40 && tcp.dst == 1000),
action=(ct_lb(backends=10.0.0.3:1000);)
>  ])
>
>  # Set the service monitor for sw1-p1 to online
> @@ -1306,16 +1306,16 @@ OVS_WAIT_UNTIL([
>
>  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
,20.0.0.3:80);)
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000
,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
-20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.40 && tcp.dst == 1000),
action=(ct_lb(backends=10.0.0.3:1000-20.0.0.3:80);)
>  ])
>
>  # Associate lb1 to sw1
>  ovn-nbctl --wait=sb ls-lb-add sw1 lb1
>  ovn-sbctl dump-flows sw1 | grep ct_lb | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
,20.0.0.3:80);)
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000
,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
-20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.40 && tcp.dst == 1000),
action=(ct_lb(backends=10.0.0.3:1000-20.0.0.3:80);)
>  ])
>
>  # Now create lb2 same as lb1 but udp protocol.
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 7befc8224..729bc1853 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -970,29 +970,44 @@ ct_lb();
>      encodes as ct(table=19,zone=NXM_NX_REG13[0..15],nat)
>      has prereqs ip
>  ct_lb(192.168.1.2:80, 192.168.1.3:80);
> +    Syntax error at `192.168.1.2' expecting backends.
> +ct_lb(backends=192.168.1.2:80, 192.168.1.3:80);
> +    Syntax error at `192.168.1.3' expecting `;'.
> +ct_lb(backends=192.168.1.2:80-192.168.1.3:80);
>      encodes as group:1
>      uses group: id(1),
name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=
192.168.1.2:80
),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=
192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15]))
>      has prereqs ip
> -ct_lb(192.168.1.2, 192.168.1.3, );
> -    formats as ct_lb(192.168.1.2, 192.168.1.3);
> +ct_lb(backends=192.168.1.2- 192.168.1.3- );
> +    formats as ct_lb(backends=192.168.1.2-192.168.1.3);
>      encodes as group:2
>      uses group: id(2),
name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15]))
>      has prereqs ip
> -ct_lb(fd0f::2, fd0f::3, );
> -    formats as ct_lb(fd0f::2, fd0f::3);
> +ct_lb(backends=fd0f::2- fd0f::3- );
> +    formats as ct_lb(backends=fd0f::2-fd0f::3);
>      encodes as group:3
>      uses group: id(3),
name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15]))
>      has prereqs ip
>
> -ct_lb(192.168.1.2:);
> +ct_lb(backends=192.168.1.2:);
>      Syntax error at `)' expecting port number.
> -ct_lb(192.168.1.2:123456);
> +ct_lb(backends=192.168.1.2:123456);
>      Syntax error at `123456' expecting port number.
> -ct_lb(foo);
> +ct_lb(backends=foo);
>      Syntax error at `foo' expecting IP address.
> -ct_lb([192.168.1.2]);
> +ct_lb(backends=[192.168.1.2]);
>      Syntax error at `192.168.1.2' expecting IPv6 address.
>
> +ct_lb(backends=192.168.1.2:80-192.168.1.3:80,
hash_fields=eth_src,eth_dst,ip_src);
> +    Syntax error at `eth_src' invalid hash_fields.
> +ct_lb(backends=192.168.1.2:80-192.168.1.3:80,
hash_fields="eth_src,eth_dst,ip_src");
> +    encodes as group:4
> +    uses group: id(4),
name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=
192.168.1.2:80
),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=
192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15]))
> +    has prereqs ip
> +ct_lb(backends=fd0f::2-fd0f::3,
hash_fields="eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst");
> +    encodes as group:5
> +    uses group: id(5),
name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15]))
> +    has prereqs ip
> +
>  # ct_next
>  ct_next;
>      encodes as ct(table=19,zone=NXM_NX_REG13[0..15])
> @@ -1520,13 +1535,13 @@ handle_svc_check(reg0);
>  # select
>  reg9[16..31] = select(1=50, 2=100, 3, );
>      formats as reg9[16..31] = select(1=50, 2=100, 3=100);
> -    encodes as group:4
> -    uses group: id(4),
name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19))
> +    encodes as group:6
> +    uses group: id(6),
name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19))
>
>  reg0 = select(1, 2);
>      formats as reg0 = select(1=100, 2=100);
> -    encodes as group:5
> -    uses group: id(5),
name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19))
> +    encodes as group:7
> +    uses group: id(7),
name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19))
>
>  reg0 = select(1=, 2);
>      Syntax error at `,' expecting weight.
> @@ -1542,12 +1557,12 @@ reg0[0..14] = select(1, 2, 3);
>      cannot use 15-bit field reg0[0..14] for "select", which requires at
least 16 bits.
>
>  fwd_group(liveness="true", childports="eth0", "lsp1");
> -    encodes as group:6
> -    uses group: id(6),
name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> +    encodes as group:8
> +    uses group: id(8),
name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
>
>  fwd_group(childports="eth0", "lsp1");
> -    encodes as group:7
> -    uses group: id(7),
name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> +    encodes as group:9
> +    uses group: id(9),
name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
>
>  fwd_group(childports=eth0);
>      Syntax error at `eth0' expecting logical switch port.
> @@ -18000,12 +18015,12 @@ service_monitor | sed '/^$/d' | wc -l`])
>
>  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
,20.0.0.3:80);)
> +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
-20.0.0.3:80);)
>  ])
>
>  ovn-sbctl dump-flows lr0 | grep ct_lb | grep priority=120 > lflows.txt
>  AT_CHECK([cat lflows.txt], [0], [dnl
> -  table=6 (lr_in_dnat         ), priority=120  , match=(ct.new && ip &&
ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 &&
is_chassis_resident("cr-lr0-public")), action=(ct_lb(10.0.0.3:80,20.0.0.3:80
);)
> +  table=6 (lr_in_dnat         ), priority=120  , match=(ct.new && ip &&
ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 &&
is_chassis_resident("cr-lr0-public")), action=(ct_lb(backends=10.0.0.3:80
-20.0.0.3:80);)
>  ])
>
>  # get the svc monitor mac.
> --
> 2.25.4
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Numan Siddique May 5, 2020, 6:02 p.m. UTC | #4
On Mon, May 4, 2020 at 9:22 AM Han Zhou <hzhou@ovn.org> wrote:

> On Thu, Apr 30, 2020 at 10:20 AM <numans@ovn.org> wrote:
> >
> > From: Numan Siddique <numans@ovn.org>
> >
> > This patch add a new column 'selection_fields' in Load_Balancer
> > table in NB DB. CMS can define a set of packet headers to use
> > while selecting a backend. If this column is set, OVN will add the
> > flow in group table with selection method as 'hash' with the set fields.
> > Otherwise it will use the default 'dp_hash' selection method.
> >
> > If a load balancer is configured with the selection_fields as
> > selection_fields    : [ip_dst, ip_src, tp_dst, tp_src]
> >
> > then with this patch, the modified ct_lb action will look like
> >  - ct_lb(backends=IP1:P1-IP2:P1,
> hash_fields="ip_dst,ip_src,tp_dst,tp_src");
> >
> The change is not backward compatible, so it is better warned in the NEWS
> for upgrading considerations.
>
> > And the OF flow will look like
> >  - group_id=2,type=select,selection_method=hash,
> >
>
>  fields(ip_src,ip_dst,tcp_src,tcp_dst),bucket=bucket_id:0,weight:100,actions=ct(....
> >
> > Signed-off-by: Numan Siddique <numans@ovn.org>
> > ---
> >  include/ovn/actions.h |  1 +
> >  lib/actions.c         | 45 +++++++++++++++++++++++++++++++++-----
> >  northd/ovn-northd.c   | 47 ++++++++++++++++++++++++++++++++-------
> >  ovn-nb.ovsschema      | 10 +++++++--
> >  ovn-nb.xml            |  7 ++++++
> >  tests/ovn-northd.at   | 24 ++++++++++----------
> >  tests/ovn.at          | 51 ++++++++++++++++++++++++++++---------------
> >  7 files changed, 139 insertions(+), 46 deletions(-)
> >
> > diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> > index 42d45a74c..4a54abe17 100644
> > --- a/include/ovn/actions.h
> > +++ b/include/ovn/actions.h
> > @@ -259,6 +259,7 @@ struct ovnact_ct_lb {
> >      struct ovnact_ct_lb_dst *dsts;
> >      size_t n_dsts;
> >      uint8_t ltable;             /* Logical table ID of next table. */
> > +    char *hash_fields;
> >  };
> >
> >  struct ovnact_select_dst {
> > diff --git a/lib/actions.c b/lib/actions.c
> > index 021a376b4..6d840a73f 100644
> > --- a/lib/actions.c
> > +++ b/lib/actions.c
> > @@ -951,9 +951,18 @@ parse_ct_lb_action(struct action_context *ctx)
> >      struct ovnact_ct_lb_dst *dsts = NULL;
> >      size_t allocated_dsts = 0;
> >      size_t n_dsts = 0;
> > +    char *hash_fields = NULL;
> >
> > -    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> > -        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> > +    if (lexer_match(ctx->lexer, LEX_T_LPAREN) &&
> > +        !lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> > +        if (!lexer_match_id(ctx->lexer, "backends") ||
> > +            !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
> > +            lexer_syntax_error(ctx->lexer, "expecting backends");
> > +            return;
> > +        }
> > +
> > +        while (!lexer_match(ctx->lexer, LEX_T_COMMA) &&
> > +               !lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> >              struct ovnact_ct_lb_dst dst;
> >              if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) {
> >                  /* IPv6 address and port */
> > @@ -1012,7 +1021,7 @@ parse_ct_lb_action(struct action_context *ctx)
> >                      }
> >                  }
> >              }
> > -            lexer_match(ctx->lexer, LEX_T_COMMA);
> > +            lexer_match(ctx->lexer, LEX_T_HYPHEN);
> >
> >              /* Append to dsts. */
> >              if (n_dsts >= allocated_dsts) {
> > @@ -1020,12 +1029,27 @@ parse_ct_lb_action(struct action_context *ctx)
> >              }
> >              dsts[n_dsts++] = dst;
> >          }
> > +
> > +        if (lexer_match_id(ctx->lexer, "hash_fields")) {
> > +            if (!lexer_match(ctx->lexer, LEX_T_EQUALS) ||
> > +                ctx->lexer->token.type != LEX_T_STRING ||
> > +                lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) {
> > +                lexer_syntax_error(ctx->lexer, "invalid hash_fields");
> > +                free(dsts);
> > +                return;
> > +            }
> > +
> > +            hash_fields = xstrdup(ctx->lexer->token.s);
> > +            lexer_get(ctx->lexer);
> > +            lexer_get(ctx->lexer);
> > +        }
> >      }
> >
> >      struct ovnact_ct_lb *cl = ovnact_put_CT_LB(ctx->ovnacts);
> >      cl->ltable = ctx->pp->cur_ltable + 1;
> >      cl->dsts = dsts;
> >      cl->n_dsts = n_dsts;
> > +    cl->hash_fields = hash_fields;
> >  }
> >
> >  static void
> > @@ -1033,10 +1057,10 @@ format_CT_LB(const struct ovnact_ct_lb *cl,
> struct ds *s)
> >  {
> >      ds_put_cstr(s, "ct_lb");
> >      if (cl->n_dsts) {
> > -        ds_put_char(s, '(');
> > +        ds_put_cstr(s, "(backends=");
> >          for (size_t i = 0; i < cl->n_dsts; i++) {
> >              if (i) {
> > -                ds_put_cstr(s, ", ");
> > +                ds_put_char(s, '-');
> >              }
> >
> >              const struct ovnact_ct_lb_dst *dst = &cl->dsts[i];
> > @@ -1057,6 +1081,11 @@ format_CT_LB(const struct ovnact_ct_lb *cl, struct
> ds *s)
> >          }
> >          ds_put_char(s, ')');
> >      }
> > +
> > +    if (cl->hash_fields) {
> > +        ds_chomp(s, ')');
> > +        ds_put_format(s, ", hash_fields=\"%s\")", cl->hash_fields);
> > +    }
> >      ds_put_char(s, ';');
> >  }
> >
> > @@ -1103,7 +1132,10 @@ encode_CT_LB(const struct ovnact_ct_lb *cl,
> >                              : MFF_LOG_DNAT_ZONE - MFF_REG0;
> >
> >      struct ds ds = DS_EMPTY_INITIALIZER;
> > -    ds_put_format(&ds, "type=select,selection_method=dp_hash");
> > +    ds_put_format(&ds, "type=select,selection_method=%s",
> cl->hash_fields ? "hash": "dp_hash");
> > +    if (cl->hash_fields) {
> > +        ds_put_format(&ds, ",fields(%s)", cl->hash_fields);
> > +    }
> >
> >      BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0);
> >      BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS);
> > @@ -1145,6 +1177,7 @@ static void
> >  ovnact_ct_lb_free(struct ovnact_ct_lb *ct_lb)
> >  {
> >      free(ct_lb->dsts);
> > +    free(ct_lb->hash_fields);
> >  }
> >
> >  static void
> > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > index e31794c4b..e18dd03df 100644
> > --- a/northd/ovn-northd.c
> > +++ b/northd/ovn-northd.c
> > @@ -3101,7 +3101,7 @@ struct ovn_lb {
> >      struct hmap_node hmap_node;
> >
> >      const struct nbrec_load_balancer *nlb; /* May be NULL. */
> > -
> > +    char *selection_fields;
> >      struct lb_vip *vips;
> >      size_t n_vips;
> >  };
> > @@ -3214,6 +3214,18 @@ ovn_lb_create(struct northd_context *ctx, struct
> hmap *lbs,
> >          lb->vips[n_vips].addr_family = addr_family;
> >          lb->vips[n_vips].backend_ips = xstrdup(node->value);
> >
> > +        char *ps1 = lb->vips[n_vips].backend_ips;
> > +        char *ps2 = ps1;
> > +        /* Replace all occurances of ',' with '-'. */
> > +        while (ps2) {
> > +            ps2 = strstr(ps1, ",");
> > +            if (ps2) {
> > +                *ps2 = '-';
> > +                ps2++;
> > +                ps1 = ps2;
> > +            }
> > +        }
> > +
> >          struct nbrec_load_balancer_health_check *lb_health_check = NULL;
> >          if (nbrec_lb->protocol && !strcmp(nbrec_lb->protocol, "sctp")) {
> >              if (nbrec_lb->n_health_check > 0) {
> > @@ -3329,6 +3341,16 @@ ovn_lb_create(struct northd_context *ctx, struct
> hmap *lbs,
> >          n_vips++;
> >      }
> >
> > +    if (lb->nlb->n_selection_fields) {
> > +        struct ds sel_fields = DS_EMPTY_INITIALIZER;
> > +        for (size_t i = 0; i < lb->nlb->n_selection_fields; i++) {
> > +            ds_put_format(&sel_fields, "%s,",
> lb->nlb->selection_fields[i]);
> > +        }
> > +        ds_chomp(&sel_fields, ',');
> > +        lb->selection_fields = ds_steal_cstr(&sel_fields);
> > +        ds_destroy(&sel_fields);
> > +    }
> > +
> >      return lb;
> >  }
> >
> > @@ -3347,13 +3369,15 @@ ovn_lb_destroy(struct ovn_lb *lb)
> >          free(lb->vips[i].backends);
> >      }
> >      free(lb->vips);
> > +    free(lb->selection_fields);
> >  }
> >
> >  static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip,
> > -                                       struct ds *action)
> > +                                       struct ds *action,
> > +                                       char *selection_fields)
> >  {
> >      if (lb_vip->health_check) {
> > -        ds_put_cstr(action, "ct_lb(");
> > +        ds_put_cstr(action, "ct_lb(backends=");
> >
> >          size_t n_active_backends = 0;
> >          for (size_t k = 0; k < lb_vip->n_backends; k++) {
> > @@ -3365,7 +3389,7 @@ static void build_lb_vip_ct_lb_actions(struct
> lb_vip *lb_vip,
> >              }
> >
> >              n_active_backends++;
> > -            ds_put_format(action, "%s:%"PRIu16",",
> > +            ds_put_format(action, "%s:%"PRIu16"-",
> >              backend->ip, backend->port);
> >          }
> >
> > @@ -3373,11 +3397,17 @@ static void build_lb_vip_ct_lb_actions(struct
> lb_vip *lb_vip,
> >              ds_clear(action);
> >              ds_put_cstr(action, "drop;");
> >          } else {
> > -            ds_chomp(action, ',');
> > +            ds_chomp(action, '-');
> >              ds_put_cstr(action, ");");
> >          }
> >      } else {
> > -        ds_put_format(action, "ct_lb(%s);", lb_vip->backend_ips);
> > +        ds_put_format(action, "ct_lb(backends=%s);",
> lb_vip->backend_ips);
> > +    }
> > +
> > +    if (selection_fields && selection_fields[0]) {
> > +        ds_chomp(action, ';');
> > +        ds_chomp(action, ')');
> > +        ds_put_format(action, ", hash_fields=\"%s\");",
> selection_fields);
> >      }
> >  }
> >
> > @@ -5650,7 +5680,7 @@ build_lb_rules(struct ovn_datapath *od, struct hmap
> *lflows, struct ovn_lb *lb)
> >
> >          /* New connections in Ingress table. */
> >          struct ds action = DS_EMPTY_INITIALIZER;
> > -        build_lb_vip_ct_lb_actions(lb_vip, &action);
> > +        build_lb_vip_ct_lb_actions(lb_vip, &action,
> lb->selection_fields);
> >
> >          struct ds match = DS_EMPTY_INITIALIZER;
> >          ds_put_format(&match, "ct.new && %s.dst == %s", ip_match,
> lb_vip->vip);
> > @@ -9301,7 +9331,8 @@ build_lrouter_flows(struct hmap *datapaths, struct
> hmap *ports,
> >              for (size_t j = 0; j < lb->n_vips; j++) {
> >                  struct lb_vip *lb_vip = &lb->vips[j];
> >                  ds_clear(&actions);
> > -                build_lb_vip_ct_lb_actions(lb_vip, &actions);
> > +                build_lb_vip_ct_lb_actions(lb_vip, &actions,
> > +                                           lb->selection_fields);
> >
> >                  if (!sset_contains(&all_ips, lb_vip->vip)) {
> >                      sset_add(&all_ips, lb_vip->vip);
> > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> > index b2a046571..a06972aa0 100644
> > --- a/ovn-nb.ovsschema
> > +++ b/ovn-nb.ovsschema
> > @@ -1,7 +1,7 @@
> >  {
> >      "name": "OVN_Northbound",
> > -    "version": "5.22.0",
> > -    "cksum": "384164739 25476",
> > +    "version": "5.23.0",
> > +    "cksum": "111023208 25806",
> >      "tables": {
> >          "NB_Global": {
> >              "columns": {
> > @@ -179,6 +179,12 @@
> >                  "ip_port_mappings": {
> >                      "type": {"key": "string", "value": "string",
> >                               "min": 0, "max": "unlimited"}},
> > +                "selection_fields": {
> > +                    "type": {"key": {"type": "string",
> > +                             "enum": ["set",
> > +                                ["eth_src", "eth_dst", "ip_src",
> "ip_dst",
> > +                                 "tp_src", "tp_dst"]]},
> > +                             "min": 0, "max": "unlimited"}},
> >                  "external_ids": {
> >                      "type": {"key": "string", "value": "string",
> >                               "min": 0, "max": "unlimited"}}},
> > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > index af15c550a..243a25bce 100644
> > --- a/ovn-nb.xml
> > +++ b/ovn-nb.xml
> > @@ -1513,6 +1513,13 @@
> >        </p>
> >      </column>
> >
> > +    <column name="selection_fields">
> > +      Set of packet header fields to use for selecting a backend to
> > +      load balance the VIP. This can be used if CMS desires that the
> > +      backend selection is consistent and deterministic for a given
> > +      set of packet header fields.
> > +    </column>
> > +
>
> This description may or may not be true, depending on dp_hash
> implementation of different OVS versions and it can change in the future,
> so I think it is better to document more details about what OVS group
> selection method is used (dp_hash v.s. hash), and direct user to read the
> OVS document to understand the implications more accurately. Performance
> implications should also be mentioned here, but it is also better to let
> user check OVS documentation for details.
>

Thanks. I'll update the documentation in v2.

Numan


>
> >      <group title="Common Columns">
> >        <column name="external_ids">
> >          See <em>External IDs</em> at the beginning of this document.
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index 8cc3f7002..c665db89b 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -1183,7 +1183,7 @@ ovn-nbctl --wait=sb ls-lb-add sw0 lb1
> >
> >  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >  ])
> >
> >  # Delete the Load_Balancer_Health_Check
> > @@ -1192,7 +1192,7 @@ OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list
> service_monitor |  wc -l`])
> >
> >  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >  ])
> >
> >  # Create the Load_Balancer_Health_Check again.
> > @@ -1205,7 +1205,7 @@ service_monitor | sed '/^$/d' | wc -l`])
> >
> >  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >  ])
> >
> >  # Get the uuid of both the service_monitor
> > @@ -1221,7 +1221,7 @@ OVS_WAIT_UNTIL([
> >
> >  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> );)
> >  ])
> >
> >  # Set the service monitor for sw0-p1 to offline
> > @@ -1251,7 +1251,7 @@ OVS_WAIT_UNTIL([
> >
> >  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >  ])
> >
> >  # Set the service monitor for sw1-p1 to error
> > @@ -1263,7 +1263,7 @@ OVS_WAIT_UNTIL([
> >  ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80"
> \
> >  | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> );)
> >  ])
> >
> >  # Add one more vip to lb1
> > @@ -1293,8 +1293,8 @@ service_monitor port=1000 | sed '/^$/d' | wc -l`])
> >
> >  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> );)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000),
> action=(ct_lb(backends=10.0.0.3:1000);)
> >  ])
> >
> >  # Set the service monitor for sw1-p1 to online
> > @@ -1306,16 +1306,16 @@ OVS_WAIT_UNTIL([
> >
> >  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000),
> action=(ct_lb(backends=10.0.0.3:1000-20.0.0.3:80);)
> >  ])
> >
> >  # Associate lb1 to sw1
> >  ovn-nbctl --wait=sb ls-lb-add sw1 lb1
> >  ovn-sbctl dump-flows sw1 | grep ct_lb | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000),
> action=(ct_lb(backends=10.0.0.3:1000-20.0.0.3:80);)
> >  ])
> >
> >  # Now create lb2 same as lb1 but udp protocol.
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 7befc8224..729bc1853 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -970,29 +970,44 @@ ct_lb();
> >      encodes as ct(table=19,zone=NXM_NX_REG13[0..15],nat)
> >      has prereqs ip
> >  ct_lb(192.168.1.2:80, 192.168.1.3:80);
> > +    Syntax error at `192.168.1.2' expecting backends.
> > +ct_lb(backends=192.168.1.2:80, 192.168.1.3:80);
> > +    Syntax error at `192.168.1.3' expecting `;'.
> > +ct_lb(backends=192.168.1.2:80-192.168.1.3:80);
> >      encodes as group:1
> >      uses group: id(1),
>
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=
> 192.168.1.2:80
>
> ),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=
> 192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15]))
> >      has prereqs ip
> > -ct_lb(192.168.1.2, 192.168.1.3, );
> > -    formats as ct_lb(192.168.1.2, 192.168.1.3);
> > +ct_lb(backends=192.168.1.2- 192.168.1.3- );
> > +    formats as ct_lb(backends=192.168.1.2-192.168.1.3);
> >      encodes as group:2
> >      uses group: id(2),
>
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15]))
> >      has prereqs ip
> > -ct_lb(fd0f::2, fd0f::3, );
> > -    formats as ct_lb(fd0f::2, fd0f::3);
> > +ct_lb(backends=fd0f::2- fd0f::3- );
> > +    formats as ct_lb(backends=fd0f::2-fd0f::3);
> >      encodes as group:3
> >      uses group: id(3),
>
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15]))
> >      has prereqs ip
> >
> > -ct_lb(192.168.1.2:);
> > +ct_lb(backends=192.168.1.2:);
> >      Syntax error at `)' expecting port number.
> > -ct_lb(192.168.1.2:123456);
> > +ct_lb(backends=192.168.1.2:123456);
> >      Syntax error at `123456' expecting port number.
> > -ct_lb(foo);
> > +ct_lb(backends=foo);
> >      Syntax error at `foo' expecting IP address.
> > -ct_lb([192.168.1.2]);
> > +ct_lb(backends=[192.168.1.2]);
> >      Syntax error at `192.168.1.2' expecting IPv6 address.
> >
> > +ct_lb(backends=192.168.1.2:80-192.168.1.3:80,
> hash_fields=eth_src,eth_dst,ip_src);
> > +    Syntax error at `eth_src' invalid hash_fields.
> > +ct_lb(backends=192.168.1.2:80-192.168.1.3:80,
> hash_fields="eth_src,eth_dst,ip_src");
> > +    encodes as group:4
> > +    uses group: id(4),
>
> name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=
> 192.168.1.2:80
>
> ),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=
> 192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15]))
> > +    has prereqs ip
> > +ct_lb(backends=fd0f::2-fd0f::3,
> hash_fields="eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst");
> > +    encodes as group:5
> > +    uses group: id(5),
>
> name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15]))
> > +    has prereqs ip
> > +
> >  # ct_next
> >  ct_next;
> >      encodes as ct(table=19,zone=NXM_NX_REG13[0..15])
> > @@ -1520,13 +1535,13 @@ handle_svc_check(reg0);
> >  # select
> >  reg9[16..31] = select(1=50, 2=100, 3, );
> >      formats as reg9[16..31] = select(1=50, 2=100, 3=100);
> > -    encodes as group:4
> > -    uses group: id(4),
>
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19))
> > +    encodes as group:6
> > +    uses group: id(6),
>
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19))
> >
> >  reg0 = select(1, 2);
> >      formats as reg0 = select(1=100, 2=100);
> > -    encodes as group:5
> > -    uses group: id(5),
>
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19))
> > +    encodes as group:7
> > +    uses group: id(7),
>
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19))
> >
> >  reg0 = select(1=, 2);
> >      Syntax error at `,' expecting weight.
> > @@ -1542,12 +1557,12 @@ reg0[0..14] = select(1, 2, 3);
> >      cannot use 15-bit field reg0[0..14] for "select", which requires at
> least 16 bits.
> >
> >  fwd_group(liveness="true", childports="eth0", "lsp1");
> > -    encodes as group:6
> > -    uses group: id(6),
>
> name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> > +    encodes as group:8
> > +    uses group: id(8),
>
> name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> >
> >  fwd_group(childports="eth0", "lsp1");
> > -    encodes as group:7
> > -    uses group: id(7),
>
> name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> > +    encodes as group:9
> > +    uses group: id(9),
>
> name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> >
> >  fwd_group(childports=eth0);
> >      Syntax error at `eth0' expecting logical switch port.
> > @@ -18000,12 +18015,12 @@ service_monitor | sed '/^$/d' | wc -l`])
> >
> >  ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >  ])
> >
> >  ovn-sbctl dump-flows lr0 | grep ct_lb | grep priority=120 > lflows.txt
> >  AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=6 (lr_in_dnat         ), priority=120  , match=(ct.new && ip &&
> ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 &&
> is_chassis_resident("cr-lr0-public")), action=(ct_lb(10.0.0.3:80,
> 20.0.0.3:80
> );)
> > +  table=6 (lr_in_dnat         ), priority=120  , match=(ct.new && ip &&
> ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 &&
> is_chassis_resident("cr-lr0-public")), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >  ])
> >
> >  # get the svc monitor mac.
> > --
> > 2.25.4
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Numan Siddique May 5, 2020, 6:05 p.m. UTC | #5
On Sat, May 2, 2020 at 1:48 AM Mark Michelson <mmichels@redhat.com> wrote:

> I think you should follow this up with some tests.
>
> On 4/30/20 1:20 PM, numans@ovn.org wrote:
> > From: Numan Siddique <numans@ovn.org>
> >
> > This patch add a new column 'selection_fields' in Load_Balancer
> > table in NB DB. CMS can define a set of packet headers to use
> > while selecting a backend. If this column is set, OVN will add the
> > flow in group table with selection method as 'hash' with the set fields.
> > Otherwise it will use the default 'dp_hash' selection method.
> >
> > If a load balancer is configured with the selection_fields as
> > selection_fields    : [ip_dst, ip_src, tp_dst, tp_src]
> >
> > then with this patch, the modified ct_lb action will look like
> >   - ct_lb(backends=IP1:P1-IP2:P1,
> hash_fields="ip_dst,ip_src,tp_dst,tp_src");
>
> Hm, I'm trying not to bikeshed here, but the syntax of using '-' as the
> separator for IP:port is unfortunate. It makes it seem like it
> represents a range of backends instead of distinct backends. Could it
> maybe be changed to:
>
> ct_lb(backends=IP1:P1,IP2:P2; hash_fields="ip_dst,ip_src,tp_dst,tp_src");
>

To be frank, the option of using "-" looks very ugly. I agree with you.
I thought of using ";", but generally semicolon
is used in the inner actions or actions which can be translated to OF flow
actions
straightway.

But I think in this case, using a semicolon is better. I'll address this in
the next version.



>
> ?
>
> See below for a couple other minor critiques.
>
> >
> > And the OF flow will look like
> >   - group_id=2,type=select,selection_method=hash,
> >
>  fields(ip_src,ip_dst,tcp_src,tcp_dst),bucket=bucket_id:0,weight:100,actions=ct(....
> >
> > Signed-off-by: Numan Siddique <numans@ovn.org>
> > ---
> >   include/ovn/actions.h |  1 +
> >   lib/actions.c         | 45 +++++++++++++++++++++++++++++++++-----
> >   northd/ovn-northd.c   | 47 ++++++++++++++++++++++++++++++++-------
> >   ovn-nb.ovsschema      | 10 +++++++--
> >   ovn-nb.xml            |  7 ++++++
> >   tests/ovn-northd.at   | 24 ++++++++++----------
> >   tests/ovn.at          | 51 ++++++++++++++++++++++++++++---------------
> >   7 files changed, 139 insertions(+), 46 deletions(-)
> >
> > diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> > index 42d45a74c..4a54abe17 100644
> > --- a/include/ovn/actions.h
> > +++ b/include/ovn/actions.h
> > @@ -259,6 +259,7 @@ struct ovnact_ct_lb {
> >       struct ovnact_ct_lb_dst *dsts;
> >       size_t n_dsts;
> >       uint8_t ltable;             /* Logical table ID of next table. */
> > +    char *hash_fields;
> >   };
> >
> >   struct ovnact_select_dst {
> > diff --git a/lib/actions.c b/lib/actions.c
> > index 021a376b4..6d840a73f 100644
> > --- a/lib/actions.c
> > +++ b/lib/actions.c
> > @@ -951,9 +951,18 @@ parse_ct_lb_action(struct action_context *ctx)
> >       struct ovnact_ct_lb_dst *dsts = NULL;
> >       size_t allocated_dsts = 0;
> >       size_t n_dsts = 0;
> > +    char *hash_fields = NULL;
> >
> > -    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> > -        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> > +    if (lexer_match(ctx->lexer, LEX_T_LPAREN) &&
> > +        !lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> > +        if (!lexer_match_id(ctx->lexer, "backends") ||
> > +            !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
> > +            lexer_syntax_error(ctx->lexer, "expecting backends");
> > +            return;
> > +        }
> > +
> > +        while (!lexer_match(ctx->lexer, LEX_T_COMMA) &&
> > +               !lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> >               struct ovnact_ct_lb_dst dst;
> >               if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) {
> >                   /* IPv6 address and port */
> > @@ -1012,7 +1021,7 @@ parse_ct_lb_action(struct action_context *ctx)
> >                       }
> >                   }
> >               }
> > -            lexer_match(ctx->lexer, LEX_T_COMMA);
> > +            lexer_match(ctx->lexer, LEX_T_HYPHEN);
> >
> >               /* Append to dsts. */
> >               if (n_dsts >= allocated_dsts) {
> > @@ -1020,12 +1029,27 @@ parse_ct_lb_action(struct action_context *ctx)
> >               }
> >               dsts[n_dsts++] = dst;
> >           }
> > +
> > +        if (lexer_match_id(ctx->lexer, "hash_fields")) {
> > +            if (!lexer_match(ctx->lexer, LEX_T_EQUALS) ||
> > +                ctx->lexer->token.type != LEX_T_STRING ||
> > +                lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) {
> > +                lexer_syntax_error(ctx->lexer, "invalid hash_fields");
> > +                free(dsts);
> > +                return;
> > +            }
> > +
> > +            hash_fields = xstrdup(ctx->lexer->token.s);
> > +            lexer_get(ctx->lexer);
> > +            lexer_get(ctx->lexer);
> > +        }
> >       }
> >
> >       struct ovnact_ct_lb *cl = ovnact_put_CT_LB(ctx->ovnacts);
> >       cl->ltable = ctx->pp->cur_ltable + 1;
> >       cl->dsts = dsts;
> >       cl->n_dsts = n_dsts;
> > +    cl->hash_fields = hash_fields;
> >   }
> >

>   static void
> > @@ -1033,10 +1057,10 @@ format_CT_LB(const struct ovnact_ct_lb *cl,
> struct ds *s)
> >   {
> >       ds_put_cstr(s, "ct_lb");
> >       if (cl->n_dsts) {
> > -        ds_put_char(s, '(');
> > +        ds_put_cstr(s, "(backends=");
> >           for (size_t i = 0; i < cl->n_dsts; i++) {
> >               if (i) {
> > -                ds_put_cstr(s, ", ");
> > +                ds_put_char(s, '-');
> >               }
> >
> >               const struct ovnact_ct_lb_dst *dst = &cl->dsts[i];
> > @@ -1057,6 +1081,11 @@ format_CT_LB(const struct ovnact_ct_lb *cl,
> struct ds *s)
> >           }
> >           ds_put_char(s, ')');
> >       }
> > +
> > +    if (cl->hash_fields) {
> > +        ds_chomp(s, ')');
> > +        ds_put_format(s, ", hash_fields=\"%s\")", cl->hash_fields);
> > +    }
> >       ds_put_char(s, ';');
> >   }
> >
> > @@ -1103,7 +1132,10 @@ encode_CT_LB(const struct ovnact_ct_lb *cl,
> >                               : MFF_LOG_DNAT_ZONE - MFF_REG0;
> >
> >       struct ds ds = DS_EMPTY_INITIALIZER;
> > -    ds_put_format(&ds, "type=select,selection_method=dp_hash");
> > +    ds_put_format(&ds, "type=select,selection_method=%s",
> cl->hash_fields ? "hash": "dp_hash");
> > +    if (cl->hash_fields) {
> > +        ds_put_format(&ds, ",fields(%s)", cl->hash_fields);
> > +    }
> >
> >       BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0);
> >       BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS);
> > @@ -1145,6 +1177,7 @@ static void
> >   ovnact_ct_lb_free(struct ovnact_ct_lb *ct_lb)
> >   {
> >       free(ct_lb->dsts);
> > +    free(ct_lb->hash_fields);
> >   }
> >
> >   static void
> > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > index e31794c4b..e18dd03df 100644
> > --- a/northd/ovn-northd.c
> > +++ b/northd/ovn-northd.c
> > @@ -3101,7 +3101,7 @@ struct ovn_lb {
> >       struct hmap_node hmap_node;
> >
> >       const struct nbrec_load_balancer *nlb; /* May be NULL. */
> > -
> > +    char *selection_fields;
> >       struct lb_vip *vips;
> >       size_t n_vips;
> >   };
> > @@ -3214,6 +3214,18 @@ ovn_lb_create(struct northd_context *ctx, struct
> hmap *lbs,
> >           lb->vips[n_vips].addr_family = addr_family;
> >           lb->vips[n_vips].backend_ips = xstrdup(node->value);
> >
> > +        char *ps1 = lb->vips[n_vips].backend_ips;
> > +        char *ps2 = ps1;
> > +        /* Replace all occurances of ',' with '-'. */
> > +        while (ps2) {
> > +            ps2 = strstr(ps1, ",");
>
> A minor improvement here would be to use strchr() instead of strstr().
>
> Optionally, if you don't mind doing an assignment in a while statement,
> you could write this as:
>
> while ((ps2 = strchr(ps1, ','))) {
>      *ps2 = '-';
>      ps2++;
>      ps1 = ps2;
> }
>

This code will go away now in v2.


>
> > +            if (ps2) {
> > +                *ps2 = '-';
> > +                ps2++;
> > +                ps1 = ps2;
> > +            }
> > +        }
> > +
> >           struct nbrec_load_balancer_health_check *lb_health_check =
> NULL;
> >           if (nbrec_lb->protocol && !strcmp(nbrec_lb->protocol, "sctp"))
> {
> >               if (nbrec_lb->n_health_check > 0) {
> > @@ -3329,6 +3341,16 @@ ovn_lb_create(struct northd_context *ctx, struct
> hmap *lbs,
> >           n_vips++;
> >       }
> >
> > +    if (lb->nlb->n_selection_fields) {
> > +        struct ds sel_fields = DS_EMPTY_INITIALIZER;
> > +        for (size_t i = 0; i < lb->nlb->n_selection_fields; i++) {
> > +            ds_put_format(&sel_fields, "%s,",
> lb->nlb->selection_fields[i]);
> > +        }
> > +        ds_chomp(&sel_fields, ',');
> > +        lb->selection_fields = ds_steal_cstr(&sel_fields);
> > +        ds_destroy(&sel_fields);
>
> ds_destroy() is unnecessary since you call ds_steal_cstr()
>

Ack.

Thanks
Numan


>
> > +    }
> > +
> >       return lb;
> >   }
> >
> > @@ -3347,13 +3369,15 @@ ovn_lb_destroy(struct ovn_lb *lb)
> >           free(lb->vips[i].backends);
> >       }
> >       free(lb->vips);
> > +    free(lb->selection_fields);
> >   }
> >
> >   static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip,
> > -                                       struct ds *action)
> > +                                       struct ds *action,
> > +                                       char *selection_fields)
> >   {
> >       if (lb_vip->health_check) {
> > -        ds_put_cstr(action, "ct_lb(");
> > +        ds_put_cstr(action, "ct_lb(backends=");
> >
> >           size_t n_active_backends = 0;
> >           for (size_t k = 0; k < lb_vip->n_backends; k++) {
> > @@ -3365,7 +3389,7 @@ static void build_lb_vip_ct_lb_actions(struct
> lb_vip *lb_vip,
> >               }
> >
> >               n_active_backends++;
> > -            ds_put_format(action, "%s:%"PRIu16",",
> > +            ds_put_format(action, "%s:%"PRIu16"-",
> >               backend->ip, backend->port);
> >           }
> >
> > @@ -3373,11 +3397,17 @@ static void build_lb_vip_ct_lb_actions(struct
> lb_vip *lb_vip,
> >               ds_clear(action);
> >               ds_put_cstr(action, "drop;");
> >           } else {
> > -            ds_chomp(action, ',');
> > +            ds_chomp(action, '-');
> >               ds_put_cstr(action, ");");
> >           }
> >       } else {
> > -        ds_put_format(action, "ct_lb(%s);", lb_vip->backend_ips);
> > +        ds_put_format(action, "ct_lb(backends=%s);",
> lb_vip->backend_ips);
> > +    }
> > +
> > +    if (selection_fields && selection_fields[0]) {
> > +        ds_chomp(action, ';');
> > +        ds_chomp(action, ')');
> > +        ds_put_format(action, ", hash_fields=\"%s\");",
> selection_fields);
> >       }
> >   }
> >
> > @@ -5650,7 +5680,7 @@ build_lb_rules(struct ovn_datapath *od, struct
> hmap *lflows, struct ovn_lb *lb)
> >
> >           /* New connections in Ingress table. */
> >           struct ds action = DS_EMPTY_INITIALIZER;
> > -        build_lb_vip_ct_lb_actions(lb_vip, &action);
> > +        build_lb_vip_ct_lb_actions(lb_vip, &action,
> lb->selection_fields);
> >
> >           struct ds match = DS_EMPTY_INITIALIZER;
> >           ds_put_format(&match, "ct.new && %s.dst == %s", ip_match,
> lb_vip->vip);
> > @@ -9301,7 +9331,8 @@ build_lrouter_flows(struct hmap *datapaths, struct
> hmap *ports,
> >               for (size_t j = 0; j < lb->n_vips; j++) {
> >                   struct lb_vip *lb_vip = &lb->vips[j];
> >                   ds_clear(&actions);
> > -                build_lb_vip_ct_lb_actions(lb_vip, &actions);
> > +                build_lb_vip_ct_lb_actions(lb_vip, &actions,
> > +                                           lb->selection_fields);
> >
> >                   if (!sset_contains(&all_ips, lb_vip->vip)) {
> >                       sset_add(&all_ips, lb_vip->vip);
> > diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
> > index b2a046571..a06972aa0 100644
> > --- a/ovn-nb.ovsschema
> > +++ b/ovn-nb.ovsschema
> > @@ -1,7 +1,7 @@
> >   {
> >       "name": "OVN_Northbound",
> > -    "version": "5.22.0",
> > -    "cksum": "384164739 25476",
> > +    "version": "5.23.0",
> > +    "cksum": "111023208 25806",
> >       "tables": {
> >           "NB_Global": {
> >               "columns": {
> > @@ -179,6 +179,12 @@
> >                   "ip_port_mappings": {
> >                       "type": {"key": "string", "value": "string",
> >                                "min": 0, "max": "unlimited"}},
> > +                "selection_fields": {
> > +                    "type": {"key": {"type": "string",
> > +                             "enum": ["set",
> > +                                ["eth_src", "eth_dst", "ip_src",
> "ip_dst",
> > +                                 "tp_src", "tp_dst"]]},
> > +                             "min": 0, "max": "unlimited"}},
> >                   "external_ids": {
> >                       "type": {"key": "string", "value": "string",
> >                                "min": 0, "max": "unlimited"}}},
> > diff --git a/ovn-nb.xml b/ovn-nb.xml
> > index af15c550a..243a25bce 100644
> > --- a/ovn-nb.xml
> > +++ b/ovn-nb.xml
> > @@ -1513,6 +1513,13 @@
> >         </p>
> >       </column>
> >
> > +    <column name="selection_fields">
> > +      Set of packet header fields to use for selecting a backend to
> > +      load balance the VIP. This can be used if CMS desires that the
> > +      backend selection is consistent and deterministic for a given
> > +      set of packet header fields.
> > +    </column>
> > +
> >       <group title="Common Columns">
> >         <column name="external_ids">
> >           See <em>External IDs</em> at the beginning of this document.
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index 8cc3f7002..c665db89b 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -1183,7 +1183,7 @@ ovn-nbctl --wait=sb ls-lb-add sw0 lb1
> >
> >   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >   ])
> >
> >   # Delete the Load_Balancer_Health_Check
> > @@ -1192,7 +1192,7 @@ OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list
> service_monitor |  wc -l`])
> >
> >   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >   ])
> >
> >   # Create the Load_Balancer_Health_Check again.
> > @@ -1205,7 +1205,7 @@ service_monitor | sed '/^$/d' | wc -l`])
> >
> >   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >   ])
> >
> >   # Get the uuid of both the service_monitor
> > @@ -1221,7 +1221,7 @@ OVS_WAIT_UNTIL([
> >
> >   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> );)
> >   ])
> >
> >   # Set the service monitor for sw0-p1 to offline
> > @@ -1251,7 +1251,7 @@ OVS_WAIT_UNTIL([
> >
> >   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >   ])
> >
> >   # Set the service monitor for sw1-p1 to error
> > @@ -1263,7 +1263,7 @@ OVS_WAIT_UNTIL([
> >   ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst ==
> 80" \
> >   | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> );)
> >   ])
> >
> >   # Add one more vip to lb1
> > @@ -1293,8 +1293,8 @@ service_monitor port=1000 | sed '/^$/d' | wc -l`])
> >
> >   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> );)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000),
> action=(ct_lb(backends=10.0.0.3:1000);)
> >   ])
> >
> >   # Set the service monitor for sw1-p1 to online
> > @@ -1306,16 +1306,16 @@ OVS_WAIT_UNTIL([
> >
> >   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000),
> action=(ct_lb(backends=10.0.0.3:1000-20.0.0.3:80);)
> >   ])
> >
> >   # Associate lb1 to sw1
> >   ovn-nbctl --wait=sb ls-lb-add sw1 lb1
> >   ovn-sbctl dump-flows sw1 | grep ct_lb | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.40 && tcp.dst == 1000),
> action=(ct_lb(backends=10.0.0.3:1000-20.0.0.3:80);)
> >   ])
> >
> >   # Now create lb2 same as lb1 but udp protocol.
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 7befc8224..729bc1853 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -970,29 +970,44 @@ ct_lb();
> >       encodes as ct(table=19,zone=NXM_NX_REG13[0..15],nat)
> >       has prereqs ip
> >   ct_lb(192.168.1.2:80, 192.168.1.3:80);
> > +    Syntax error at `192.168.1.2' expecting backends.
> > +ct_lb(backends=192.168.1.2:80, 192.168.1.3:80);
> > +    Syntax error at `192.168.1.3' expecting `;'.
> > +ct_lb(backends=192.168.1.2:80-192.168.1.3:80);
> >       encodes as group:1
> >       uses group: id(1),
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=
> 192.168.1.2:80
> ),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=
> 192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15]))
> >       has prereqs ip
> > -ct_lb(192.168.1.2, 192.168.1.3, );
> > -    formats as ct_lb(192.168.1.2, 192.168.1.3);
> > +ct_lb(backends=192.168.1.2- 192.168.1.3- );
> > +    formats as ct_lb(backends=192.168.1.2-192.168.1.3);
> >       encodes as group:2
> >       uses group: id(2),
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15]))
> >       has prereqs ip
> > -ct_lb(fd0f::2, fd0f::3, );
> > -    formats as ct_lb(fd0f::2, fd0f::3);
> > +ct_lb(backends=fd0f::2- fd0f::3- );
> > +    formats as ct_lb(backends=fd0f::2-fd0f::3);
> >       encodes as group:3
> >       uses group: id(3),
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15]))
> >       has prereqs ip
> >
> > -ct_lb(192.168.1.2:);
> > +ct_lb(backends=192.168.1.2:);
> >       Syntax error at `)' expecting port number.
> > -ct_lb(192.168.1.2:123456);
> > +ct_lb(backends=192.168.1.2:123456);
> >       Syntax error at `123456' expecting port number.
> > -ct_lb(foo);
> > +ct_lb(backends=foo);
> >       Syntax error at `foo' expecting IP address.
> > -ct_lb([192.168.1.2]);
> > +ct_lb(backends=[192.168.1.2]);
> >       Syntax error at `192.168.1.2' expecting IPv6 address.
> >
> > +ct_lb(backends=192.168.1.2:80-192.168.1.3:80,
> hash_fields=eth_src,eth_dst,ip_src);
> > +    Syntax error at `eth_src' invalid hash_fields.
> > +ct_lb(backends=192.168.1.2:80-192.168.1.3:80,
> hash_fields="eth_src,eth_dst,ip_src");
> > +    encodes as group:4
> > +    uses group: id(4),
> name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=
> 192.168.1.2:80
> ),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=
> 192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15]))
> > +    has prereqs ip
> > +ct_lb(backends=fd0f::2-fd0f::3,
> hash_fields="eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst");
> > +    encodes as group:5
> > +    uses group: id(5),
> name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15]))
> > +    has prereqs ip
> > +
> >   # ct_next
> >   ct_next;
> >       encodes as ct(table=19,zone=NXM_NX_REG13[0..15])
> > @@ -1520,13 +1535,13 @@ handle_svc_check(reg0);
> >   # select
> >   reg9[16..31] = select(1=50, 2=100, 3, );
> >       formats as reg9[16..31] = select(1=50, 2=100, 3=100);
> > -    encodes as group:4
> > -    uses group: id(4),
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19))
> > +    encodes as group:6
> > +    uses group: id(6),
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19))
> >
> >   reg0 = select(1, 2);
> >       formats as reg0 = select(1=100, 2=100);
> > -    encodes as group:5
> > -    uses group: id(5),
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19))
> > +    encodes as group:7
> > +    uses group: id(7),
> name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19))
> >
> >   reg0 = select(1=, 2);
> >       Syntax error at `,' expecting weight.
> > @@ -1542,12 +1557,12 @@ reg0[0..14] = select(1, 2, 3);
> >       cannot use 15-bit field reg0[0..14] for "select", which requires
> at least 16 bits.
> >
> >   fwd_group(liveness="true", childports="eth0", "lsp1");
> > -    encodes as group:6
> > -    uses group: id(6),
> name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> > +    encodes as group:8
> > +    uses group: id(8),
> name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> >
> >   fwd_group(childports="eth0", "lsp1");
> > -    encodes as group:7
> > -    uses group: id(7),
> name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> > +    encodes as group:9
> > +    uses group: id(9),
> name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
> >
> >   fwd_group(childports=eth0);
> >       Syntax error at `eth0' expecting logical switch port.
> > @@ -18000,12 +18015,12 @@ service_monitor | sed '/^$/d' | wc -l`])
> >
> >   ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=10(ls_in_stateful     ), priority=120  , match=(ct.new &&
> ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >   ])
> >
> >   ovn-sbctl dump-flows lr0 | grep ct_lb | grep priority=120 > lflows.txt
> >   AT_CHECK([cat lflows.txt], [0], [dnl
> > -  table=6 (lr_in_dnat         ), priority=120  , match=(ct.new && ip &&
> ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 &&
> is_chassis_resident("cr-lr0-public")), action=(ct_lb(10.0.0.3:80
> ,20.0.0.3:80);)
> > +  table=6 (lr_in_dnat         ), priority=120  , match=(ct.new && ip &&
> ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 &&
> is_chassis_resident("cr-lr0-public")), action=(ct_lb(backends=10.0.0.3:80
> -20.0.0.3:80);)
> >   ])
> >
> >   # get the svc monitor mac.
> >
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
diff mbox series

Patch

diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 42d45a74c..4a54abe17 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -259,6 +259,7 @@  struct ovnact_ct_lb {
     struct ovnact_ct_lb_dst *dsts;
     size_t n_dsts;
     uint8_t ltable;             /* Logical table ID of next table. */
+    char *hash_fields;
 };
 
 struct ovnact_select_dst {
diff --git a/lib/actions.c b/lib/actions.c
index 021a376b4..6d840a73f 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -951,9 +951,18 @@  parse_ct_lb_action(struct action_context *ctx)
     struct ovnact_ct_lb_dst *dsts = NULL;
     size_t allocated_dsts = 0;
     size_t n_dsts = 0;
+    char *hash_fields = NULL;
 
-    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
-        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+    if (lexer_match(ctx->lexer, LEX_T_LPAREN) &&
+        !lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+        if (!lexer_match_id(ctx->lexer, "backends") ||
+            !lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+            lexer_syntax_error(ctx->lexer, "expecting backends");
+            return;
+        }
+
+        while (!lexer_match(ctx->lexer, LEX_T_COMMA) &&
+               !lexer_match(ctx->lexer, LEX_T_RPAREN)) {
             struct ovnact_ct_lb_dst dst;
             if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) {
                 /* IPv6 address and port */
@@ -1012,7 +1021,7 @@  parse_ct_lb_action(struct action_context *ctx)
                     }
                 }
             }
-            lexer_match(ctx->lexer, LEX_T_COMMA);
+            lexer_match(ctx->lexer, LEX_T_HYPHEN);
 
             /* Append to dsts. */
             if (n_dsts >= allocated_dsts) {
@@ -1020,12 +1029,27 @@  parse_ct_lb_action(struct action_context *ctx)
             }
             dsts[n_dsts++] = dst;
         }
+
+        if (lexer_match_id(ctx->lexer, "hash_fields")) {
+            if (!lexer_match(ctx->lexer, LEX_T_EQUALS) ||
+                ctx->lexer->token.type != LEX_T_STRING ||
+                lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) {
+                lexer_syntax_error(ctx->lexer, "invalid hash_fields");
+                free(dsts);
+                return;
+            }
+
+            hash_fields = xstrdup(ctx->lexer->token.s);
+            lexer_get(ctx->lexer);
+            lexer_get(ctx->lexer);
+        }
     }
 
     struct ovnact_ct_lb *cl = ovnact_put_CT_LB(ctx->ovnacts);
     cl->ltable = ctx->pp->cur_ltable + 1;
     cl->dsts = dsts;
     cl->n_dsts = n_dsts;
+    cl->hash_fields = hash_fields;
 }
 
 static void
@@ -1033,10 +1057,10 @@  format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s)
 {
     ds_put_cstr(s, "ct_lb");
     if (cl->n_dsts) {
-        ds_put_char(s, '(');
+        ds_put_cstr(s, "(backends=");
         for (size_t i = 0; i < cl->n_dsts; i++) {
             if (i) {
-                ds_put_cstr(s, ", ");
+                ds_put_char(s, '-');
             }
 
             const struct ovnact_ct_lb_dst *dst = &cl->dsts[i];
@@ -1057,6 +1081,11 @@  format_CT_LB(const struct ovnact_ct_lb *cl, struct ds *s)
         }
         ds_put_char(s, ')');
     }
+
+    if (cl->hash_fields) {
+        ds_chomp(s, ')');
+        ds_put_format(s, ", hash_fields=\"%s\")", cl->hash_fields);
+    }
     ds_put_char(s, ';');
 }
 
@@ -1103,7 +1132,10 @@  encode_CT_LB(const struct ovnact_ct_lb *cl,
                             : MFF_LOG_DNAT_ZONE - MFF_REG0;
 
     struct ds ds = DS_EMPTY_INITIALIZER;
-    ds_put_format(&ds, "type=select,selection_method=dp_hash");
+    ds_put_format(&ds, "type=select,selection_method=%s", cl->hash_fields ? "hash": "dp_hash");
+    if (cl->hash_fields) {
+        ds_put_format(&ds, ",fields(%s)", cl->hash_fields);
+    }
 
     BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0);
     BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS);
@@ -1145,6 +1177,7 @@  static void
 ovnact_ct_lb_free(struct ovnact_ct_lb *ct_lb)
 {
     free(ct_lb->dsts);
+    free(ct_lb->hash_fields);
 }
 
 static void
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index e31794c4b..e18dd03df 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -3101,7 +3101,7 @@  struct ovn_lb {
     struct hmap_node hmap_node;
 
     const struct nbrec_load_balancer *nlb; /* May be NULL. */
-
+    char *selection_fields;
     struct lb_vip *vips;
     size_t n_vips;
 };
@@ -3214,6 +3214,18 @@  ovn_lb_create(struct northd_context *ctx, struct hmap *lbs,
         lb->vips[n_vips].addr_family = addr_family;
         lb->vips[n_vips].backend_ips = xstrdup(node->value);
 
+        char *ps1 = lb->vips[n_vips].backend_ips;
+        char *ps2 = ps1;
+        /* Replace all occurances of ',' with '-'. */
+        while (ps2) {
+            ps2 = strstr(ps1, ",");
+            if (ps2) {
+                *ps2 = '-';
+                ps2++;
+                ps1 = ps2;
+            }
+        }
+
         struct nbrec_load_balancer_health_check *lb_health_check = NULL;
         if (nbrec_lb->protocol && !strcmp(nbrec_lb->protocol, "sctp")) {
             if (nbrec_lb->n_health_check > 0) {
@@ -3329,6 +3341,16 @@  ovn_lb_create(struct northd_context *ctx, struct hmap *lbs,
         n_vips++;
     }
 
+    if (lb->nlb->n_selection_fields) {
+        struct ds sel_fields = DS_EMPTY_INITIALIZER;
+        for (size_t i = 0; i < lb->nlb->n_selection_fields; i++) {
+            ds_put_format(&sel_fields, "%s,", lb->nlb->selection_fields[i]);
+        }
+        ds_chomp(&sel_fields, ',');
+        lb->selection_fields = ds_steal_cstr(&sel_fields);
+        ds_destroy(&sel_fields);
+    }
+
     return lb;
 }
 
@@ -3347,13 +3369,15 @@  ovn_lb_destroy(struct ovn_lb *lb)
         free(lb->vips[i].backends);
     }
     free(lb->vips);
+    free(lb->selection_fields);
 }
 
 static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip,
-                                       struct ds *action)
+                                       struct ds *action,
+                                       char *selection_fields)
 {
     if (lb_vip->health_check) {
-        ds_put_cstr(action, "ct_lb(");
+        ds_put_cstr(action, "ct_lb(backends=");
 
         size_t n_active_backends = 0;
         for (size_t k = 0; k < lb_vip->n_backends; k++) {
@@ -3365,7 +3389,7 @@  static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip,
             }
 
             n_active_backends++;
-            ds_put_format(action, "%s:%"PRIu16",",
+            ds_put_format(action, "%s:%"PRIu16"-",
             backend->ip, backend->port);
         }
 
@@ -3373,11 +3397,17 @@  static void build_lb_vip_ct_lb_actions(struct lb_vip *lb_vip,
             ds_clear(action);
             ds_put_cstr(action, "drop;");
         } else {
-            ds_chomp(action, ',');
+            ds_chomp(action, '-');
             ds_put_cstr(action, ");");
         }
     } else {
-        ds_put_format(action, "ct_lb(%s);", lb_vip->backend_ips);
+        ds_put_format(action, "ct_lb(backends=%s);", lb_vip->backend_ips);
+    }
+
+    if (selection_fields && selection_fields[0]) {
+        ds_chomp(action, ';');
+        ds_chomp(action, ')');
+        ds_put_format(action, ", hash_fields=\"%s\");", selection_fields);
     }
 }
 
@@ -5650,7 +5680,7 @@  build_lb_rules(struct ovn_datapath *od, struct hmap *lflows, struct ovn_lb *lb)
 
         /* New connections in Ingress table. */
         struct ds action = DS_EMPTY_INITIALIZER;
-        build_lb_vip_ct_lb_actions(lb_vip, &action);
+        build_lb_vip_ct_lb_actions(lb_vip, &action, lb->selection_fields);
 
         struct ds match = DS_EMPTY_INITIALIZER;
         ds_put_format(&match, "ct.new && %s.dst == %s", ip_match, lb_vip->vip);
@@ -9301,7 +9331,8 @@  build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
             for (size_t j = 0; j < lb->n_vips; j++) {
                 struct lb_vip *lb_vip = &lb->vips[j];
                 ds_clear(&actions);
-                build_lb_vip_ct_lb_actions(lb_vip, &actions);
+                build_lb_vip_ct_lb_actions(lb_vip, &actions,
+                                           lb->selection_fields);
 
                 if (!sset_contains(&all_ips, lb_vip->vip)) {
                     sset_add(&all_ips, lb_vip->vip);
diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
index b2a046571..a06972aa0 100644
--- a/ovn-nb.ovsschema
+++ b/ovn-nb.ovsschema
@@ -1,7 +1,7 @@ 
 {
     "name": "OVN_Northbound",
-    "version": "5.22.0",
-    "cksum": "384164739 25476",
+    "version": "5.23.0",
+    "cksum": "111023208 25806",
     "tables": {
         "NB_Global": {
             "columns": {
@@ -179,6 +179,12 @@ 
                 "ip_port_mappings": {
                     "type": {"key": "string", "value": "string",
                              "min": 0, "max": "unlimited"}},
+                "selection_fields": {
+                    "type": {"key": {"type": "string",
+                             "enum": ["set",
+                                ["eth_src", "eth_dst", "ip_src", "ip_dst",
+                                 "tp_src", "tp_dst"]]},
+                             "min": 0, "max": "unlimited"}},
                 "external_ids": {
                     "type": {"key": "string", "value": "string",
                              "min": 0, "max": "unlimited"}}},
diff --git a/ovn-nb.xml b/ovn-nb.xml
index af15c550a..243a25bce 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -1513,6 +1513,13 @@ 
       </p>
     </column>
 
+    <column name="selection_fields">
+      Set of packet header fields to use for selecting a backend to
+      load balance the VIP. This can be used if CMS desires that the
+      backend selection is consistent and deterministic for a given
+      set of packet header fields.
+    </column>
+
     <group title="Common Columns">
       <column name="external_ids">
         See <em>External IDs</em> at the beginning of this document.
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 8cc3f7002..c665db89b 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -1183,7 +1183,7 @@  ovn-nbctl --wait=sb ls-lb-add sw0 lb1
 
 ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
 ])
 
 # Delete the Load_Balancer_Health_Check
@@ -1192,7 +1192,7 @@  OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list service_monitor |  wc -l`])
 
 ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
 ])
 
 # Create the Load_Balancer_Health_Check again.
@@ -1205,7 +1205,7 @@  service_monitor | sed '/^$/d' | wc -l`])
 
 ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
 ])
 
 # Get the uuid of both the service_monitor
@@ -1221,7 +1221,7 @@  OVS_WAIT_UNTIL([
 
 ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);)
 ])
 
 # Set the service monitor for sw0-p1 to offline
@@ -1251,7 +1251,7 @@  OVS_WAIT_UNTIL([
 
 ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
 ])
 
 # Set the service monitor for sw1-p1 to error
@@ -1263,7 +1263,7 @@  OVS_WAIT_UNTIL([
 ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" \
 | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);)
 ])
 
 # Add one more vip to lb1
@@ -1293,8 +1293,8 @@  service_monitor port=1000 | sed '/^$/d' | wc -l`])
 
 ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000);)
 ])
 
 # Set the service monitor for sw1-p1 to online
@@ -1306,16 +1306,16 @@  OVS_WAIT_UNTIL([
 
 ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000-20.0.0.3:80);)
 ])
 
 # Associate lb1 to sw1
 ovn-nbctl --wait=sb ls-lb-add sw1 lb1
 ovn-sbctl dump-flows sw1 | grep ct_lb | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.40 && tcp.dst == 1000), action=(ct_lb(backends=10.0.0.3:1000-20.0.0.3:80);)
 ])
 
 # Now create lb2 same as lb1 but udp protocol.
diff --git a/tests/ovn.at b/tests/ovn.at
index 7befc8224..729bc1853 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -970,29 +970,44 @@  ct_lb();
     encodes as ct(table=19,zone=NXM_NX_REG13[0..15],nat)
     has prereqs ip
 ct_lb(192.168.1.2:80, 192.168.1.3:80);
+    Syntax error at `192.168.1.2' expecting backends.
+ct_lb(backends=192.168.1.2:80, 192.168.1.3:80);
+    Syntax error at `192.168.1.3' expecting `;'.
+ct_lb(backends=192.168.1.2:80-192.168.1.3:80);
     encodes as group:1
     uses group: id(1), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15]))
     has prereqs ip
-ct_lb(192.168.1.2, 192.168.1.3, );
-    formats as ct_lb(192.168.1.2, 192.168.1.3);
+ct_lb(backends=192.168.1.2- 192.168.1.3- );
+    formats as ct_lb(backends=192.168.1.2-192.168.1.3);
     encodes as group:2
     uses group: id(2), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3),commit,table=19,zone=NXM_NX_REG13[0..15]))
     has prereqs ip
-ct_lb(fd0f::2, fd0f::3, );
-    formats as ct_lb(fd0f::2, fd0f::3);
+ct_lb(backends=fd0f::2- fd0f::3- );
+    formats as ct_lb(backends=fd0f::2-fd0f::3);
     encodes as group:3
     uses group: id(3), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15]))
     has prereqs ip
 
-ct_lb(192.168.1.2:);
+ct_lb(backends=192.168.1.2:);
     Syntax error at `)' expecting port number.
-ct_lb(192.168.1.2:123456);
+ct_lb(backends=192.168.1.2:123456);
     Syntax error at `123456' expecting port number.
-ct_lb(foo);
+ct_lb(backends=foo);
     Syntax error at `foo' expecting IP address.
-ct_lb([192.168.1.2]);
+ct_lb(backends=[192.168.1.2]);
     Syntax error at `192.168.1.2' expecting IPv6 address.
 
+ct_lb(backends=192.168.1.2:80-192.168.1.3:80, hash_fields=eth_src,eth_dst,ip_src);
+    Syntax error at `eth_src' invalid hash_fields.
+ct_lb(backends=192.168.1.2:80-192.168.1.3:80, hash_fields="eth_src,eth_dst,ip_src");
+    encodes as group:4
+    uses group: id(4), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=192.168.1.2:80),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=192.168.1.3:80),commit,table=19,zone=NXM_NX_REG13[0..15]))
+    has prereqs ip
+ct_lb(backends=fd0f::2-fd0f::3, hash_fields="eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst");
+    encodes as group:5
+    uses group: id(5), name(type=select,selection_method=hash,fields(eth_src,eth_dst,ip_src,ip_dst,tp_src,tp_dst),bucket=bucket_id=0,weight:100,actions=ct(nat(dst=fd0f::2),commit,table=19,zone=NXM_NX_REG13[0..15]),bucket=bucket_id=1,weight:100,actions=ct(nat(dst=fd0f::3),commit,table=19,zone=NXM_NX_REG13[0..15]))
+    has prereqs ip
+
 # ct_next
 ct_next;
     encodes as ct(table=19,zone=NXM_NX_REG13[0..15])
@@ -1520,13 +1535,13 @@  handle_svc_check(reg0);
 # select
 reg9[16..31] = select(1=50, 2=100, 3, );
     formats as reg9[16..31] = select(1=50, 2=100, 3=100);
-    encodes as group:4
-    uses group: id(4), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19))
+    encodes as group:6
+    uses group: id(6), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:50,actions=load:1->xreg4[16..31],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[16..31],resubmit(,19),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[16..31],resubmit(,19))
 
 reg0 = select(1, 2);
     formats as reg0 = select(1=100, 2=100);
-    encodes as group:5
-    uses group: id(5), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19))
+    encodes as group:7
+    uses group: id(7), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[96..127],resubmit(,19),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[96..127],resubmit(,19))
 
 reg0 = select(1=, 2);
     Syntax error at `,' expecting weight.
@@ -1542,12 +1557,12 @@  reg0[0..14] = select(1, 2, 3);
     cannot use 15-bit field reg0[0..14] for "select", which requires at least 16 bits.
 
 fwd_group(liveness="true", childports="eth0", "lsp1");
-    encodes as group:6
-    uses group: id(6), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
+    encodes as group:8
+    uses group: id(8), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=watch_port:17,load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
 
 fwd_group(childports="eth0", "lsp1");
-    encodes as group:7
-    uses group: id(7), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
+    encodes as group:9
+    uses group: id(9), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[0..15],resubmit(,64),bucket=load=0x17->NXM_NX_REG15[0..15],resubmit(,64))
 
 fwd_group(childports=eth0);
     Syntax error at `eth0' expecting logical switch port.
@@ -18000,12 +18015,12 @@  service_monitor | sed '/^$/d' | wc -l`])
 
 ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+  table=10(ls_in_stateful     ), priority=120  , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
 ])
 
 ovn-sbctl dump-flows lr0 | grep ct_lb | grep priority=120 > lflows.txt
 AT_CHECK([cat lflows.txt], [0], [dnl
-  table=6 (lr_in_dnat         ), priority=120  , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+  table=6 (lr_in_dnat         ), priority=120  , match=(ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80 && is_chassis_resident("cr-lr0-public")), action=(ct_lb(backends=10.0.0.3:80-20.0.0.3:80);)
 ])
 
 # get the svc monitor mac.