Message ID | 20240813092917.143434-1-karthik.c@nutanix.com |
---|---|
State | Changes Requested |
Headers | show |
Series | [ovs-dev,v2] Support selection fields for ECMP routes. | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | fail | apply and check: fail |
Hey folks, Could you please provide any feedback in the approach taken for the following enhancement. Compared to v1 version of the patch, I have split the select rule that’s installed in the routing table based on protocol. I’m doing this in order to support matches on L4 fields. From https://www.openvswitch.org/support/dist-docs/ovs-ofctl.8.txt “The pre-requisites of fields must be provided by flows that output to the group. The use of the fields is defined by the lower-layer that implements the selection_method" My inference here is that we need separate groups for tcp and udp and specifying tcp_src, tcp_dst, udp_src, udp_dst in the same group may lead to non-deterministic behaviour. Please let me know if this is necessary to support this, else I can revert back to v1. Thank You, Karthik Chandrashekar From: Karthik Chandrashekar <karthik.c@nutanix.com> Date: Tuesday, August 13, 2024 at 2:59 PM To: ovs-dev@openvswitch.org <ovs-dev@openvswitch.org> Cc: Karthik Chandrashekar <karthik.c@nutanix.com> Subject: [PATCH ovn v2] Support selection fields for ECMP routes. From: Karthik Chandrashekar <karthik.c@nutanix.com> - This patch adds the ability to specify a custom set of packet headers for hash computation for ECMP routes similar to the support that was added for LB in 5af304e7478adcf5ac50ed41e96a55bebebff3e8 - ECMP routes by default use dp_hash as a selection_method for OVS flows. When ecmp_selection_fields is specified, the selection_method will be hash with the specified list of fields used for computing the hash. - For simplicity, list of fields that are used in the select action is a union of all the fields specified in each Logical_Route_Static_Route that is part of a given ECMP route. - In order to allow match based on L4 port numbers, the lr_in_ip_routing rules have been split into separate lflows with protocol specific fields when src_port or dst_port is specified in the ecmp_selectioin_fields. (This is based on the requirement that pre-requisites of fields must be provided by any flows that output to the group) Signed-off-by: Karthik Chandrashekar <karthik.c@nutanix.com> --- v2: - Install separate logical flows for TCP and UDP in lr_in_ip_routing. - Add more test coverage. --- --- include/ovn/actions.h | 1 + lib/actions.c | 55 +++++++++- northd/northd.c | 125 +++++++++++++++++++--- ovn-nb.xml | 17 +++ tests/ovn-northd.at | 42 ++++---- tests/ovn.at | 242 ++++++++++++++++++++++++++++++++++++++++-- utilities/ovn-nbctl.c | 17 ++- 7 files changed, 450 insertions(+), 49 deletions(-) diff --git a/include/ovn/actions.h b/include/ovn/actions.h index 88cf4de79..a9af1e38e 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -338,6 +338,7 @@ struct ovnact_select { struct ovnact_select_dst *dsts; size_t n_dsts; uint8_t ltable; /* Logical table ID of next table. */ + char *hash_fields; struct expr_field res_field; }; diff --git a/lib/actions.c b/lib/actions.c index 37676ef81..ac5d1dbd5 100644 --- a/lib/actions.c +++ b/lib/actions.c @@ -1534,11 +1534,19 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) struct ovnact_select_dst *dsts = NULL; size_t allocated_dsts = 0; size_t n_dsts = 0; + bool requires_hash_fields = false; + char *hash_fields = NULL; lexer_get(ctx->lexer); /* Skip "select". */ lexer_get(ctx->lexer); /* Skip '('. */ - while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { + if (lexer_match_id(ctx->lexer, "values")) { + lexer_force_match(ctx->lexer, LEX_T_EQUALS); + requires_hash_fields = true; + } + + while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { struct ovnact_select_dst dst; if (!action_parse_uint16(ctx, &dst.id, "id")) { free(dsts); @@ -1574,11 +1582,39 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) return; } + if (requires_hash_fields) { + if (!lexer_match_id(ctx->lexer, "hash_fields")) { + lexer_syntax_error(ctx->lexer, "expecting hash_fields"); + free(dsts); + return; + } + if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || + ctx->lexer->token.type != LEX_T_STRING || + lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { + lexer_syntax_error(ctx->lexer, "invalid hash_fields"); + free(dsts); + return; + } + + hash_fields = xstrdup(ctx->lexer->token.s); + lexer_get(ctx->lexer); + if (!lexer_match(ctx->lexer, LEX_T_SEMICOLON)) { + lexer_get(ctx->lexer); + } + } else { + if (lexer_match_id(ctx->lexer, "hash_fields")) { + lexer_syntax_error(ctx->lexer, "hash_fields unexpected"); + free(dsts); + return; + } + } + struct ovnact_select *select = ovnact_put_SELECT(ctx->ovnacts); select->ltable = ctx->pp->cur_ltable + 1; select->dsts = dsts; select->n_dsts = n_dsts; select->res_field = *res_field; + select->hash_fields = hash_fields; } static void @@ -1588,6 +1624,9 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) ds_put_cstr(s, " = "); ds_put_cstr(s, "select"); ds_put_char(s, '('); + if (select->hash_fields) { + ds_put_format(s, "values="); + } for (size_t i = 0; i < select->n_dsts; i++) { if (i) { ds_put_cstr(s, ", "); @@ -1598,6 +1637,10 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) ds_put_format(s, "=%"PRIu16, dst->weight); } ds_put_char(s, ')'); + if (select->hash_fields) { + ds_chomp(s, ')'); + ds_put_format(s, "; hash_fields=\"%s\")", select->hash_fields); + } ds_put_char(s, ';'); } @@ -1612,9 +1655,14 @@ encode_SELECT(const struct ovnact_select *select, struct ofpact_group *og; struct ds ds = DS_EMPTY_INITIALIZER; - ds_put_format(&ds, "type=select,selection_method=dp_hash"); + ds_put_format(&ds, "type=select,selection_method=%s", + select->hash_fields ? "hash": "dp_hash"); + if (select->hash_fields) { + ds_put_format(&ds, ",fields(%s)", select->hash_fields); + } - if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT)) { + if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT) && + !select->hash_fields) { /* Select dp-hash l4_symmetric by setting the upper 32bits of * selection_method_param to value 1 (1 << 32): */ ds_put_cstr(&ds, ",selection_method_param=0x100000000"); @@ -1647,6 +1695,7 @@ static void ovnact_select_free(struct ovnact_select *select) { free(select->dsts); + free(select->hash_fields); } static void diff --git a/northd/northd.c b/northd/northd.c index a8a0b6f94..1adc82ba1 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -282,9 +282,9 @@ static bool vxlan_mode; * same ip_prefix values: * - connected route overrides static one; * - static route overrides src-ip route. */ -#define ROUTE_PRIO_OFFSET_MULTIPLIER 3 -#define ROUTE_PRIO_OFFSET_STATIC 1 -#define ROUTE_PRIO_OFFSET_CONNECTED 2 +#define ROUTE_PRIO_OFFSET_MULTIPLIER 5 +#define ROUTE_PRIO_OFFSET_STATIC 2 +#define ROUTE_PRIO_OFFSET_CONNECTED 4 /* Returns the type of the datapath to which a flow with the given 'stage' may * be added. */ @@ -10263,6 +10263,7 @@ struct parsed_route { uint32_t route_table_id; uint32_t hash; const struct nbrec_logical_router_static_route *route; + const char *ecmp_selection_fields; bool ecmp_symmetric_reply; bool is_discard_route; }; @@ -10373,6 +10374,12 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, pr->ecmp_symmetric_reply = smap_get_bool(&route->options, "ecmp_symmetric_reply", false); pr->is_discard_route = is_discard_route; + pr->ecmp_selection_fields = NULL; + const char *ecmp_selection_fields = smap_get(&route->options, + "ecmp_selection_fields"); + if (ecmp_selection_fields) { + pr->ecmp_selection_fields = ecmp_selection_fields; + } ovs_list_insert(routes, &pr->list_node); return pr; } @@ -10402,6 +10409,7 @@ struct ecmp_groups_node { const char *origin; uint32_t route_table_id; uint16_t route_count; + char *selection_fields; struct ovs_list route_list; /* Contains ecmp_route_list_node */ }; @@ -10418,6 +10426,34 @@ ecmp_groups_add_route(struct ecmp_groups_node *group, struct ecmp_route_list_node *er = xmalloc(sizeof *er); er->route = route; er->id = ++group->route_count; + + if (route->ecmp_selection_fields) { + if (group->selection_fields) { + struct sset current_field_set; + struct sset field_set; + + sset_from_delimited_string(¤t_field_set, + group->selection_fields, ","); + sset_from_delimited_string(&field_set, + route->ecmp_selection_fields, ","); + + const char *field; + SSET_FOR_EACH (field, &field_set) { + sset_add(¤t_field_set, field); + } + + group->selection_fields = xasprintf("%s", + sset_join(¤t_field_set, + ",", "")); + + sset_destroy(&field_set); + sset_destroy(¤t_field_set); + } else { + group->selection_fields = xasprintf("%s", + route->ecmp_selection_fields); + } + } + ovs_list_insert(&group->route_list, &er->list_node); } @@ -10440,6 +10476,7 @@ ecmp_groups_add(struct hmap *ecmp_groups, eg->is_src_route = route->is_src_route; eg->origin = smap_get_def(&route->route->options, "origin", ""); eg->route_table_id = route->route_table_id; + eg->selection_fields = NULL; ovs_list_init(&eg->route_list); ecmp_groups_add_route(eg, route); @@ -10542,7 +10579,8 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen) static void build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, const char *network_s, int plen, bool is_src_route, - bool is_ipv4, struct ds *match, uint16_t *priority, int ofs) + bool is_ipv4, struct ds *match, uint16_t *priority, int ofs, + bool has_protocol_match) { const char *dir; /* The priority here is calculated to implement longest-prefix-match @@ -10554,6 +10592,10 @@ build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, dir = "dst"; } + if (has_protocol_match) { + ofs += 1; + } + *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs; if (op_inport) { @@ -10740,7 +10782,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, static void build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, const struct hmap *lr_ports, struct ecmp_groups_node *eg, - struct lflow_ref *lflow_ref) + struct lflow_ref *lflow_ref, const char *protocol) { bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix); @@ -10752,22 +10794,71 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC; build_route_match(NULL, eg->route_table_id, prefix_s, eg->plen, - eg->is_src_route, is_ipv4, &route_match, &priority, ofs); + eg->is_src_route, is_ipv4, &route_match, &priority, ofs, + protocol != NULL); free(prefix_s); - struct ds actions = DS_EMPTY_INITIALIZER; - ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 - "; %s = select(", REG_ECMP_GROUP_ID, eg->id, - REG_ECMP_MEMBER_ID); + if (eg->selection_fields && protocol) { + if (!strcmp(protocol, "tcp")) { + ds_put_format(&route_match, " && tcp"); + } else if (!strcmp(protocol, "udp")) { + ds_put_format(&route_match, " && udp"); + } + } + struct ds values = DS_EMPTY_INITIALIZER; bool is_first = true; LIST_FOR_EACH (er, list_node, &eg->route_list) { if (is_first) { is_first = false; } else { - ds_put_cstr(&actions, ", "); + ds_put_cstr(&values, ", "); } - ds_put_format(&actions, "%"PRIu16, er->id); + ds_put_format(&values, "%"PRIu16, er->id); + } + + struct ds actions = DS_EMPTY_INITIALIZER; + if (eg->selection_fields) { + struct sset current_field_set; + struct sset field_set = SSET_INITIALIZER(&field_set); + sset_from_delimited_string(¤t_field_set, + eg->selection_fields, ","); + + const char *field; + SSET_FOR_EACH (field, ¤t_field_set) { + if (!strcmp(field, "src_port")) { + if (protocol) { + if (!strcmp(protocol, "tcp")) { + sset_add(&field_set, "tcp_src"); + } else if (!strcmp(protocol, "udp")) { + sset_add(&field_set, "udp_src"); + } + } + } else if (!strcmp(field, "dst_port")) { + if (protocol) { + if (!strcmp(protocol, "tcp")) { + sset_add(&field_set, "tcp_dst"); + } else if (!strcmp(protocol, "udp")) { + sset_add(&field_set, "udp_dst"); + } + } + } else { + sset_add(&field_set, field); + } + } + + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 + "; %s = select(values=%s", REG_ECMP_GROUP_ID, eg->id, + REG_ECMP_MEMBER_ID, ds_cstr(&values)); + ds_put_format(&actions, "; hash_fields=\"%s\"", + sset_join(&field_set, ",", "")); + + sset_destroy(¤t_field_set); + sset_destroy(&field_set); + } else { + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 + "; %s = select(%s", REG_ECMP_GROUP_ID, eg->id, + REG_ECMP_MEMBER_ID, ds_cstr(&values)); } ds_put_cstr(&actions, ");"); @@ -10847,7 +10938,7 @@ add_route(struct lflow_table *lflows, struct ovn_datapath *od, } } build_route_match(op_inport, rtb_id, network_s, plen, is_src_route, - is_ipv4, &match, &priority, ofs); + is_ipv4, &match, &priority, ofs, false); struct ds common_actions = DS_EMPTY_INITIALIZER; struct ds actions = DS_EMPTY_INITIALIZER; @@ -12733,7 +12824,13 @@ build_static_route_flows_for_lrouter( HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { /* add a flow in IP_ROUTING, and one flow for each member in * IP_ROUTING_ECMP. */ - build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref); + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, NULL); + if (group->selection_fields) { + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, + "tcp"); + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, + "udp"); + } } const struct unique_routes_node *ur; HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { diff --git a/ovn-nb.xml b/ovn-nb.xml index 6376320d3..7576bf7d2 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -3718,6 +3718,23 @@ or </p> </column> + <column name="options" key="ecmp_selection_fields"> + <p> + ECMP routes use OpenFlow groups of type <code>select</code> to + pick a nexthop among the list of avaible nexthops. + OVS supports two selection methods: <code>dp_hash</code> and + <code>hash</code> for hash computatiion and selecting + the buckets of a group. Please see the OVS documentation + (man ovs-ofctl) for more details on the selection methods. + </p> + + <p> + OVN by default uses <code>dp_hash</code>. In order to use the + <code>hash</code> selection method, specify comma-separated + list of selection fields. + </p> + </column> + <column name="options" key="origin"> In case ovn-interconnection has been learned this route, it will have its origin set: either "connected" or "static". This key is supposed diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 199197f09..3d3046a21 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -6704,9 +6704,9 @@ ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ]) AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dnl @@ -6721,9 +6721,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ]) AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) @@ -6750,9 +6750,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ]) AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) @@ -6768,14 +6768,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | ovn_strip_lflows], [0], [dnl - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ]) check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | ovn_strip_lflows], [0], [dnl - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ]) AT_CLEANUP @@ -7199,16 +7199,16 @@ AT_CHECK([grep "lr_in_ip_routing_pre" lr0flows | ovn_strip_lflows], [0], [dnl grep -e "(lr_in_ip_routing ).*outport" lr0flows AT_CHECK([grep -e "(lr_in_ip_routing ).*outport" lr0flows | ovn_strip_lflows], [0], [dnl - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) ]) AT_CLEANUP diff --git a/tests/ovn.at b/tests/ovn.at index b31afbfb3..f45ffdfb5 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2086,6 +2086,16 @@ reg0 = select(1, 2); encodes as group:20 uses group: id(20), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) +reg9[[16..31]] = select(values=1=50, 2=100, 3; hash_fields="ip_src,ip_dst" ); + formats as reg9[[16..31]] = select(values=1=50, 2=100, 3=100; hash_fields="ip_src,ip_dst"); + encodes as group:21 + uses group: id(21), name(type=select,selection_method=hash,fields(ip_src,ip_dst),bucket=bucket_id=0,weight:50,actions=load:1->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[[16..31]],resubmit(,oflow_in_table)) + +reg0 = select(values=1, 2; hash_fields="ip_dst,ip_src"); + formats as reg0 = select(values=1=100, 2=100; hash_fields="ip_dst,ip_src"); + encodes as group:22 + uses group: id(22), name(type=select,selection_method=hash,fields(ip_dst,ip_src),bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) + reg0 = select(1=, 2); Syntax error at `,' expecting weight. reg0 = select(1=0, 2); @@ -2094,6 +2104,14 @@ reg0 = select(1=123456, 2); Syntax error at `123456' expecting weight. reg0 = select(123); Syntax error at `;' expecting at least 2 group members. +reg0 = select(values=1, 2); + Syntax error at `;' expecting hash_fields. +reg0 = select(values=1, 2; hash_fields); + Syntax error at `)' invalid hash_fields. +reg0 = select(values=1, 2; hash_fields=); + Syntax error at `)' invalid hash_fields. +reg0 = select(1, 2; hash_fields="ip_src"); + Syntax error at `=' hash_fields unexpected. ip.proto = select(1, 2, 3); Field ip.proto is not modifiable. reg0[[0..14]] = select(1, 2, 3); @@ -2101,12 +2119,12 @@ reg0[[0..14]] = select(1, 2, 3); fwd_group(liveness=true, childports="eth0", "lsp1"); formats as fwd_group(liveness="true", childports="eth0", "lsp1"); - encodes as group:21 - uses group: id(21), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) + encodes as group:23 + uses group: id(23), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) fwd_group(childports="eth0", "lsp1"); - encodes as group:22 - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) + encodes as group:24 + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) fwd_group(childports=eth0); Syntax error at `eth0' expecting logical switch port. @@ -2115,8 +2133,8 @@ fwd_group(); Syntax error at `)' expecting `;'. fwd_group(childports="eth0", "lsp1"); - encodes as group:22 - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) + encodes as group:24 + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) fwd_group(liveness=xyzzy, childports="eth0", "lsp1"); Syntax error at `xyzzy' expecting true or false. @@ -26531,6 +26549,218 @@ OVN_CLEANUP([hv1]) AT_CLEANUP ]) +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ECMP static routes - custom hash]) +ovn_start + +# Logical network: +# ls1 (192.168.1.0/24) - lr1 - ls2 (192.168.2.0/24) +# lsl has lsp11 (192.168.1.11) and ls2 has lsp21 (192.168.2.21) and lsp22 +# (192.168.2.22) +# +# Static routes on lr1: +# 10.0.0.0/24 nexthop 192.168.2.21 +# 10.0.0.0/24 nexthop 192.168.2.22 +# +# ECMP hash on ip_proto,src_ip,dst_ip,dst_port +# +# Test: +# lsp11 send packets to 10.0.0.100 with different source ports +# +# Expected result: +# All packets should go out of a either lsp21 or lsp22 + +ovn-nbctl lr-add lr1 + +ovn-nbctl ls-add ls1 +ovn-nbctl ls-add ls2 + +for i in 1 2; do + ovn-nbctl lrp-add lr1 lrp-lr1-ls${i} 00:00:00:01:0${i}:01 192.168.${i}.1/24 + ovn-nbctl lsp-add ls${i} lsp-ls${i}-lr1 -- lsp-set-type lsp-ls${i}-lr1 router \ + -- lsp-set-options lsp-ls${i}-lr1 router-port=lrp-lr1-ls${i} \ + -- lsp-set-addresses lsp-ls${i}-lr1 router +done + +#install static routes +ovn-nbctl --ecmp-selection-fields="ip_proto,ip_src" lr-route-add lr1 10.0.0.0/24 192.168.2.21 +ovn-nbctl --ecmp --ecmp-selection-fields="ip_proto,ip_src,ip_dst,dst_port" lr-route-add lr1 10.0.0.0/24 192.168.2.22 + +# Create logical ports +ovn-nbctl lsp-add ls1 lsp11 -- \ + lsp-set-addresses lsp11 "f0:00:00:00:01:11 192.168.1.11" +ovn-nbctl lsp-add ls2 lsp21 -- \ + lsp-set-addresses lsp21 "f0:00:00:00:02:21 192.168.2.21" +ovn-nbctl lsp-add ls2 lsp22 -- \ + lsp-set-addresses lsp22 "f0:00:00:00:02:22 192.168.2.22" + +net_add n1 +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 + +sim_add hv2 +as hv2 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.2 + +as hv1 +ovs-vsctl -- add-port br-int hv1-vif1 -- \ + set interface hv1-vif1 external-ids:iface-id=lsp11 \ + options:tx_pcap=hv1/vif1-tx.pcap \ + options:rxq_pcap=hv1/vif1-rx.pcap \ + ofport-request=1 + +ovs-vsctl -- add-port br-int hv1-vif2 -- \ + set interface hv1-vif2 external-ids:iface-id=lsp21 \ + options:tx_pcap=hv1/vif2-tx.pcap \ + options:rxq_pcap=hv1/vif2-rx.pcap \ + ofport-request=2 + +ovs-vsctl -- add-port br-int hv1-vif3 -- \ + set interface hv1-vif3 external-ids:iface-id=lsp22 \ + options:tx_pcap=hv1/vif3-tx.pcap \ + options:rxq_pcap=hv1/vif3-rx.pcap \ + ofport-request=3 + +# wait for earlier changes to take effect +check ovn-nbctl --wait=hv sync +wait_for_ports_up + +ovn-sbctl dump-flows > sbflows +AT_CAPTURE_FILE([sbflows]) + +AT_CAPTURE_FILE([ofgroups]) +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-groups br-int > ofgroups + grep "selection_method=hash,fields" ofgroups | \ + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 +]) + +as hv1 ovs-ofctl dump-groups br-int > ofgroups +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 +]) +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 +]) + +as hv1 ovs-ofctl dump-flows br-int > oflows +AT_CAPTURE_FILE([oflows]) + +for i in $(seq 5001 5010); do + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && + tcp && tcp.src==$i && tcp.dst==80" + OVS_WAIT_UNTIL([as hv1 ovs-appctl -t ovn-controller inject-pkt "$packet"]) + + for j in 1 2; do + # Assume all packets go to lsp2${j}. + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && + tcp && tcp.src==$i && tcp.dst==80" + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} + done +done + +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed +OVS_WAIT_UNTIL([ + hv1_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` + hv1_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` + echo $hv1_rcv_n1 $hv1_rcv_n2 + test $(($hv1_rcv_n1 + $hv1_rcv_n2)) -ge 10]) + +if test $hv1_rcv_n1 = 0; then + AT_CHECK([test $hv1_rcv_n2 -ge 10], [0], []) +else + AT_CHECK([test $hv1_rcv_n1 -ge 10], [0], []) +fi + +# Move all VIFs to hv2 and send the same packets again +as hv1 +ovs-vsctl del-port hv1-vif1 +ovs-vsctl del-port hv1-vif2 +ovs-vsctl del-port hv1-vif3 + +wait_column "" Port_Binding chassis logical_port=lsp11 +wait_column "" Port_Binding chassis logical_port=lsp21 +wait_column "" Port_Binding chassis logical_port=lsp22 + +as hv2 +ovs-vsctl -- add-port br-int hv2-vif1 -- \ + set interface hv2-vif1 external-ids:iface-id=lsp11 \ + options:tx_pcap=hv2/vif1-tx.pcap \ + options:rxq_pcap=hv2/vif1-rx.pcap \ + ofport-request=1 + +ovs-vsctl -- add-port br-int hv2-vif2 -- \ + set interface hv2-vif2 external-ids:iface-id=lsp21 \ + options:tx_pcap=hv2/vif2-tx.pcap \ + options:rxq_pcap=hv2/vif2-rx.pcap \ + ofport-request=2 + +ovs-vsctl -- add-port br-int hv2-vif3 -- \ + set interface hv2-vif3 external-ids:iface-id=lsp22 \ + options:tx_pcap=hv2/vif3-tx.pcap \ + options:rxq_pcap=hv2/vif3-rx.pcap \ + ofport-request=3 + +# wait for earlier changes to take effect +check ovn-nbctl --wait=hv sync +wait_for_ports_up + +AT_CAPTURE_FILE([ofgroups]) +OVS_WAIT_FOR_OUTPUT([as hv2 ovs-ofctl dump-groups br-int > ofgroups + grep "selection_method=hash,fields" ofgroups | \ + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 +]) + +as hv2 ovs-ofctl dump-groups br-int > ofgroups +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 +]) +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 +]) + +as hv2 ovs-ofctl dump-flows br-int > oflows +AT_CAPTURE_FILE([oflows]) + +for i in $(seq 5001 5010); do + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && + tcp && tcp.src==$i && tcp.dst==80" + OVS_WAIT_UNTIL([as hv2 ovs-appctl -t ovn-controller inject-pkt "$packet"]) + + for j in 1 2; do + # Assume all packets go to lsp2${j}. + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && + tcp && tcp.src==$i && tcp.dst==80" + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} + done +done + +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed +OVS_WAIT_UNTIL([ + hv2_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` + hv2_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` + echo $hv2_rcv_n1 $hv2_rcv_n2 + test $(($hv2_rcv_n1 + $hv2_rcv_n2)) -ge 10]) + +if test $hv2_rcv_n1 = 0; then + AT_CHECK([test $hv2_rcv_n2 -ge 10], [0], []) +else + AT_CHECK([test $hv2_rcv_n1 -ge 10], [0], []) +fi + +# All packets should of the same port on both hosts +if test $hv1_rcv_n1 = 0; then + AT_CHECK([test $hv2_rcv_n1 -eq 0], [0], []) +else + AT_CHECK([test $hv2_rcv_n2 -eq 0], [0], []) +fi + +OVN_CLEANUP([hv1], [hv2]) + +AT_CLEANUP +]) OVN_FOR_EACH_NORTHD([ AT_SETUP([route tables -- <main> route table routes]) diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c index 679d3f2d9..70e8fb239 100644 --- a/utilities/ovn-nbctl.c +++ b/utilities/ovn-nbctl.c @@ -4720,11 +4720,18 @@ nbctl_lr_route_add(struct ctl_context *ctx) nbrec_logical_router_static_route_set_route_table(route, route_table); } - if (ecmp_symmetric_reply) { - const struct smap options = SMAP_CONST1(&options, - "ecmp_symmetric_reply", - "true"); + const char *ecmp_selection_fields = shash_find_data(&ctx->options, + "--ecmp-selection-fields"); + if (ecmp_symmetric_reply || ecmp_selection_fields) { + struct smap options = SMAP_INITIALIZER(&options); + if (ecmp_symmetric_reply) { + smap_add(&options, "ecmp_symmetric_reply", "true"); + } + if (ecmp_selection_fields) { + smap_add(&options, "ecmp_selection_fields", ecmp_selection_fields); + } nbrec_logical_router_static_route_set_options(route, &options); + smap_destroy(&options); } nbrec_logical_router_update_static_routes_addvalue(lr, route); @@ -8057,7 +8064,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { { "lr-route-add", 3, 4, "ROUTER PREFIX NEXTHOP [PORT]", nbctl_pre_lr_route_add, nbctl_lr_route_add, NULL, "--may-exist,--ecmp,--ecmp-symmetric-reply,--policy=," - "--route-table=,--bfd?", RW }, + "--route-table=,--bfd?,--ecmp-selection-fields=", RW }, { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", nbctl_pre_lr_route_del, nbctl_lr_route_del, NULL, "--if-exists,--policy=,--route-table=", RW }, -- 2.22.3
References: <20240813092917.143434-1-karthik.c@nutanix.com> Bleep bloop. Greetings Karthik Chandrashekar, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. git-am: error: Failed to merge in the changes. hint: Use 'git am --show-current-patch=diff' to see the failed patch Patch failed at 0001 Support selection fields for ECMP routes. When you have resolved this problem, run "git am --continue". If you prefer to skip this patch, run "git am --skip" instead. To restore the original branch and stop patching, run "git am --abort". Please check this out. If you feel there has been an error, please email aconole@redhat.com Thanks, 0-day Robot
Hi Karthik, thanks for the patch. The protocol items introduced in v2 are interesting. It highlights that there are inherent flaws in allowing L4 selection fields to be used in what is intended to be a L3 feature. IMO, there shouldn't be a need to discriminate based on L4 protocol when trying to route packets. The current patch does some things I don't particularly like with regards to L4 protocol: * The number of ECMP flows has tripled as a result of this patch. * The patch adds undocumented "dst_port" and "src_port" selection fields as options. * The patch has no SCTP support. (and if it did, then the number of ECMP flows would quadruple instead of tripling) I would say that since ECMP is a pure L3 feature, L4 selection fields should not be used at all as part of its hash function. As you show in your patch, you have to add protocol prerequisites to the match in case a L4-protocol-specific selection field is added. How to enforce this is difficult. We could add code to northd that specifically checks for the various L4 selection fields and rejects them if they are present (falling back to the default selection method as a result). Or we could document that L4 selection fields should not be used, but let the user shoot themselves in the foot if they absolutely insist on adding any. One thing that comes to mind for this particular feature is: what is the motivation? Why do you want to be able to hash on specific fields instead of using the default hash? Maybe that'll help me to understand why the L4 protocol considerations are here. I have some additional findings inline below. On 8/13/24 05:29, karthik.c@nutanix.com wrote: > From: Karthik Chandrashekar <karthik.c@nutanix.com> > > - This patch adds the ability to specify a custom set of packet headers > for hash computation for ECMP routes similar to the support that was > added for LB in 5af304e7478adcf5ac50ed41e96a55bebebff3e8 > - ECMP routes by default use dp_hash as a selection_method for OVS flows. > When ecmp_selection_fields is specified, the selection_method will be > hash with the specified list of fields used for computing the hash. > - For simplicity, list of fields that are used in the select action > is a union of all the fields specified in each Logical_Route_Static_Route > that is part of a given ECMP route. > - In order to allow match based on L4 port numbers, the lr_in_ip_routing > rules have been split into separate lflows with protocol specific > fields when src_port or dst_port is specified in the ecmp_selectioin_fields. > (This is based on the requirement that pre-requisites of fields > must be provided by any flows that output to the group) > > Signed-off-by: Karthik Chandrashekar <karthik.c@nutanix.com> > --- > v2: > - Install separate logical flows for TCP and UDP in lr_in_ip_routing. > - Add more test coverage. > --- > --- > include/ovn/actions.h | 1 + > lib/actions.c | 55 +++++++++- > northd/northd.c | 125 +++++++++++++++++++--- > ovn-nb.xml | 17 +++ > tests/ovn-northd.at | 42 ++++---- > tests/ovn.at | 242 ++++++++++++++++++++++++++++++++++++++++-- > utilities/ovn-nbctl.c | 17 ++- > 7 files changed, 450 insertions(+), 49 deletions(-) > > diff --git a/include/ovn/actions.h b/include/ovn/actions.h > index 88cf4de79..a9af1e38e 100644 > --- a/include/ovn/actions.h > +++ b/include/ovn/actions.h > @@ -338,6 +338,7 @@ struct ovnact_select { > struct ovnact_select_dst *dsts; > size_t n_dsts; > uint8_t ltable; /* Logical table ID of next table. */ > + char *hash_fields; > struct expr_field res_field; > }; > > diff --git a/lib/actions.c b/lib/actions.c > index 37676ef81..ac5d1dbd5 100644 > --- a/lib/actions.c > +++ b/lib/actions.c > @@ -1534,11 +1534,19 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) > struct ovnact_select_dst *dsts = NULL; > size_t allocated_dsts = 0; > size_t n_dsts = 0; > + bool requires_hash_fields = false; > + char *hash_fields = NULL; > > lexer_get(ctx->lexer); /* Skip "select". */ > lexer_get(ctx->lexer); /* Skip '('. */ > > - while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { > + if (lexer_match_id(ctx->lexer, "values")) { > + lexer_force_match(ctx->lexer, LEX_T_EQUALS); > + requires_hash_fields = true; > + } > + > + while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && > + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { This while statement allows for some odd syntax to be accepted. For instance, the following would be allowed: select(values=1=50, 2=100) hash_fields=ip_dst) One way to avoid this is to change which token to look for based on whether require_hash_fields is true. enum lex_type value_end_token = requires_hash_fields ? LEX_T_SEMICOLON : LEX_T_PAREN; while (lexer_match(ctx->lexer, value_end_token) { ... > struct ovnact_select_dst dst; > if (!action_parse_uint16(ctx, &dst.id, "id")) { > free(dsts); > @@ -1574,11 +1582,39 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) > return; > } > > + if (requires_hash_fields) { > + if (!lexer_match_id(ctx->lexer, "hash_fields")) { > + lexer_syntax_error(ctx->lexer, "expecting hash_fields"); > + free(dsts); > + return; > + } > + if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || > + ctx->lexer->token.type != LEX_T_STRING || > + lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { > + lexer_syntax_error(ctx->lexer, "invalid hash_fields"); > + free(dsts); > + return; > + } > + > + hash_fields = xstrdup(ctx->lexer->token.s); > + lexer_get(ctx->lexer); > + if (!lexer_match(ctx->lexer, LEX_T_SEMICOLON)) { > + lexer_get(ctx->lexer); > + } I'm having trouble understanding what this if statement accomplishes. > + } else { > + if (lexer_match_id(ctx->lexer, "hash_fields")) { I don't think there is a specific coding guideline that says you have to do it this way, but it's common to combine the else and if on a single line. This allows the code to be indented one level less. } else if (lexer_match_id(ctx->lexer, "hash_fields")) { > + lexer_syntax_error(ctx->lexer, "hash_fields unexpected"); > + free(dsts); > + return; > + } > + } > + > struct ovnact_select *select = ovnact_put_SELECT(ctx->ovnacts); > select->ltable = ctx->pp->cur_ltable + 1; > select->dsts = dsts; > select->n_dsts = n_dsts; > select->res_field = *res_field; > + select->hash_fields = hash_fields; > } > > static void > @@ -1588,6 +1624,9 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) > ds_put_cstr(s, " = "); > ds_put_cstr(s, "select"); > ds_put_char(s, '('); > + if (select->hash_fields) { > + ds_put_format(s, "values="); > + } > for (size_t i = 0; i < select->n_dsts; i++) { > if (i) { > ds_put_cstr(s, ", "); > @@ -1598,6 +1637,10 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) > ds_put_format(s, "=%"PRIu16, dst->weight); > } > ds_put_char(s, ')'); > + if (select->hash_fields) { > + ds_chomp(s, ')'); > + ds_put_format(s, "; hash_fields=\"%s\")", select->hash_fields); > + } > ds_put_char(s, ';'); > } > > @@ -1612,9 +1655,14 @@ encode_SELECT(const struct ovnact_select *select, > struct ofpact_group *og; > > struct ds ds = DS_EMPTY_INITIALIZER; > - ds_put_format(&ds, "type=select,selection_method=dp_hash"); > + ds_put_format(&ds, "type=select,selection_method=%s", > + select->hash_fields ? "hash": "dp_hash"); > + if (select->hash_fields) { > + ds_put_format(&ds, ",fields(%s)", select->hash_fields); > + } > > - if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT)) { > + if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT) && > + !select->hash_fields) { > /* Select dp-hash l4_symmetric by setting the upper 32bits of > * selection_method_param to value 1 (1 << 32): */ > ds_put_cstr(&ds, ",selection_method_param=0x100000000"); > @@ -1647,6 +1695,7 @@ static void > ovnact_select_free(struct ovnact_select *select) > { > free(select->dsts); > + free(select->hash_fields); > } > > static void > diff --git a/northd/northd.c b/northd/northd.c > index a8a0b6f94..1adc82ba1 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -282,9 +282,9 @@ static bool vxlan_mode; > * same ip_prefix values: > * - connected route overrides static one; > * - static route overrides src-ip route. */ > -#define ROUTE_PRIO_OFFSET_MULTIPLIER 3 > -#define ROUTE_PRIO_OFFSET_STATIC 1 > -#define ROUTE_PRIO_OFFSET_CONNECTED 2 > +#define ROUTE_PRIO_OFFSET_MULTIPLIER 5 > +#define ROUTE_PRIO_OFFSET_STATIC 2 > +#define ROUTE_PRIO_OFFSET_CONNECTED 4 > > /* Returns the type of the datapath to which a flow with the given 'stage' may > * be added. */ > @@ -10263,6 +10263,7 @@ struct parsed_route { > uint32_t route_table_id; > uint32_t hash; > const struct nbrec_logical_router_static_route *route; > + const char *ecmp_selection_fields; > bool ecmp_symmetric_reply; > bool is_discard_route; > }; > @@ -10373,6 +10374,12 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, > pr->ecmp_symmetric_reply = smap_get_bool(&route->options, > "ecmp_symmetric_reply", false); > pr->is_discard_route = is_discard_route; > + pr->ecmp_selection_fields = NULL; > + const char *ecmp_selection_fields = smap_get(&route->options, > + "ecmp_selection_fields"); > + if (ecmp_selection_fields) { > + pr->ecmp_selection_fields = ecmp_selection_fields; > + } smap_get() returns NULL if it fails. The six line addition above could be simplified to: pr->ecmp_selection_fields = smap_get(&route->options, "ecmp_selection_fields"); > ovs_list_insert(routes, &pr->list_node); > return pr; > } > @@ -10402,6 +10409,7 @@ struct ecmp_groups_node { > const char *origin; > uint32_t route_table_id; > uint16_t route_count; > + char *selection_fields; > struct ovs_list route_list; /* Contains ecmp_route_list_node */ > }; > > @@ -10418,6 +10426,34 @@ ecmp_groups_add_route(struct ecmp_groups_node *group, > struct ecmp_route_list_node *er = xmalloc(sizeof *er); > er->route = route; > er->id = ++group->route_count; > + > + if (route->ecmp_selection_fields) { > + if (group->selection_fields) { > + struct sset current_field_set; > + struct sset field_set; > + > + sset_from_delimited_string(¤t_field_set, > + group->selection_fields, ","); > + sset_from_delimited_string(&field_set, > + route->ecmp_selection_fields, ","); > + > + const char *field; > + SSET_FOR_EACH (field, &field_set) { > + sset_add(¤t_field_set, field); > + } This is an interesting behavior. If ECMP routes do not all have the same selection fields, then the result for the group is a set union of the selection fields from all ECMP routes in the group. IMO, all routes in an ECMP group should have the same configured selection fields. If they do not, then that is a configuration error. We can't assume that what the user wants is the union of all fields. On another note, the frequent conversion from string to sset back to string seems wasteful. There's not much reason to store the string version of the selection fields on the parsed_route. Instead, you could always store this as an sset. On the ecmp_group_node, it seems like the sset is used more frequently, and the string version is only really needed when adding the logical flow. So on the ecmp_group_node, you could also store just the sset of selection fields, and then in the function where ECMP group flows are added, that is where you could convert to a string. > + > + group->selection_fields = xasprintf("%s", > + sset_join(¤t_field_set, > + ",", "")); This introduces a memory leak. group->selection_fields needs to be freed before it is reassigned. > + > + sset_destroy(&field_set); > + sset_destroy(¤t_field_set); > + } else { > + group->selection_fields = xasprintf("%s", > + route->ecmp_selection_fields); > + } > + } > + > ovs_list_insert(&group->route_list, &er->list_node); > } > > @@ -10440,6 +10476,7 @@ ecmp_groups_add(struct hmap *ecmp_groups, > eg->is_src_route = route->is_src_route; > eg->origin = smap_get_def(&route->route->options, "origin", ""); > eg->route_table_id = route->route_table_id; > + eg->selection_fields = NULL; > ovs_list_init(&eg->route_list); > ecmp_groups_add_route(eg, route); > > @@ -10542,7 +10579,8 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen) > static void > build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, > const char *network_s, int plen, bool is_src_route, > - bool is_ipv4, struct ds *match, uint16_t *priority, int ofs) > + bool is_ipv4, struct ds *match, uint16_t *priority, int ofs, > + bool has_protocol_match) > { > const char *dir; > /* The priority here is calculated to implement longest-prefix-match > @@ -10554,6 +10592,10 @@ build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, > dir = "dst"; > } > > + if (has_protocol_match) { > + ofs += 1; > + } > + > *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs; > > if (op_inport) { > @@ -10740,7 +10782,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, > static void > build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, > const struct hmap *lr_ports, struct ecmp_groups_node *eg, > - struct lflow_ref *lflow_ref) > + struct lflow_ref *lflow_ref, const char *protocol) > > { > bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix); > @@ -10752,22 +10794,71 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, > int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ? > ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC; > build_route_match(NULL, eg->route_table_id, prefix_s, eg->plen, > - eg->is_src_route, is_ipv4, &route_match, &priority, ofs); > + eg->is_src_route, is_ipv4, &route_match, &priority, ofs, > + protocol != NULL); > free(prefix_s); > > - struct ds actions = DS_EMPTY_INITIALIZER; > - ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > - "; %s = select(", REG_ECMP_GROUP_ID, eg->id, > - REG_ECMP_MEMBER_ID); > + if (eg->selection_fields && protocol) { > + if (!strcmp(protocol, "tcp")) { > + ds_put_format(&route_match, " && tcp"); > + } else if (!strcmp(protocol, "udp")) { > + ds_put_format(&route_match, " && udp"); > + } > + } > > + struct ds values = DS_EMPTY_INITIALIZER; > bool is_first = true; > LIST_FOR_EACH (er, list_node, &eg->route_list) { > if (is_first) { > is_first = false; > } else { > - ds_put_cstr(&actions, ", "); > + ds_put_cstr(&values, ", "); > } > - ds_put_format(&actions, "%"PRIu16, er->id); > + ds_put_format(&values, "%"PRIu16, er->id); > + } > + > + struct ds actions = DS_EMPTY_INITIALIZER; > + if (eg->selection_fields) { > + struct sset current_field_set; > + struct sset field_set = SSET_INITIALIZER(&field_set); > + sset_from_delimited_string(¤t_field_set, > + eg->selection_fields, ","); > + > + const char *field; > + SSET_FOR_EACH (field, ¤t_field_set) { > + if (!strcmp(field, "src_port")) { > + if (protocol) { > + if (!strcmp(protocol, "tcp")) { > + sset_add(&field_set, "tcp_src"); > + } else if (!strcmp(protocol, "udp")) { > + sset_add(&field_set, "udp_src"); > + } > + } > + } else if (!strcmp(field, "dst_port")) { > + if (protocol) { > + if (!strcmp(protocol, "tcp")) { > + sset_add(&field_set, "tcp_dst"); > + } else if (!strcmp(protocol, "udp")) { > + sset_add(&field_set, "udp_dst"); > + } > + } > + } else { > + sset_add(&field_set, field); > + } > + } > + > + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > + "; %s = select(values=%s", REG_ECMP_GROUP_ID, eg->id, > + REG_ECMP_MEMBER_ID, ds_cstr(&values)); > + ds_put_format(&actions, "; hash_fields=\"%s\"", > + sset_join(&field_set, ",", "")); > + > + sset_destroy(¤t_field_set); > + sset_destroy(&field_set); > + } else { > + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > + "; %s = select(%s", REG_ECMP_GROUP_ID, eg->id, > + REG_ECMP_MEMBER_ID, ds_cstr(&values)); > } > > ds_put_cstr(&actions, ");"); > @@ -10847,7 +10938,7 @@ add_route(struct lflow_table *lflows, struct ovn_datapath *od, > } > } > build_route_match(op_inport, rtb_id, network_s, plen, is_src_route, > - is_ipv4, &match, &priority, ofs); > + is_ipv4, &match, &priority, ofs, false); > > struct ds common_actions = DS_EMPTY_INITIALIZER; > struct ds actions = DS_EMPTY_INITIALIZER; > @@ -12733,7 +12824,13 @@ build_static_route_flows_for_lrouter( > HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { > /* add a flow in IP_ROUTING, and one flow for each member in > * IP_ROUTING_ECMP. */ > - build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref); > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, NULL); > + if (group->selection_fields) { > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, > + "tcp"); > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, > + "udp"); > + } > } > const struct unique_routes_node *ur; > HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { > diff --git a/ovn-nb.xml b/ovn-nb.xml > index 6376320d3..7576bf7d2 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -3718,6 +3718,23 @@ or > </p> > </column> > > + <column name="options" key="ecmp_selection_fields"> > + <p> > + ECMP routes use OpenFlow groups of type <code>select</code> to > + pick a nexthop among the list of avaible nexthops. s/avaible/available/ > + OVS supports two selection methods: <code>dp_hash</code> and > + <code>hash</code> for hash computatiion and selecting s/computatiion/computation/ > + the buckets of a group. Please see the OVS documentation > + (man ovs-ofctl) for more details on the selection methods. > + </p> > + > + <p> > + OVN by default uses <code>dp_hash</code>. In order to use the > + <code>hash</code> selection method, specify comma-separated > + list of selection fields. > + </p> > + </column> > + > <column name="options" key="origin"> > In case ovn-interconnection has been learned this route, it will have > its origin set: either "connected" or "static". This key is supposed > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 199197f09..3d3046a21 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -6704,9 +6704,9 @@ ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dnl > @@ -6721,9 +6721,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) > @@ -6750,9 +6750,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) > @@ -6768,14 +6768,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 > ovn-sbctl dump-flows lr0 > lr0flows > > AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public > > ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > AT_CLEANUP > @@ -7199,16 +7199,16 @@ AT_CHECK([grep "lr_in_ip_routing_pre" lr0flows | ovn_strip_lflows], [0], [dnl > grep -e "(lr_in_ip_routing ).*outport" lr0flows > > AT_CHECK([grep -e "(lr_in_ip_routing ).*outport" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > ]) > > AT_CLEANUP > diff --git a/tests/ovn.at b/tests/ovn.at > index b31afbfb3..f45ffdfb5 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -2086,6 +2086,16 @@ reg0 = select(1, 2); > encodes as group:20 > uses group: id(20), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) > > +reg9[[16..31]] = select(values=1=50, 2=100, 3; hash_fields="ip_src,ip_dst" ); > + formats as reg9[[16..31]] = select(values=1=50, 2=100, 3=100; hash_fields="ip_src,ip_dst"); > + encodes as group:21 > + uses group: id(21), name(type=select,selection_method=hash,fields(ip_src,ip_dst),bucket=bucket_id=0,weight:50,actions=load:1->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[[16..31]],resubmit(,oflow_in_table)) > + > +reg0 = select(values=1, 2; hash_fields="ip_dst,ip_src"); > + formats as reg0 = select(values=1=100, 2=100; hash_fields="ip_dst,ip_src"); > + encodes as group:22 > + uses group: id(22), name(type=select,selection_method=hash,fields(ip_dst,ip_src),bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) > + > reg0 = select(1=, 2); > Syntax error at `,' expecting weight. > reg0 = select(1=0, 2); > @@ -2094,6 +2104,14 @@ reg0 = select(1=123456, 2); > Syntax error at `123456' expecting weight. > reg0 = select(123); > Syntax error at `;' expecting at least 2 group members. > +reg0 = select(values=1, 2); > + Syntax error at `;' expecting hash_fields. > +reg0 = select(values=1, 2; hash_fields); > + Syntax error at `)' invalid hash_fields. > +reg0 = select(values=1, 2; hash_fields=); > + Syntax error at `)' invalid hash_fields. > +reg0 = select(1, 2; hash_fields="ip_src"); > + Syntax error at `=' hash_fields unexpected. > ip.proto = select(1, 2, 3); > Field ip.proto is not modifiable. > reg0[[0..14]] = select(1, 2, 3); > @@ -2101,12 +2119,12 @@ reg0[[0..14]] = select(1, 2, 3); > > fwd_group(liveness=true, childports="eth0", "lsp1"); > formats as fwd_group(liveness="true", childports="eth0", "lsp1"); > - encodes as group:21 > - uses group: id(21), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:23 > + uses group: id(23), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(childports="eth0", "lsp1"); > - encodes as group:22 > - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:24 > + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(childports=eth0); > Syntax error at `eth0' expecting logical switch port. > @@ -2115,8 +2133,8 @@ fwd_group(); > Syntax error at `)' expecting `;'. > > fwd_group(childports="eth0", "lsp1"); > - encodes as group:22 > - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:24 > + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(liveness=xyzzy, childports="eth0", "lsp1"); > Syntax error at `xyzzy' expecting true or false. > @@ -26531,6 +26549,218 @@ OVN_CLEANUP([hv1]) > AT_CLEANUP > ]) > > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ECMP static routes - custom hash]) > +ovn_start > + > +# Logical network: > +# ls1 (192.168.1.0/24) - lr1 - ls2 (192.168.2.0/24) > +# lsl has lsp11 (192.168.1.11) and ls2 has lsp21 (192.168.2.21) and lsp22 > +# (192.168.2.22) > +# > +# Static routes on lr1: > +# 10.0.0.0/24 nexthop 192.168.2.21 > +# 10.0.0.0/24 nexthop 192.168.2.22 > +# > +# ECMP hash on ip_proto,src_ip,dst_ip,dst_port > +# > +# Test: > +# lsp11 send packets to 10.0.0.100 with different source ports > +# > +# Expected result: > +# All packets should go out of a either lsp21 or lsp22 > + > +ovn-nbctl lr-add lr1 > + > +ovn-nbctl ls-add ls1 > +ovn-nbctl ls-add ls2 > + > +for i in 1 2; do > + ovn-nbctl lrp-add lr1 lrp-lr1-ls${i} 00:00:00:01:0${i}:01 192.168.${i}.1/24 > + ovn-nbctl lsp-add ls${i} lsp-ls${i}-lr1 -- lsp-set-type lsp-ls${i}-lr1 router \ > + -- lsp-set-options lsp-ls${i}-lr1 router-port=lrp-lr1-ls${i} \ > + -- lsp-set-addresses lsp-ls${i}-lr1 router > +done > + > +#install static routes > +ovn-nbctl --ecmp-selection-fields="ip_proto,ip_src" lr-route-add lr1 10.0.0.0/24 192.168.2.21 > +ovn-nbctl --ecmp --ecmp-selection-fields="ip_proto,ip_src,ip_dst,dst_port" lr-route-add lr1 10.0.0.0/24 192.168.2.22 > + > +# Create logical ports > +ovn-nbctl lsp-add ls1 lsp11 -- \ > + lsp-set-addresses lsp11 "f0:00:00:00:01:11 192.168.1.11" > +ovn-nbctl lsp-add ls2 lsp21 -- \ > + lsp-set-addresses lsp21 "f0:00:00:00:02:21 192.168.2.21" > +ovn-nbctl lsp-add ls2 lsp22 -- \ > + lsp-set-addresses lsp22 "f0:00:00:00:02:22 192.168.2.22" > + > +net_add n1 > +sim_add hv1 > +as hv1 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.1 > + > +sim_add hv2 > +as hv2 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.2 > + > +as hv1 > +ovs-vsctl -- add-port br-int hv1-vif1 -- \ > + set interface hv1-vif1 external-ids:iface-id=lsp11 \ > + options:tx_pcap=hv1/vif1-tx.pcap \ > + options:rxq_pcap=hv1/vif1-rx.pcap \ > + ofport-request=1 > + > +ovs-vsctl -- add-port br-int hv1-vif2 -- \ > + set interface hv1-vif2 external-ids:iface-id=lsp21 \ > + options:tx_pcap=hv1/vif2-tx.pcap \ > + options:rxq_pcap=hv1/vif2-rx.pcap \ > + ofport-request=2 > + > +ovs-vsctl -- add-port br-int hv1-vif3 -- \ > + set interface hv1-vif3 external-ids:iface-id=lsp22 \ > + options:tx_pcap=hv1/vif3-tx.pcap \ > + options:rxq_pcap=hv1/vif3-rx.pcap \ > + ofport-request=3 > + > +# wait for earlier changes to take effect > +check ovn-nbctl --wait=hv sync > +wait_for_ports_up > + > +ovn-sbctl dump-flows > sbflows > +AT_CAPTURE_FILE([sbflows]) > + > +AT_CAPTURE_FILE([ofgroups]) > +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-groups br-int > ofgroups > + grep "selection_method=hash,fields" ofgroups | \ > + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 > +]) > + > +as hv1 ovs-ofctl dump-groups br-int > ofgroups > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 > +]) > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 > +]) > + > +as hv1 ovs-ofctl dump-flows br-int > oflows > +AT_CAPTURE_FILE([oflows]) > + > +for i in $(seq 5001 5010); do > + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && > + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + OVS_WAIT_UNTIL([as hv1 ovs-appctl -t ovn-controller inject-pkt "$packet"]) > + > + for j in 1 2; do > + # Assume all packets go to lsp2${j}. > + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && > + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} > + done > +done > + > +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed > +OVS_WAIT_UNTIL([ > + hv1_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` > + hv1_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` > + echo $hv1_rcv_n1 $hv1_rcv_n2 > + test $(($hv1_rcv_n1 + $hv1_rcv_n2)) -ge 10]) > + > +if test $hv1_rcv_n1 = 0; then > + AT_CHECK([test $hv1_rcv_n2 -ge 10], [0], []) > +else > + AT_CHECK([test $hv1_rcv_n1 -ge 10], [0], []) > +fi > + > +# Move all VIFs to hv2 and send the same packets again > +as hv1 > +ovs-vsctl del-port hv1-vif1 > +ovs-vsctl del-port hv1-vif2 > +ovs-vsctl del-port hv1-vif3 > + > +wait_column "" Port_Binding chassis logical_port=lsp11 > +wait_column "" Port_Binding chassis logical_port=lsp21 > +wait_column "" Port_Binding chassis logical_port=lsp22 > + > +as hv2 > +ovs-vsctl -- add-port br-int hv2-vif1 -- \ > + set interface hv2-vif1 external-ids:iface-id=lsp11 \ > + options:tx_pcap=hv2/vif1-tx.pcap \ > + options:rxq_pcap=hv2/vif1-rx.pcap \ > + ofport-request=1 > + > +ovs-vsctl -- add-port br-int hv2-vif2 -- \ > + set interface hv2-vif2 external-ids:iface-id=lsp21 \ > + options:tx_pcap=hv2/vif2-tx.pcap \ > + options:rxq_pcap=hv2/vif2-rx.pcap \ > + ofport-request=2 > + > +ovs-vsctl -- add-port br-int hv2-vif3 -- \ > + set interface hv2-vif3 external-ids:iface-id=lsp22 \ > + options:tx_pcap=hv2/vif3-tx.pcap \ > + options:rxq_pcap=hv2/vif3-rx.pcap \ > + ofport-request=3 > + > +# wait for earlier changes to take effect > +check ovn-nbctl --wait=hv sync > +wait_for_ports_up > + > +AT_CAPTURE_FILE([ofgroups]) > +OVS_WAIT_FOR_OUTPUT([as hv2 ovs-ofctl dump-groups br-int > ofgroups > + grep "selection_method=hash,fields" ofgroups | \ > + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 > +]) > + > +as hv2 ovs-ofctl dump-groups br-int > ofgroups > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 > +]) > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 > +]) > + > +as hv2 ovs-ofctl dump-flows br-int > oflows > +AT_CAPTURE_FILE([oflows]) > + > +for i in $(seq 5001 5010); do > + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && > + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + OVS_WAIT_UNTIL([as hv2 ovs-appctl -t ovn-controller inject-pkt "$packet"]) > + > + for j in 1 2; do > + # Assume all packets go to lsp2${j}. > + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && > + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} > + done > +done > + > +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed > +OVS_WAIT_UNTIL([ > + hv2_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` > + hv2_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` > + echo $hv2_rcv_n1 $hv2_rcv_n2 > + test $(($hv2_rcv_n1 + $hv2_rcv_n2)) -ge 10]) > + > +if test $hv2_rcv_n1 = 0; then > + AT_CHECK([test $hv2_rcv_n2 -ge 10], [0], []) > +else > + AT_CHECK([test $hv2_rcv_n1 -ge 10], [0], []) > +fi > + > +# All packets should of the same port on both hosts > +if test $hv1_rcv_n1 = 0; then > + AT_CHECK([test $hv2_rcv_n1 -eq 0], [0], []) > +else > + AT_CHECK([test $hv2_rcv_n2 -eq 0], [0], []) > +fi > + > +OVN_CLEANUP([hv1], [hv2]) > + > +AT_CLEANUP > +]) > > OVN_FOR_EACH_NORTHD([ > AT_SETUP([route tables -- <main> route table routes]) > diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c > index 679d3f2d9..70e8fb239 100644 > --- a/utilities/ovn-nbctl.c > +++ b/utilities/ovn-nbctl.c > @@ -4720,11 +4720,18 @@ nbctl_lr_route_add(struct ctl_context *ctx) > nbrec_logical_router_static_route_set_route_table(route, route_table); > } > > - if (ecmp_symmetric_reply) { > - const struct smap options = SMAP_CONST1(&options, > - "ecmp_symmetric_reply", > - "true"); > + const char *ecmp_selection_fields = shash_find_data(&ctx->options, > + "--ecmp-selection-fields"); > + if (ecmp_symmetric_reply || ecmp_selection_fields) { > + struct smap options = SMAP_INITIALIZER(&options); > + if (ecmp_symmetric_reply) { > + smap_add(&options, "ecmp_symmetric_reply", "true"); > + } > + if (ecmp_selection_fields) { > + smap_add(&options, "ecmp_selection_fields", ecmp_selection_fields); > + } > nbrec_logical_router_static_route_set_options(route, &options); > + smap_destroy(&options); > } > > nbrec_logical_router_update_static_routes_addvalue(lr, route); > @@ -8057,7 +8064,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { > { "lr-route-add", 3, 4, "ROUTER PREFIX NEXTHOP [PORT]", > nbctl_pre_lr_route_add, nbctl_lr_route_add, NULL, > "--may-exist,--ecmp,--ecmp-symmetric-reply,--policy=," > - "--route-table=,--bfd?", RW }, > + "--route-table=,--bfd?,--ecmp-selection-fields=", RW }, > { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", > nbctl_pre_lr_route_del, nbctl_lr_route_del, NULL, > "--if-exists,--policy=,--route-table=", RW },
Hi Mark, Thank you for the feedback. The current implementation of dp_hash in the kernel uses 5-tuple for hash computation. The implementation also has a random seed. As a result when a VM migrates from one host to another, the hash value could change and the same flow could take a different nexthop. The reason for adding the flexibility to switch from dp_hash to hash is to provide a predictable hash across hosts. I have demonstrated this in the following test case – “ECMP static routes - custom hash”. The reason for using L4 port information was to keep it similar to dp_hash implementation. Thank You, Karthik From: Mark Michelson <mmichels@redhat.com> Date: Thursday, August 15, 2024 at 1:08 AM To: Karthik Chandrashekar <karthik.c@nutanix.com>, ovs-dev@openvswitch.org <ovs-dev@openvswitch.org> Subject: Re: [ovs-dev] [PATCH ovn v2] Support selection fields for ECMP routes. !-------------------------------------------------------------------| CAUTION: External Email |-------------------------------------------------------------------! Hi Karthik, thanks for the patch. The protocol items introduced in v2 are interesting. It highlights that there are inherent flaws in allowing L4 selection fields to be used in what is intended to be a L3 feature. IMO, there shouldn't be a need to discriminate based on L4 protocol when trying to route packets. The current patch does some things I don't particularly like with regards to L4 protocol: * The number of ECMP flows has tripled as a result of this patch. * The patch adds undocumented "dst_port" and "src_port" selection fields as options. * The patch has no SCTP support. (and if it did, then the number of ECMP flows would quadruple instead of tripling) I would say that since ECMP is a pure L3 feature, L4 selection fields should not be used at all as part of its hash function. As you show in your patch, you have to add protocol prerequisites to the match in case a L4-protocol-specific selection field is added. How to enforce this is difficult. We could add code to northd that specifically checks for the various L4 selection fields and rejects them if they are present (falling back to the default selection method as a result). Or we could document that L4 selection fields should not be used, but let the user shoot themselves in the foot if they absolutely insist on adding any. One thing that comes to mind for this particular feature is: what is the motivation? Why do you want to be able to hash on specific fields instead of using the default hash? Maybe that'll help me to understand why the L4 protocol considerations are here. I have some additional findings inline below. On 8/13/24 05:29, karthik.c@nutanix.com wrote: > From: Karthik Chandrashekar <karthik.c@nutanix.com> > > - This patch adds the ability to specify a custom set of packet headers > for hash computation for ECMP routes similar to the support that was > added for LB in 5af304e7478adcf5ac50ed41e96a55bebebff3e8 > - ECMP routes by default use dp_hash as a selection_method for OVS flows. > When ecmp_selection_fields is specified, the selection_method will be > hash with the specified list of fields used for computing the hash. > - For simplicity, list of fields that are used in the select action > is a union of all the fields specified in each Logical_Route_Static_Route > that is part of a given ECMP route. > - In order to allow match based on L4 port numbers, the lr_in_ip_routing > rules have been split into separate lflows with protocol specific > fields when src_port or dst_port is specified in the ecmp_selectioin_fields. > (This is based on the requirement that pre-requisites of fields > must be provided by any flows that output to the group) > > Signed-off-by: Karthik Chandrashekar <karthik.c@nutanix.com> > --- > v2: > - Install separate logical flows for TCP and UDP in lr_in_ip_routing. > - Add more test coverage. > --- > --- > include/ovn/actions.h | 1 + > lib/actions.c | 55 +++++++++- > northd/northd.c | 125 +++++++++++++++++++--- > ovn-nb.xml | 17 +++ > tests/ovn-northd.at | 42 ++++---- > tests/ovn.at | 242 ++++++++++++++++++++++++++++++++++++++++-- > utilities/ovn-nbctl.c | 17 ++- > 7 files changed, 450 insertions(+), 49 deletions(-) > > diff --git a/include/ovn/actions.h b/include/ovn/actions.h > index 88cf4de79..a9af1e38e 100644 > --- a/include/ovn/actions.h > +++ b/include/ovn/actions.h > @@ -338,6 +338,7 @@ struct ovnact_select { > struct ovnact_select_dst *dsts; > size_t n_dsts; > uint8_t ltable; /* Logical table ID of next table. */ > + char *hash_fields; > struct expr_field res_field; > }; > > diff --git a/lib/actions.c b/lib/actions.c > index 37676ef81..ac5d1dbd5 100644 > --- a/lib/actions.c > +++ b/lib/actions.c > @@ -1534,11 +1534,19 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) > struct ovnact_select_dst *dsts = NULL; > size_t allocated_dsts = 0; > size_t n_dsts = 0; > + bool requires_hash_fields = false; > + char *hash_fields = NULL; > > lexer_get(ctx->lexer); /* Skip "select". */ > lexer_get(ctx->lexer); /* Skip '('. */ > > - while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { > + if (lexer_match_id(ctx->lexer, "values")) { > + lexer_force_match(ctx->lexer, LEX_T_EQUALS); > + requires_hash_fields = true; > + } > + > + while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && > + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { This while statement allows for some odd syntax to be accepted. For instance, the following would be allowed: select(values=1=50, 2=100) hash_fields=ip_dst) One way to avoid this is to change which token to look for based on whether require_hash_fields is true. enum lex_type value_end_token = requires_hash_fields ? LEX_T_SEMICOLON : LEX_T_PAREN; while (lexer_match(ctx->lexer, value_end_token) { ... > struct ovnact_select_dst dst; > if (!action_parse_uint16(ctx, &dst.id, "id")) { > free(dsts); > @@ -1574,11 +1582,39 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) > return; > } > > + if (requires_hash_fields) { > + if (!lexer_match_id(ctx->lexer, "hash_fields")) { > + lexer_syntax_error(ctx->lexer, "expecting hash_fields"); > + free(dsts); > + return; > + } > + if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || > + ctx->lexer->token.type != LEX_T_STRING || > + lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { > + lexer_syntax_error(ctx->lexer, "invalid hash_fields"); > + free(dsts); > + return; > + } > + > + hash_fields = xstrdup(ctx->lexer->token.s); > + lexer_get(ctx->lexer); > + if (!lexer_match(ctx->lexer, LEX_T_SEMICOLON)) { > + lexer_get(ctx->lexer); > + } I'm having trouble understanding what this if statement accomplishes. > + } else { > + if (lexer_match_id(ctx->lexer, "hash_fields")) { I don't think there is a specific coding guideline that says you have to do it this way, but it's common to combine the else and if on a single line. This allows the code to be indented one level less. } else if (lexer_match_id(ctx->lexer, "hash_fields")) { > + lexer_syntax_error(ctx->lexer, "hash_fields unexpected"); > + free(dsts); > + return; > + } > + } > + > struct ovnact_select *select = ovnact_put_SELECT(ctx->ovnacts); > select->ltable = ctx->pp->cur_ltable + 1; > select->dsts = dsts; > select->n_dsts = n_dsts; > select->res_field = *res_field; > + select->hash_fields = hash_fields; > } > > static void > @@ -1588,6 +1624,9 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) > ds_put_cstr(s, " = "); > ds_put_cstr(s, "select"); > ds_put_char(s, '('); > + if (select->hash_fields) { > + ds_put_format(s, "values="); > + } > for (size_t i = 0; i < select->n_dsts; i++) { > if (i) { > ds_put_cstr(s, ", "); > @@ -1598,6 +1637,10 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) > ds_put_format(s, "=%"PRIu16, dst->weight); > } > ds_put_char(s, ')'); > + if (select->hash_fields) { > + ds_chomp(s, ')'); > + ds_put_format(s, "; hash_fields=\"%s\")", select->hash_fields); > + } > ds_put_char(s, ';'); > } > > @@ -1612,9 +1655,14 @@ encode_SELECT(const struct ovnact_select *select, > struct ofpact_group *og; > > struct ds ds = DS_EMPTY_INITIALIZER; > - ds_put_format(&ds, "type=select,selection_method=dp_hash"); > + ds_put_format(&ds, "type=select,selection_method=%s", > + select->hash_fields ? "hash": "dp_hash"); > + if (select->hash_fields) { > + ds_put_format(&ds, ",fields(%s)", select->hash_fields); > + } > > - if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT)) { > + if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT) && > + !select->hash_fields) { > /* Select dp-hash l4_symmetric by setting the upper 32bits of > * selection_method_param to value 1 (1 << 32): */ > ds_put_cstr(&ds, ",selection_method_param=0x100000000"); > @@ -1647,6 +1695,7 @@ static void > ovnact_select_free(struct ovnact_select *select) > { > free(select->dsts); > + free(select->hash_fields); > } > > static void > diff --git a/northd/northd.c b/northd/northd.c > index a8a0b6f94..1adc82ba1 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -282,9 +282,9 @@ static bool vxlan_mode; > * same ip_prefix values: > * - connected route overrides static one; > * - static route overrides src-ip route. */ > -#define ROUTE_PRIO_OFFSET_MULTIPLIER 3 > -#define ROUTE_PRIO_OFFSET_STATIC 1 > -#define ROUTE_PRIO_OFFSET_CONNECTED 2 > +#define ROUTE_PRIO_OFFSET_MULTIPLIER 5 > +#define ROUTE_PRIO_OFFSET_STATIC 2 > +#define ROUTE_PRIO_OFFSET_CONNECTED 4 > > /* Returns the type of the datapath to which a flow with the given 'stage' may > * be added. */ > @@ -10263,6 +10263,7 @@ struct parsed_route { > uint32_t route_table_id; > uint32_t hash; > const struct nbrec_logical_router_static_route *route; > + const char *ecmp_selection_fields; > bool ecmp_symmetric_reply; > bool is_discard_route; > }; > @@ -10373,6 +10374,12 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, > pr->ecmp_symmetric_reply = smap_get_bool(&route->options, > "ecmp_symmetric_reply", false); > pr->is_discard_route = is_discard_route; > + pr->ecmp_selection_fields = NULL; > + const char *ecmp_selection_fields = smap_get(&route->options, > + "ecmp_selection_fields"); > + if (ecmp_selection_fields) { > + pr->ecmp_selection_fields = ecmp_selection_fields; > + } smap_get() returns NULL if it fails. The six line addition above could be simplified to: pr->ecmp_selection_fields = smap_get(&route->options, "ecmp_selection_fields"); > ovs_list_insert(routes, &pr->list_node); > return pr; > } > @@ -10402,6 +10409,7 @@ struct ecmp_groups_node { > const char *origin; > uint32_t route_table_id; > uint16_t route_count; > + char *selection_fields; > struct ovs_list route_list; /* Contains ecmp_route_list_node */ > }; > > @@ -10418,6 +10426,34 @@ ecmp_groups_add_route(struct ecmp_groups_node *group, > struct ecmp_route_list_node *er = xmalloc(sizeof *er); > er->route = route; > er->id = ++group->route_count; > + > + if (route->ecmp_selection_fields) { > + if (group->selection_fields) { > + struct sset current_field_set; > + struct sset field_set; > + > + sset_from_delimited_string(¤t_field_set, > + group->selection_fields, ","); > + sset_from_delimited_string(&field_set, > + route->ecmp_selection_fields, ","); > + > + const char *field; > + SSET_FOR_EACH (field, &field_set) { > + sset_add(¤t_field_set, field); > + } This is an interesting behavior. If ECMP routes do not all have the same selection fields, then the result for the group is a set union of the selection fields from all ECMP routes in the group. IMO, all routes in an ECMP group should have the same configured selection fields. If they do not, then that is a configuration error. We can't assume that what the user wants is the union of all fields. On another note, the frequent conversion from string to sset back to string seems wasteful. There's not much reason to store the string version of the selection fields on the parsed_route. Instead, you could always store this as an sset. On the ecmp_group_node, it seems like the sset is used more frequently, and the string version is only really needed when adding the logical flow. So on the ecmp_group_node, you could also store just the sset of selection fields, and then in the function where ECMP group flows are added, that is where you could convert to a string. > + > + group->selection_fields = xasprintf("%s", > + sset_join(¤t_field_set, > + ",", "")); This introduces a memory leak. group->selection_fields needs to be freed before it is reassigned. > + > + sset_destroy(&field_set); > + sset_destroy(¤t_field_set); > + } else { > + group->selection_fields = xasprintf("%s", > + route->ecmp_selection_fields); > + } > + } > + > ovs_list_insert(&group->route_list, &er->list_node); > } > > @@ -10440,6 +10476,7 @@ ecmp_groups_add(struct hmap *ecmp_groups, > eg->is_src_route = route->is_src_route; > eg->origin = smap_get_def(&route->route->options, "origin", ""); > eg->route_table_id = route->route_table_id; > + eg->selection_fields = NULL; > ovs_list_init(&eg->route_list); > ecmp_groups_add_route(eg, route); > > @@ -10542,7 +10579,8 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen) > static void > build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, > const char *network_s, int plen, bool is_src_route, > - bool is_ipv4, struct ds *match, uint16_t *priority, int ofs) > + bool is_ipv4, struct ds *match, uint16_t *priority, int ofs, > + bool has_protocol_match) > { > const char *dir; > /* The priority here is calculated to implement longest-prefix-match > @@ -10554,6 +10592,10 @@ build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, > dir = "dst"; > } > > + if (has_protocol_match) { > + ofs += 1; > + } > + > *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs; > > if (op_inport) { > @@ -10740,7 +10782,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, > static void > build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, > const struct hmap *lr_ports, struct ecmp_groups_node *eg, > - struct lflow_ref *lflow_ref) > + struct lflow_ref *lflow_ref, const char *protocol) > > { > bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix); > @@ -10752,22 +10794,71 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, > int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ? > ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC; > build_route_match(NULL, eg->route_table_id, prefix_s, eg->plen, > - eg->is_src_route, is_ipv4, &route_match, &priority, ofs); > + eg->is_src_route, is_ipv4, &route_match, &priority, ofs, > + protocol != NULL); > free(prefix_s); > > - struct ds actions = DS_EMPTY_INITIALIZER; > - ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > - "; %s = select(", REG_ECMP_GROUP_ID, eg->id, > - REG_ECMP_MEMBER_ID); > + if (eg->selection_fields && protocol) { > + if (!strcmp(protocol, "tcp")) { > + ds_put_format(&route_match, " && tcp"); > + } else if (!strcmp(protocol, "udp")) { > + ds_put_format(&route_match, " && udp"); > + } > + } > > + struct ds values = DS_EMPTY_INITIALIZER; > bool is_first = true; > LIST_FOR_EACH (er, list_node, &eg->route_list) { > if (is_first) { > is_first = false; > } else { > - ds_put_cstr(&actions, ", "); > + ds_put_cstr(&values, ", "); > } > - ds_put_format(&actions, "%"PRIu16, er->id); > + ds_put_format(&values, "%"PRIu16, er->id); > + } > + > + struct ds actions = DS_EMPTY_INITIALIZER; > + if (eg->selection_fields) { > + struct sset current_field_set; > + struct sset field_set = SSET_INITIALIZER(&field_set); > + sset_from_delimited_string(¤t_field_set, > + eg->selection_fields, ","); > + > + const char *field; > + SSET_FOR_EACH (field, ¤t_field_set) { > + if (!strcmp(field, "src_port")) { > + if (protocol) { > + if (!strcmp(protocol, "tcp")) { > + sset_add(&field_set, "tcp_src"); > + } else if (!strcmp(protocol, "udp")) { > + sset_add(&field_set, "udp_src"); > + } > + } > + } else if (!strcmp(field, "dst_port")) { > + if (protocol) { > + if (!strcmp(protocol, "tcp")) { > + sset_add(&field_set, "tcp_dst"); > + } else if (!strcmp(protocol, "udp")) { > + sset_add(&field_set, "udp_dst"); > + } > + } > + } else { > + sset_add(&field_set, field); > + } > + } > + > + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > + "; %s = select(values=%s", REG_ECMP_GROUP_ID, eg->id, > + REG_ECMP_MEMBER_ID, ds_cstr(&values)); > + ds_put_format(&actions, "; hash_fields=\"%s\"", > + sset_join(&field_set, ",", "")); > + > + sset_destroy(¤t_field_set); > + sset_destroy(&field_set); > + } else { > + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > + "; %s = select(%s", REG_ECMP_GROUP_ID, eg->id, > + REG_ECMP_MEMBER_ID, ds_cstr(&values)); > } > > ds_put_cstr(&actions, ");"); > @@ -10847,7 +10938,7 @@ add_route(struct lflow_table *lflows, struct ovn_datapath *od, > } > } > build_route_match(op_inport, rtb_id, network_s, plen, is_src_route, > - is_ipv4, &match, &priority, ofs); > + is_ipv4, &match, &priority, ofs, false); > > struct ds common_actions = DS_EMPTY_INITIALIZER; > struct ds actions = DS_EMPTY_INITIALIZER; > @@ -12733,7 +12824,13 @@ build_static_route_flows_for_lrouter( > HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { > /* add a flow in IP_ROUTING, and one flow for each member in > * IP_ROUTING_ECMP. */ > - build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref); > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, NULL); > + if (group->selection_fields) { > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, > + "tcp"); > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, > + "udp"); > + } > } > const struct unique_routes_node *ur; > HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { > diff --git a/ovn-nb.xml b/ovn-nb.xml > index 6376320d3..7576bf7d2 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -3718,6 +3718,23 @@ or > </p> > </column> > > + <column name="options" key="ecmp_selection_fields"> > + <p> > + ECMP routes use OpenFlow groups of type <code>select</code> to > + pick a nexthop among the list of avaible nexthops. s/avaible/available/ > + OVS supports two selection methods: <code>dp_hash</code> and > + <code>hash</code> for hash computatiion and selecting s/computatiion/computation/ > + the buckets of a group. Please see the OVS documentation > + (man ovs-ofctl) for more details on the selection methods. > + </p> > + > + <p> > + OVN by default uses <code>dp_hash</code>. In order to use the > + <code>hash</code> selection method, specify comma-separated > + list of selection fields. > + </p> > + </column> > + > <column name="options" key="origin"> > In case ovn-interconnection has been learned this route, it will have > its origin set: either "connected" or "static". This key is supposed > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 199197f09..3d3046a21 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -6704,9 +6704,9 @@ ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dnl > @@ -6721,9 +6721,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) > @@ -6750,9 +6750,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) > @@ -6768,14 +6768,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 > ovn-sbctl dump-flows lr0 > lr0flows > > AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public > > ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > AT_CLEANUP > @@ -7199,16 +7199,16 @@ AT_CHECK([grep "lr_in_ip_routing_pre" lr0flows | ovn_strip_lflows], [0], [dnl > grep -e "(lr_in_ip_routing ).*outport" lr0flows > > AT_CHECK([grep -e "(lr_in_ip_routing ).*outport" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > ]) > > AT_CLEANUP > diff --git a/tests/ovn.at b/tests/ovn.at > index b31afbfb3..f45ffdfb5 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -2086,6 +2086,16 @@ reg0 = select(1, 2); > encodes as group:20 > uses group: id(20), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) > > +reg9[[16..31]] = select(values=1=50, 2=100, 3; hash_fields="ip_src,ip_dst" ); > + formats as reg9[[16..31]] = select(values=1=50, 2=100, 3=100; hash_fields="ip_src,ip_dst"); > + encodes as group:21 > + uses group: id(21), name(type=select,selection_method=hash,fields(ip_src,ip_dst),bucket=bucket_id=0,weight:50,actions=load:1->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[[16..31]],resubmit(,oflow_in_table)) > + > +reg0 = select(values=1, 2; hash_fields="ip_dst,ip_src"); > + formats as reg0 = select(values=1=100, 2=100; hash_fields="ip_dst,ip_src"); > + encodes as group:22 > + uses group: id(22), name(type=select,selection_method=hash,fields(ip_dst,ip_src),bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) > + > reg0 = select(1=, 2); > Syntax error at `,' expecting weight. > reg0 = select(1=0, 2); > @@ -2094,6 +2104,14 @@ reg0 = select(1=123456, 2); > Syntax error at `123456' expecting weight. > reg0 = select(123); > Syntax error at `;' expecting at least 2 group members. > +reg0 = select(values=1, 2); > + Syntax error at `;' expecting hash_fields. > +reg0 = select(values=1, 2; hash_fields); > + Syntax error at `)' invalid hash_fields. > +reg0 = select(values=1, 2; hash_fields=); > + Syntax error at `)' invalid hash_fields. > +reg0 = select(1, 2; hash_fields="ip_src"); > + Syntax error at `=' hash_fields unexpected. > ip.proto = select(1, 2, 3); > Field ip.proto is not modifiable. > reg0[[0..14]] = select(1, 2, 3); > @@ -2101,12 +2119,12 @@ reg0[[0..14]] = select(1, 2, 3); > > fwd_group(liveness=true, childports="eth0", "lsp1"); > formats as fwd_group(liveness="true", childports="eth0", "lsp1"); > - encodes as group:21 > - uses group: id(21), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:23 > + uses group: id(23), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(childports="eth0", "lsp1"); > - encodes as group:22 > - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:24 > + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(childports=eth0); > Syntax error at `eth0' expecting logical switch port. > @@ -2115,8 +2133,8 @@ fwd_group(); > Syntax error at `)' expecting `;'. > > fwd_group(childports="eth0", "lsp1"); > - encodes as group:22 > - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:24 > + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(liveness=xyzzy, childports="eth0", "lsp1"); > Syntax error at `xyzzy' expecting true or false. > @@ -26531,6 +26549,218 @@ OVN_CLEANUP([hv1]) > AT_CLEANUP > ]) > > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ECMP static routes - custom hash]) > +ovn_start > + > +# Logical network: > +# ls1 (192.168.1.0/24) - lr1 - ls2 (192.168.2.0/24) > +# lsl has lsp11 (192.168.1.11) and ls2 has lsp21 (192.168.2.21) and lsp22 > +# (192.168.2.22) > +# > +# Static routes on lr1: > +# 10.0.0.0/24 nexthop 192.168.2.21 > +# 10.0.0.0/24 nexthop 192.168.2.22 > +# > +# ECMP hash on ip_proto,src_ip,dst_ip,dst_port > +# > +# Test: > +# lsp11 send packets to 10.0.0.100 with different source ports > +# > +# Expected result: > +# All packets should go out of a either lsp21 or lsp22 > + > +ovn-nbctl lr-add lr1 > + > +ovn-nbctl ls-add ls1 > +ovn-nbctl ls-add ls2 > + > +for i in 1 2; do > + ovn-nbctl lrp-add lr1 lrp-lr1-ls${i} 00:00:00:01:0${i}:01 192.168.${i}.1/24 > + ovn-nbctl lsp-add ls${i} lsp-ls${i}-lr1 -- lsp-set-type lsp-ls${i}-lr1 router \ > + -- lsp-set-options lsp-ls${i}-lr1 router-port=lrp-lr1-ls${i} \ > + -- lsp-set-addresses lsp-ls${i}-lr1 router > +done > + > +#install static routes > +ovn-nbctl --ecmp-selection-fields="ip_proto,ip_src" lr-route-add lr1 10.0.0.0/24 192.168.2.21 > +ovn-nbctl --ecmp --ecmp-selection-fields="ip_proto,ip_src,ip_dst,dst_port" lr-route-add lr1 10.0.0.0/24 192.168.2.22 > + > +# Create logical ports > +ovn-nbctl lsp-add ls1 lsp11 -- \ > + lsp-set-addresses lsp11 "f0:00:00:00:01:11 192.168.1.11" > +ovn-nbctl lsp-add ls2 lsp21 -- \ > + lsp-set-addresses lsp21 "f0:00:00:00:02:21 192.168.2.21" > +ovn-nbctl lsp-add ls2 lsp22 -- \ > + lsp-set-addresses lsp22 "f0:00:00:00:02:22 192.168.2.22" > + > +net_add n1 > +sim_add hv1 > +as hv1 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.1 > + > +sim_add hv2 > +as hv2 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.2 > + > +as hv1 > +ovs-vsctl -- add-port br-int hv1-vif1 -- \ > + set interface hv1-vif1 external-ids:iface-id=lsp11 \ > + options:tx_pcap=hv1/vif1-tx.pcap \ > + options:rxq_pcap=hv1/vif1-rx.pcap \ > + ofport-request=1 > + > +ovs-vsctl -- add-port br-int hv1-vif2 -- \ > + set interface hv1-vif2 external-ids:iface-id=lsp21 \ > + options:tx_pcap=hv1/vif2-tx.pcap \ > + options:rxq_pcap=hv1/vif2-rx.pcap \ > + ofport-request=2 > + > +ovs-vsctl -- add-port br-int hv1-vif3 -- \ > + set interface hv1-vif3 external-ids:iface-id=lsp22 \ > + options:tx_pcap=hv1/vif3-tx.pcap \ > + options:rxq_pcap=hv1/vif3-rx.pcap \ > + ofport-request=3 > + > +# wait for earlier changes to take effect > +check ovn-nbctl --wait=hv sync > +wait_for_ports_up > + > +ovn-sbctl dump-flows > sbflows > +AT_CAPTURE_FILE([sbflows]) > + > +AT_CAPTURE_FILE([ofgroups]) > +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-groups br-int > ofgroups > + grep "selection_method=hash,fields" ofgroups | \ > + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 > +]) > + > +as hv1 ovs-ofctl dump-groups br-int > ofgroups > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 > +]) > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 > +]) > + > +as hv1 ovs-ofctl dump-flows br-int > oflows > +AT_CAPTURE_FILE([oflows]) > + > +for i in $(seq 5001 5010); do > + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && > + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + OVS_WAIT_UNTIL([as hv1 ovs-appctl -t ovn-controller inject-pkt "$packet"]) > + > + for j in 1 2; do > + # Assume all packets go to lsp2${j}. > + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && > + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} > + done > +done > + > +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed > +OVS_WAIT_UNTIL([ > + hv1_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` > + hv1_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` > + echo $hv1_rcv_n1 $hv1_rcv_n2 > + test $(($hv1_rcv_n1 + $hv1_rcv_n2)) -ge 10]) > + > +if test $hv1_rcv_n1 = 0; then > + AT_CHECK([test $hv1_rcv_n2 -ge 10], [0], []) > +else > + AT_CHECK([test $hv1_rcv_n1 -ge 10], [0], []) > +fi > + > +# Move all VIFs to hv2 and send the same packets again > +as hv1 > +ovs-vsctl del-port hv1-vif1 > +ovs-vsctl del-port hv1-vif2 > +ovs-vsctl del-port hv1-vif3 > + > +wait_column "" Port_Binding chassis logical_port=lsp11 > +wait_column "" Port_Binding chassis logical_port=lsp21 > +wait_column "" Port_Binding chassis logical_port=lsp22 > + > +as hv2 > +ovs-vsctl -- add-port br-int hv2-vif1 -- \ > + set interface hv2-vif1 external-ids:iface-id=lsp11 \ > + options:tx_pcap=hv2/vif1-tx.pcap \ > + options:rxq_pcap=hv2/vif1-rx.pcap \ > + ofport-request=1 > + > +ovs-vsctl -- add-port br-int hv2-vif2 -- \ > + set interface hv2-vif2 external-ids:iface-id=lsp21 \ > + options:tx_pcap=hv2/vif2-tx.pcap \ > + options:rxq_pcap=hv2/vif2-rx.pcap \ > + ofport-request=2 > + > +ovs-vsctl -- add-port br-int hv2-vif3 -- \ > + set interface hv2-vif3 external-ids:iface-id=lsp22 \ > + options:tx_pcap=hv2/vif3-tx.pcap \ > + options:rxq_pcap=hv2/vif3-rx.pcap \ > + ofport-request=3 > + > +# wait for earlier changes to take effect > +check ovn-nbctl --wait=hv sync > +wait_for_ports_up > + > +AT_CAPTURE_FILE([ofgroups]) > +OVS_WAIT_FOR_OUTPUT([as hv2 ovs-ofctl dump-groups br-int > ofgroups > + grep "selection_method=hash,fields" ofgroups | \ > + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 > +]) > + > +as hv2 ovs-ofctl dump-groups br-int > ofgroups > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 > +]) > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 > +]) > + > +as hv2 ovs-ofctl dump-flows br-int > oflows > +AT_CAPTURE_FILE([oflows]) > + > +for i in $(seq 5001 5010); do > + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && > + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + OVS_WAIT_UNTIL([as hv2 ovs-appctl -t ovn-controller inject-pkt "$packet"]) > + > + for j in 1 2; do > + # Assume all packets go to lsp2${j}. > + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && > + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} > + done > +done > + > +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed > +OVS_WAIT_UNTIL([ > + hv2_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` > + hv2_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` > + echo $hv2_rcv_n1 $hv2_rcv_n2 > + test $(($hv2_rcv_n1 + $hv2_rcv_n2)) -ge 10]) > + > +if test $hv2_rcv_n1 = 0; then > + AT_CHECK([test $hv2_rcv_n2 -ge 10], [0], []) > +else > + AT_CHECK([test $hv2_rcv_n1 -ge 10], [0], []) > +fi > + > +# All packets should of the same port on both hosts > +if test $hv1_rcv_n1 = 0; then > + AT_CHECK([test $hv2_rcv_n1 -eq 0], [0], []) > +else > + AT_CHECK([test $hv2_rcv_n2 -eq 0], [0], []) > +fi > + > +OVN_CLEANUP([hv1], [hv2]) > + > +AT_CLEANUP > +]) > > OVN_FOR_EACH_NORTHD([ > AT_SETUP([route tables -- <main> route table routes]) > diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c > index 679d3f2d9..70e8fb239 100644 > --- a/utilities/ovn-nbctl.c > +++ b/utilities/ovn-nbctl.c > @@ -4720,11 +4720,18 @@ nbctl_lr_route_add(struct ctl_context *ctx) > nbrec_logical_router_static_route_set_route_table(route, route_table); > } > > - if (ecmp_symmetric_reply) { > - const struct smap options = SMAP_CONST1(&options, > - "ecmp_symmetric_reply", > - "true"); > + const char *ecmp_selection_fields = shash_find_data(&ctx->options, > + "--ecmp-selection-fields"); > + if (ecmp_symmetric_reply || ecmp_selection_fields) { > + struct smap options = SMAP_INITIALIZER(&options); > + if (ecmp_symmetric_reply) { > + smap_add(&options, "ecmp_symmetric_reply", "true"); > + } > + if (ecmp_selection_fields) { > + smap_add(&options, "ecmp_selection_fields", ecmp_selection_fields); > + } > nbrec_logical_router_static_route_set_options(route, &options); > + smap_destroy(&options); > } > > nbrec_logical_router_update_static_routes_addvalue(lr, route); > @@ -8057,7 +8064,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { > { "lr-route-add", 3, 4, "ROUTER PREFIX NEXTHOP [PORT]", > nbctl_pre_lr_route_add, nbctl_lr_route_add, NULL, > "--may-exist,--ecmp,--ecmp-symmetric-reply,--policy=," > - "--route-table=,--bfd?", RW }, > + "--route-table=,--bfd?,--ecmp-selection-fields=", RW }, > { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", > nbctl_pre_lr_route_del, nbctl_lr_route_del, NULL, > "--if-exists,--policy=,--route-table=", RW },
Hi Mark, I have addressed the review comments and sent out a v3 version of the patch. I have made the separation of flows based on protocol in the routing table more granular. This will only happen if match on src_port or dst_port is specified. I have updated the documentation on the usage and semantics along with some examples. Thank You, Karthik From: dev <ovs-dev-bounces@openvswitch.org> on behalf of Karthik Chandrashekar <karthik.c@nutanix.com> Date: Thursday, August 15, 2024 at 8:24 AM To: Mark Michelson <mmichels@redhat.com>, ovs-dev@openvswitch.org <ovs-dev@openvswitch.org> Subject: Re: [ovs-dev] [PATCH ovn v2] Support selection fields for ECMP routes. Hi Mark, Thank you for the feedback. The current implementation of dp_hash in the kernel uses 5-tuple for hash computation. The implementation also has a random seed. As a result when a VM migrates from one host to another, the hash value could change and the same flow could take a different nexthop. The reason for adding the flexibility to switch from dp_hash to hash is to provide a predictable hash across hosts. I have demonstrated this in the following test case – “ECMP static routes - custom hash”. The reason for using L4 port information was to keep it similar to dp_hash implementation. Thank You, Karthik From: Mark Michelson <mmichels@redhat.com> Date: Thursday, August 15, 2024 at 1:08 AM To: Karthik Chandrashekar <karthik.c@nutanix.com>, ovs-dev@openvswitch.org <ovs-dev@openvswitch.org> Subject: Re: [ovs-dev] [PATCH ovn v2] Support selection fields for ECMP routes. !-------------------------------------------------------------------| CAUTION: External Email |-------------------------------------------------------------------! Hi Karthik, thanks for the patch. The protocol items introduced in v2 are interesting. It highlights that there are inherent flaws in allowing L4 selection fields to be used in what is intended to be a L3 feature. IMO, there shouldn't be a need to discriminate based on L4 protocol when trying to route packets. The current patch does some things I don't particularly like with regards to L4 protocol: * The number of ECMP flows has tripled as a result of this patch. * The patch adds undocumented "dst_port" and "src_port" selection fields as options. * The patch has no SCTP support. (and if it did, then the number of ECMP flows would quadruple instead of tripling) I would say that since ECMP is a pure L3 feature, L4 selection fields should not be used at all as part of its hash function. As you show in your patch, you have to add protocol prerequisites to the match in case a L4-protocol-specific selection field is added. How to enforce this is difficult. We could add code to northd that specifically checks for the various L4 selection fields and rejects them if they are present (falling back to the default selection method as a result). Or we could document that L4 selection fields should not be used, but let the user shoot themselves in the foot if they absolutely insist on adding any. One thing that comes to mind for this particular feature is: what is the motivation? Why do you want to be able to hash on specific fields instead of using the default hash? Maybe that'll help me to understand why the L4 protocol considerations are here. I have some additional findings inline below. On 8/13/24 05:29, karthik.c@nutanix.com wrote: > From: Karthik Chandrashekar <karthik.c@nutanix.com> > > - This patch adds the ability to specify a custom set of packet headers > for hash computation for ECMP routes similar to the support that was > added for LB in 5af304e7478adcf5ac50ed41e96a55bebebff3e8 > - ECMP routes by default use dp_hash as a selection_method for OVS flows. > When ecmp_selection_fields is specified, the selection_method will be > hash with the specified list of fields used for computing the hash. > - For simplicity, list of fields that are used in the select action > is a union of all the fields specified in each Logical_Route_Static_Route > that is part of a given ECMP route. > - In order to allow match based on L4 port numbers, the lr_in_ip_routing > rules have been split into separate lflows with protocol specific > fields when src_port or dst_port is specified in the ecmp_selectioin_fields. > (This is based on the requirement that pre-requisites of fields > must be provided by any flows that output to the group) > > Signed-off-by: Karthik Chandrashekar <karthik.c@nutanix.com> > --- > v2: > - Install separate logical flows for TCP and UDP in lr_in_ip_routing. > - Add more test coverage. > --- > --- > include/ovn/actions.h | 1 + > lib/actions.c | 55 +++++++++- > northd/northd.c | 125 +++++++++++++++++++--- > ovn-nb.xml | 17 +++ > tests/ovn-northd.at | 42 ++++---- > tests/ovn.at | 242 ++++++++++++++++++++++++++++++++++++++++-- > utilities/ovn-nbctl.c | 17 ++- > 7 files changed, 450 insertions(+), 49 deletions(-) > > diff --git a/include/ovn/actions.h b/include/ovn/actions.h > index 88cf4de79..a9af1e38e 100644 > --- a/include/ovn/actions.h > +++ b/include/ovn/actions.h > @@ -338,6 +338,7 @@ struct ovnact_select { > struct ovnact_select_dst *dsts; > size_t n_dsts; > uint8_t ltable; /* Logical table ID of next table. */ > + char *hash_fields; > struct expr_field res_field; > }; > > diff --git a/lib/actions.c b/lib/actions.c > index 37676ef81..ac5d1dbd5 100644 > --- a/lib/actions.c > +++ b/lib/actions.c > @@ -1534,11 +1534,19 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) > struct ovnact_select_dst *dsts = NULL; > size_t allocated_dsts = 0; > size_t n_dsts = 0; > + bool requires_hash_fields = false; > + char *hash_fields = NULL; > > lexer_get(ctx->lexer); /* Skip "select". */ > lexer_get(ctx->lexer); /* Skip '('. */ > > - while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { > + if (lexer_match_id(ctx->lexer, "values")) { > + lexer_force_match(ctx->lexer, LEX_T_EQUALS); > + requires_hash_fields = true; > + } > + > + while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && > + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { This while statement allows for some odd syntax to be accepted. For instance, the following would be allowed: select(values=1=50, 2=100) hash_fields=ip_dst) One way to avoid this is to change which token to look for based on whether require_hash_fields is true. enum lex_type value_end_token = requires_hash_fields ? LEX_T_SEMICOLON : LEX_T_PAREN; while (lexer_match(ctx->lexer, value_end_token) { ... > struct ovnact_select_dst dst; > if (!action_parse_uint16(ctx, &dst.id, "id")) { > free(dsts); > @@ -1574,11 +1582,39 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) > return; > } > > + if (requires_hash_fields) { > + if (!lexer_match_id(ctx->lexer, "hash_fields")) { > + lexer_syntax_error(ctx->lexer, "expecting hash_fields"); > + free(dsts); > + return; > + } > + if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || > + ctx->lexer->token.type != LEX_T_STRING || > + lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { > + lexer_syntax_error(ctx->lexer, "invalid hash_fields"); > + free(dsts); > + return; > + } > + > + hash_fields = xstrdup(ctx->lexer->token.s); > + lexer_get(ctx->lexer); > + if (!lexer_match(ctx->lexer, LEX_T_SEMICOLON)) { > + lexer_get(ctx->lexer); > + } I'm having trouble understanding what this if statement accomplishes. > + } else { > + if (lexer_match_id(ctx->lexer, "hash_fields")) { I don't think there is a specific coding guideline that says you have to do it this way, but it's common to combine the else and if on a single line. This allows the code to be indented one level less. } else if (lexer_match_id(ctx->lexer, "hash_fields")) { > + lexer_syntax_error(ctx->lexer, "hash_fields unexpected"); > + free(dsts); > + return; > + } > + } > + > struct ovnact_select *select = ovnact_put_SELECT(ctx->ovnacts); > select->ltable = ctx->pp->cur_ltable + 1; > select->dsts = dsts; > select->n_dsts = n_dsts; > select->res_field = *res_field; > + select->hash_fields = hash_fields; > } > > static void > @@ -1588,6 +1624,9 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) > ds_put_cstr(s, " = "); > ds_put_cstr(s, "select"); > ds_put_char(s, '('); > + if (select->hash_fields) { > + ds_put_format(s, "values="); > + } > for (size_t i = 0; i < select->n_dsts; i++) { > if (i) { > ds_put_cstr(s, ", "); > @@ -1598,6 +1637,10 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) > ds_put_format(s, "=%"PRIu16, dst->weight); > } > ds_put_char(s, ')'); > + if (select->hash_fields) { > + ds_chomp(s, ')'); > + ds_put_format(s, "; hash_fields=\"%s\")", select->hash_fields); > + } > ds_put_char(s, ';'); > } > > @@ -1612,9 +1655,14 @@ encode_SELECT(const struct ovnact_select *select, > struct ofpact_group *og; > > struct ds ds = DS_EMPTY_INITIALIZER; > - ds_put_format(&ds, "type=select,selection_method=dp_hash"); > + ds_put_format(&ds, "type=select,selection_method=%s", > + select->hash_fields ? "hash": "dp_hash"); > + if (select->hash_fields) { > + ds_put_format(&ds, ",fields(%s)", select->hash_fields); > + } > > - if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT)) { > + if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT) && > + !select->hash_fields) { > /* Select dp-hash l4_symmetric by setting the upper 32bits of > * selection_method_param to value 1 (1 << 32): */ > ds_put_cstr(&ds, ",selection_method_param=0x100000000"); > @@ -1647,6 +1695,7 @@ static void > ovnact_select_free(struct ovnact_select *select) > { > free(select->dsts); > + free(select->hash_fields); > } > > static void > diff --git a/northd/northd.c b/northd/northd.c > index a8a0b6f94..1adc82ba1 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -282,9 +282,9 @@ static bool vxlan_mode; > * same ip_prefix values: > * - connected route overrides static one; > * - static route overrides src-ip route. */ > -#define ROUTE_PRIO_OFFSET_MULTIPLIER 3 > -#define ROUTE_PRIO_OFFSET_STATIC 1 > -#define ROUTE_PRIO_OFFSET_CONNECTED 2 > +#define ROUTE_PRIO_OFFSET_MULTIPLIER 5 > +#define ROUTE_PRIO_OFFSET_STATIC 2 > +#define ROUTE_PRIO_OFFSET_CONNECTED 4 > > /* Returns the type of the datapath to which a flow with the given 'stage' may > * be added. */ > @@ -10263,6 +10263,7 @@ struct parsed_route { > uint32_t route_table_id; > uint32_t hash; > const struct nbrec_logical_router_static_route *route; > + const char *ecmp_selection_fields; > bool ecmp_symmetric_reply; > bool is_discard_route; > }; > @@ -10373,6 +10374,12 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, > pr->ecmp_symmetric_reply = smap_get_bool(&route->options, > "ecmp_symmetric_reply", false); > pr->is_discard_route = is_discard_route; > + pr->ecmp_selection_fields = NULL; > + const char *ecmp_selection_fields = smap_get(&route->options, > + "ecmp_selection_fields"); > + if (ecmp_selection_fields) { > + pr->ecmp_selection_fields = ecmp_selection_fields; > + } smap_get() returns NULL if it fails. The six line addition above could be simplified to: pr->ecmp_selection_fields = smap_get(&route->options, "ecmp_selection_fields"); > ovs_list_insert(routes, &pr->list_node); > return pr; > } > @@ -10402,6 +10409,7 @@ struct ecmp_groups_node { > const char *origin; > uint32_t route_table_id; > uint16_t route_count; > + char *selection_fields; > struct ovs_list route_list; /* Contains ecmp_route_list_node */ > }; > > @@ -10418,6 +10426,34 @@ ecmp_groups_add_route(struct ecmp_groups_node *group, > struct ecmp_route_list_node *er = xmalloc(sizeof *er); > er->route = route; > er->id = ++group->route_count; > + > + if (route->ecmp_selection_fields) { > + if (group->selection_fields) { > + struct sset current_field_set; > + struct sset field_set; > + > + sset_from_delimited_string(¤t_field_set, > + group->selection_fields, ","); > + sset_from_delimited_string(&field_set, > + route->ecmp_selection_fields, ","); > + > + const char *field; > + SSET_FOR_EACH (field, &field_set) { > + sset_add(¤t_field_set, field); > + } This is an interesting behavior. If ECMP routes do not all have the same selection fields, then the result for the group is a set union of the selection fields from all ECMP routes in the group. IMO, all routes in an ECMP group should have the same configured selection fields. If they do not, then that is a configuration error. We can't assume that what the user wants is the union of all fields. On another note, the frequent conversion from string to sset back to string seems wasteful. There's not much reason to store the string version of the selection fields on the parsed_route. Instead, you could always store this as an sset. On the ecmp_group_node, it seems like the sset is used more frequently, and the string version is only really needed when adding the logical flow. So on the ecmp_group_node, you could also store just the sset of selection fields, and then in the function where ECMP group flows are added, that is where you could convert to a string. > + > + group->selection_fields = xasprintf("%s", > + sset_join(¤t_field_set, > + ",", "")); This introduces a memory leak. group->selection_fields needs to be freed before it is reassigned. > + > + sset_destroy(&field_set); > + sset_destroy(¤t_field_set); > + } else { > + group->selection_fields = xasprintf("%s", > + route->ecmp_selection_fields); > + } > + } > + > ovs_list_insert(&group->route_list, &er->list_node); > } > > @@ -10440,6 +10476,7 @@ ecmp_groups_add(struct hmap *ecmp_groups, > eg->is_src_route = route->is_src_route; > eg->origin = smap_get_def(&route->route->options, "origin", ""); > eg->route_table_id = route->route_table_id; > + eg->selection_fields = NULL; > ovs_list_init(&eg->route_list); > ecmp_groups_add_route(eg, route); > > @@ -10542,7 +10579,8 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen) > static void > build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, > const char *network_s, int plen, bool is_src_route, > - bool is_ipv4, struct ds *match, uint16_t *priority, int ofs) > + bool is_ipv4, struct ds *match, uint16_t *priority, int ofs, > + bool has_protocol_match) > { > const char *dir; > /* The priority here is calculated to implement longest-prefix-match > @@ -10554,6 +10592,10 @@ build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, > dir = "dst"; > } > > + if (has_protocol_match) { > + ofs += 1; > + } > + > *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs; > > if (op_inport) { > @@ -10740,7 +10782,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, > static void > build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, > const struct hmap *lr_ports, struct ecmp_groups_node *eg, > - struct lflow_ref *lflow_ref) > + struct lflow_ref *lflow_ref, const char *protocol) > > { > bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix); > @@ -10752,22 +10794,71 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, > int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ? > ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC; > build_route_match(NULL, eg->route_table_id, prefix_s, eg->plen, > - eg->is_src_route, is_ipv4, &route_match, &priority, ofs); > + eg->is_src_route, is_ipv4, &route_match, &priority, ofs, > + protocol != NULL); > free(prefix_s); > > - struct ds actions = DS_EMPTY_INITIALIZER; > - ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > - "; %s = select(", REG_ECMP_GROUP_ID, eg->id, > - REG_ECMP_MEMBER_ID); > + if (eg->selection_fields && protocol) { > + if (!strcmp(protocol, "tcp")) { > + ds_put_format(&route_match, " && tcp"); > + } else if (!strcmp(protocol, "udp")) { > + ds_put_format(&route_match, " && udp"); > + } > + } > > + struct ds values = DS_EMPTY_INITIALIZER; > bool is_first = true; > LIST_FOR_EACH (er, list_node, &eg->route_list) { > if (is_first) { > is_first = false; > } else { > - ds_put_cstr(&actions, ", "); > + ds_put_cstr(&values, ", "); > } > - ds_put_format(&actions, "%"PRIu16, er->id); > + ds_put_format(&values, "%"PRIu16, er->id); > + } > + > + struct ds actions = DS_EMPTY_INITIALIZER; > + if (eg->selection_fields) { > + struct sset current_field_set; > + struct sset field_set = SSET_INITIALIZER(&field_set); > + sset_from_delimited_string(¤t_field_set, > + eg->selection_fields, ","); > + > + const char *field; > + SSET_FOR_EACH (field, ¤t_field_set) { > + if (!strcmp(field, "src_port")) { > + if (protocol) { > + if (!strcmp(protocol, "tcp")) { > + sset_add(&field_set, "tcp_src"); > + } else if (!strcmp(protocol, "udp")) { > + sset_add(&field_set, "udp_src"); > + } > + } > + } else if (!strcmp(field, "dst_port")) { > + if (protocol) { > + if (!strcmp(protocol, "tcp")) { > + sset_add(&field_set, "tcp_dst"); > + } else if (!strcmp(protocol, "udp")) { > + sset_add(&field_set, "udp_dst"); > + } > + } > + } else { > + sset_add(&field_set, field); > + } > + } > + > + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > + "; %s = select(values=%s", REG_ECMP_GROUP_ID, eg->id, > + REG_ECMP_MEMBER_ID, ds_cstr(&values)); > + ds_put_format(&actions, "; hash_fields=\"%s\"", > + sset_join(&field_set, ",", "")); > + > + sset_destroy(¤t_field_set); > + sset_destroy(&field_set); > + } else { > + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > + "; %s = select(%s", REG_ECMP_GROUP_ID, eg->id, > + REG_ECMP_MEMBER_ID, ds_cstr(&values)); > } > > ds_put_cstr(&actions, ");"); > @@ -10847,7 +10938,7 @@ add_route(struct lflow_table *lflows, struct ovn_datapath *od, > } > } > build_route_match(op_inport, rtb_id, network_s, plen, is_src_route, > - is_ipv4, &match, &priority, ofs); > + is_ipv4, &match, &priority, ofs, false); > > struct ds common_actions = DS_EMPTY_INITIALIZER; > struct ds actions = DS_EMPTY_INITIALIZER; > @@ -12733,7 +12824,13 @@ build_static_route_flows_for_lrouter( > HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { > /* add a flow in IP_ROUTING, and one flow for each member in > * IP_ROUTING_ECMP. */ > - build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref); > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, NULL); > + if (group->selection_fields) { > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, > + "tcp"); > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, > + "udp"); > + } > } > const struct unique_routes_node *ur; > HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { > diff --git a/ovn-nb.xml b/ovn-nb.xml > index 6376320d3..7576bf7d2 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -3718,6 +3718,23 @@ or > </p> > </column> > > + <column name="options" key="ecmp_selection_fields"> > + <p> > + ECMP routes use OpenFlow groups of type <code>select</code> to > + pick a nexthop among the list of avaible nexthops. s/avaible/available/ > + OVS supports two selection methods: <code>dp_hash</code> and > + <code>hash</code> for hash computatiion and selecting s/computatiion/computation/ > + the buckets of a group. Please see the OVS documentation > + (man ovs-ofctl) for more details on the selection methods. > + </p> > + > + <p> > + OVN by default uses <code>dp_hash</code>. In order to use the > + <code>hash</code> selection method, specify comma-separated > + list of selection fields. > + </p> > + </column> > + > <column name="options" key="origin"> > In case ovn-interconnection has been learned this route, it will have > its origin set: either "connected" or "static". This key is supposed > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 199197f09..3d3046a21 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -6704,9 +6704,9 @@ ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dnl > @@ -6721,9 +6721,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) > @@ -6750,9 +6750,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) > @@ -6768,14 +6768,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 > ovn-sbctl dump-flows lr0 > lr0flows > > AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public > > ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > AT_CLEANUP > @@ -7199,16 +7199,16 @@ AT_CHECK([grep "lr_in_ip_routing_pre" lr0flows | ovn_strip_lflows], [0], [dnl > grep -e "(lr_in_ip_routing ).*outport" lr0flows > > AT_CHECK([grep -e "(lr_in_ip_routing ).*outport" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > ]) > > AT_CLEANUP > diff --git a/tests/ovn.at b/tests/ovn.at > index b31afbfb3..f45ffdfb5 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -2086,6 +2086,16 @@ reg0 = select(1, 2); > encodes as group:20 > uses group: id(20), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) > > +reg9[[16..31]] = select(values=1=50, 2=100, 3; hash_fields="ip_src,ip_dst" ); > + formats as reg9[[16..31]] = select(values=1=50, 2=100, 3=100; hash_fields="ip_src,ip_dst"); > + encodes as group:21 > + uses group: id(21), name(type=select,selection_method=hash,fields(ip_src,ip_dst),bucket=bucket_id=0,weight:50,actions=load:1->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[[16..31]],resubmit(,oflow_in_table)) > + > +reg0 = select(values=1, 2; hash_fields="ip_dst,ip_src"); > + formats as reg0 = select(values=1=100, 2=100; hash_fields="ip_dst,ip_src"); > + encodes as group:22 > + uses group: id(22), name(type=select,selection_method=hash,fields(ip_dst,ip_src),bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) > + > reg0 = select(1=, 2); > Syntax error at `,' expecting weight. > reg0 = select(1=0, 2); > @@ -2094,6 +2104,14 @@ reg0 = select(1=123456, 2); > Syntax error at `123456' expecting weight. > reg0 = select(123); > Syntax error at `;' expecting at least 2 group members. > +reg0 = select(values=1, 2); > + Syntax error at `;' expecting hash_fields. > +reg0 = select(values=1, 2; hash_fields); > + Syntax error at `)' invalid hash_fields. > +reg0 = select(values=1, 2; hash_fields=); > + Syntax error at `)' invalid hash_fields. > +reg0 = select(1, 2; hash_fields="ip_src"); > + Syntax error at `=' hash_fields unexpected. > ip.proto = select(1, 2, 3); > Field ip.proto is not modifiable. > reg0[[0..14]] = select(1, 2, 3); > @@ -2101,12 +2119,12 @@ reg0[[0..14]] = select(1, 2, 3); > > fwd_group(liveness=true, childports="eth0", "lsp1"); > formats as fwd_group(liveness="true", childports="eth0", "lsp1"); > - encodes as group:21 > - uses group: id(21), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:23 > + uses group: id(23), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(childports="eth0", "lsp1"); > - encodes as group:22 > - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:24 > + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(childports=eth0); > Syntax error at `eth0' expecting logical switch port. > @@ -2115,8 +2133,8 @@ fwd_group(); > Syntax error at `)' expecting `;'. > > fwd_group(childports="eth0", "lsp1"); > - encodes as group:22 > - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:24 > + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(liveness=xyzzy, childports="eth0", "lsp1"); > Syntax error at `xyzzy' expecting true or false. > @@ -26531,6 +26549,218 @@ OVN_CLEANUP([hv1]) > AT_CLEANUP > ]) > > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ECMP static routes - custom hash]) > +ovn_start > + > +# Logical network: > +# ls1 (192.168.1.0/24) - lr1 - ls2 (192.168.2.0/24) > +# lsl has lsp11 (192.168.1.11) and ls2 has lsp21 (192.168.2.21) and lsp22 > +# (192.168.2.22) > +# > +# Static routes on lr1: > +# 10.0.0.0/24 nexthop 192.168.2.21 > +# 10.0.0.0/24 nexthop 192.168.2.22 > +# > +# ECMP hash on ip_proto,src_ip,dst_ip,dst_port > +# > +# Test: > +# lsp11 send packets to 10.0.0.100 with different source ports > +# > +# Expected result: > +# All packets should go out of a either lsp21 or lsp22 > + > +ovn-nbctl lr-add lr1 > + > +ovn-nbctl ls-add ls1 > +ovn-nbctl ls-add ls2 > + > +for i in 1 2; do > + ovn-nbctl lrp-add lr1 lrp-lr1-ls${i} 00:00:00:01:0${i}:01 192.168.${i}.1/24 > + ovn-nbctl lsp-add ls${i} lsp-ls${i}-lr1 -- lsp-set-type lsp-ls${i}-lr1 router \ > + -- lsp-set-options lsp-ls${i}-lr1 router-port=lrp-lr1-ls${i} \ > + -- lsp-set-addresses lsp-ls${i}-lr1 router > +done > + > +#install static routes > +ovn-nbctl --ecmp-selection-fields="ip_proto,ip_src" lr-route-add lr1 10.0.0.0/24 192.168.2.21 > +ovn-nbctl --ecmp --ecmp-selection-fields="ip_proto,ip_src,ip_dst,dst_port" lr-route-add lr1 10.0.0.0/24 192.168.2.22 > + > +# Create logical ports > +ovn-nbctl lsp-add ls1 lsp11 -- \ > + lsp-set-addresses lsp11 "f0:00:00:00:01:11 192.168.1.11" > +ovn-nbctl lsp-add ls2 lsp21 -- \ > + lsp-set-addresses lsp21 "f0:00:00:00:02:21 192.168.2.21" > +ovn-nbctl lsp-add ls2 lsp22 -- \ > + lsp-set-addresses lsp22 "f0:00:00:00:02:22 192.168.2.22" > + > +net_add n1 > +sim_add hv1 > +as hv1 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.1 > + > +sim_add hv2 > +as hv2 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.2 > + > +as hv1 > +ovs-vsctl -- add-port br-int hv1-vif1 -- \ > + set interface hv1-vif1 external-ids:iface-id=lsp11 \ > + options:tx_pcap=hv1/vif1-tx.pcap \ > + options:rxq_pcap=hv1/vif1-rx.pcap \ > + ofport-request=1 > + > +ovs-vsctl -- add-port br-int hv1-vif2 -- \ > + set interface hv1-vif2 external-ids:iface-id=lsp21 \ > + options:tx_pcap=hv1/vif2-tx.pcap \ > + options:rxq_pcap=hv1/vif2-rx.pcap \ > + ofport-request=2 > + > +ovs-vsctl -- add-port br-int hv1-vif3 -- \ > + set interface hv1-vif3 external-ids:iface-id=lsp22 \ > + options:tx_pcap=hv1/vif3-tx.pcap \ > + options:rxq_pcap=hv1/vif3-rx.pcap \ > + ofport-request=3 > + > +# wait for earlier changes to take effect > +check ovn-nbctl --wait=hv sync > +wait_for_ports_up > + > +ovn-sbctl dump-flows > sbflows > +AT_CAPTURE_FILE([sbflows]) > + > +AT_CAPTURE_FILE([ofgroups]) > +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-groups br-int > ofgroups > + grep "selection_method=hash,fields" ofgroups | \ > + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 > +]) > + > +as hv1 ovs-ofctl dump-groups br-int > ofgroups > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 > +]) > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 > +]) > + > +as hv1 ovs-ofctl dump-flows br-int > oflows > +AT_CAPTURE_FILE([oflows]) > + > +for i in $(seq 5001 5010); do > + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && > + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + OVS_WAIT_UNTIL([as hv1 ovs-appctl -t ovn-controller inject-pkt "$packet"]) > + > + for j in 1 2; do > + # Assume all packets go to lsp2${j}. > + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && > + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} > + done > +done > + > +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed > +OVS_WAIT_UNTIL([ > + hv1_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` > + hv1_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` > + echo $hv1_rcv_n1 $hv1_rcv_n2 > + test $(($hv1_rcv_n1 + $hv1_rcv_n2)) -ge 10]) > + > +if test $hv1_rcv_n1 = 0; then > + AT_CHECK([test $hv1_rcv_n2 -ge 10], [0], []) > +else > + AT_CHECK([test $hv1_rcv_n1 -ge 10], [0], []) > +fi > + > +# Move all VIFs to hv2 and send the same packets again > +as hv1 > +ovs-vsctl del-port hv1-vif1 > +ovs-vsctl del-port hv1-vif2 > +ovs-vsctl del-port hv1-vif3 > + > +wait_column "" Port_Binding chassis logical_port=lsp11 > +wait_column "" Port_Binding chassis logical_port=lsp21 > +wait_column "" Port_Binding chassis logical_port=lsp22 > + > +as hv2 > +ovs-vsctl -- add-port br-int hv2-vif1 -- \ > + set interface hv2-vif1 external-ids:iface-id=lsp11 \ > + options:tx_pcap=hv2/vif1-tx.pcap \ > + options:rxq_pcap=hv2/vif1-rx.pcap \ > + ofport-request=1 > + > +ovs-vsctl -- add-port br-int hv2-vif2 -- \ > + set interface hv2-vif2 external-ids:iface-id=lsp21 \ > + options:tx_pcap=hv2/vif2-tx.pcap \ > + options:rxq_pcap=hv2/vif2-rx.pcap \ > + ofport-request=2 > + > +ovs-vsctl -- add-port br-int hv2-vif3 -- \ > + set interface hv2-vif3 external-ids:iface-id=lsp22 \ > + options:tx_pcap=hv2/vif3-tx.pcap \ > + options:rxq_pcap=hv2/vif3-rx.pcap \ > + ofport-request=3 > + > +# wait for earlier changes to take effect > +check ovn-nbctl --wait=hv sync > +wait_for_ports_up > + > +AT_CAPTURE_FILE([ofgroups]) > +OVS_WAIT_FOR_OUTPUT([as hv2 ovs-ofctl dump-groups br-int > ofgroups > + grep "selection_method=hash,fields" ofgroups | \ > + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 > +]) > + > +as hv2 ovs-ofctl dump-groups br-int > ofgroups > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 > +]) > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 > +]) > + > +as hv2 ovs-ofctl dump-flows br-int > oflows > +AT_CAPTURE_FILE([oflows]) > + > +for i in $(seq 5001 5010); do > + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && > + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + OVS_WAIT_UNTIL([as hv2 ovs-appctl -t ovn-controller inject-pkt "$packet"]) > + > + for j in 1 2; do > + # Assume all packets go to lsp2${j}. > + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && > + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} > + done > +done > + > +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed > +OVS_WAIT_UNTIL([ > + hv2_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` > + hv2_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` > + echo $hv2_rcv_n1 $hv2_rcv_n2 > + test $(($hv2_rcv_n1 + $hv2_rcv_n2)) -ge 10]) > + > +if test $hv2_rcv_n1 = 0; then > + AT_CHECK([test $hv2_rcv_n2 -ge 10], [0], []) > +else > + AT_CHECK([test $hv2_rcv_n1 -ge 10], [0], []) > +fi > + > +# All packets should of the same port on both hosts > +if test $hv1_rcv_n1 = 0; then > + AT_CHECK([test $hv2_rcv_n1 -eq 0], [0], []) > +else > + AT_CHECK([test $hv2_rcv_n2 -eq 0], [0], []) > +fi > + > +OVN_CLEANUP([hv1], [hv2]) > + > +AT_CLEANUP > +]) > > OVN_FOR_EACH_NORTHD([ > AT_SETUP([route tables -- <main> route table routes]) > diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c > index 679d3f2d9..70e8fb239 100644 > --- a/utilities/ovn-nbctl.c > +++ b/utilities/ovn-nbctl.c > @@ -4720,11 +4720,18 @@ nbctl_lr_route_add(struct ctl_context *ctx) > nbrec_logical_router_static_route_set_route_table(route, route_table); > } > > - if (ecmp_symmetric_reply) { > - const struct smap options = SMAP_CONST1(&options, > - "ecmp_symmetric_reply", > - "true"); > + const char *ecmp_selection_fields = shash_find_data(&ctx->options, > + "--ecmp-selection-fields"); > + if (ecmp_symmetric_reply || ecmp_selection_fields) { > + struct smap options = SMAP_INITIALIZER(&options); > + if (ecmp_symmetric_reply) { > + smap_add(&options, "ecmp_symmetric_reply", "true"); > + } > + if (ecmp_selection_fields) { > + smap_add(&options, "ecmp_selection_fields", ecmp_selection_fields); > + } > nbrec_logical_router_static_route_set_options(route, &options); > + smap_destroy(&options); > } > > nbrec_logical_router_update_static_routes_addvalue(lr, route); > @@ -8057,7 +8064,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { > { "lr-route-add", 3, 4, "ROUTER PREFIX NEXTHOP [PORT]", > nbctl_pre_lr_route_add, nbctl_lr_route_add, NULL, > "--may-exist,--ecmp,--ecmp-symmetric-reply,--policy=," > - "--route-table=,--bfd?", RW }, > + "--route-table=,--bfd?,--ecmp-selection-fields=", RW }, > { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", > nbctl_pre_lr_route_del, nbctl_lr_route_del, NULL, > "--if-exists,--policy=,--route-table=", RW },
Hi Mark and team, Could you please review the latest changes on the updated version (v3) of this patch. Thank You, Karthik From: dev <ovs-dev-bounces@openvswitch.org> on behalf of Karthik Chandrashekar <karthik.c@nutanix.com> Date: Tuesday, August 20, 2024 at 12:08 AM To: Mark Michelson <mmichels@redhat.com>, ovs-dev@openvswitch.org <ovs-dev@openvswitch.org> Subject: Re: [ovs-dev] [PATCH ovn v2] Support selection fields for ECMP routes. Hi Mark, I have addressed the review comments and sent out a v3 version of the patch. I have made the separation of flows based on protocol in the routing table more granular. This will only happen if match on src_port or dst_port is specified. I have updated the documentation on the usage and semantics along with some examples. Thank You, Karthik From: dev <ovs-dev-bounces@openvswitch.org> on behalf of Karthik Chandrashekar <karthik.c@nutanix.com> Date: Thursday, August 15, 2024 at 8:24 AM To: Mark Michelson <mmichels@redhat.com>, ovs-dev@openvswitch.org <ovs-dev@openvswitch.org> Subject: Re: [ovs-dev] [PATCH ovn v2] Support selection fields for ECMP routes. Hi Mark, Thank you for the feedback. The current implementation of dp_hash in the kernel uses 5-tuple for hash computation. The implementation also has a random seed. As a result when a VM migrates from one host to another, the hash value could change and the same flow could take a different nexthop. The reason for adding the flexibility to switch from dp_hash to hash is to provide a predictable hash across hosts. I have demonstrated this in the following test case – “ECMP static routes - custom hash”. The reason for using L4 port information was to keep it similar to dp_hash implementation. Thank You, Karthik From: Mark Michelson <mmichels@redhat.com> Date: Thursday, August 15, 2024 at 1:08 AM To: Karthik Chandrashekar <karthik.c@nutanix.com>, ovs-dev@openvswitch.org <ovs-dev@openvswitch.org> Subject: Re: [ovs-dev] [PATCH ovn v2] Support selection fields for ECMP routes. !-------------------------------------------------------------------| CAUTION: External Email |-------------------------------------------------------------------! Hi Karthik, thanks for the patch. The protocol items introduced in v2 are interesting. It highlights that there are inherent flaws in allowing L4 selection fields to be used in what is intended to be a L3 feature. IMO, there shouldn't be a need to discriminate based on L4 protocol when trying to route packets. The current patch does some things I don't particularly like with regards to L4 protocol: * The number of ECMP flows has tripled as a result of this patch. * The patch adds undocumented "dst_port" and "src_port" selection fields as options. * The patch has no SCTP support. (and if it did, then the number of ECMP flows would quadruple instead of tripling) I would say that since ECMP is a pure L3 feature, L4 selection fields should not be used at all as part of its hash function. As you show in your patch, you have to add protocol prerequisites to the match in case a L4-protocol-specific selection field is added. How to enforce this is difficult. We could add code to northd that specifically checks for the various L4 selection fields and rejects them if they are present (falling back to the default selection method as a result). Or we could document that L4 selection fields should not be used, but let the user shoot themselves in the foot if they absolutely insist on adding any. One thing that comes to mind for this particular feature is: what is the motivation? Why do you want to be able to hash on specific fields instead of using the default hash? Maybe that'll help me to understand why the L4 protocol considerations are here. I have some additional findings inline below. On 8/13/24 05:29, karthik.c@nutanix.com wrote: > From: Karthik Chandrashekar <karthik.c@nutanix.com> > > - This patch adds the ability to specify a custom set of packet headers > for hash computation for ECMP routes similar to the support that was > added for LB in 5af304e7478adcf5ac50ed41e96a55bebebff3e8 > - ECMP routes by default use dp_hash as a selection_method for OVS flows. > When ecmp_selection_fields is specified, the selection_method will be > hash with the specified list of fields used for computing the hash. > - For simplicity, list of fields that are used in the select action > is a union of all the fields specified in each Logical_Route_Static_Route > that is part of a given ECMP route. > - In order to allow match based on L4 port numbers, the lr_in_ip_routing > rules have been split into separate lflows with protocol specific > fields when src_port or dst_port is specified in the ecmp_selectioin_fields. > (This is based on the requirement that pre-requisites of fields > must be provided by any flows that output to the group) > > Signed-off-by: Karthik Chandrashekar <karthik.c@nutanix.com> > --- > v2: > - Install separate logical flows for TCP and UDP in lr_in_ip_routing. > - Add more test coverage. > --- > --- > include/ovn/actions.h | 1 + > lib/actions.c | 55 +++++++++- > northd/northd.c | 125 +++++++++++++++++++--- > ovn-nb.xml | 17 +++ > tests/ovn-northd.at | 42 ++++---- > tests/ovn.at | 242 ++++++++++++++++++++++++++++++++++++++++-- > utilities/ovn-nbctl.c | 17 ++- > 7 files changed, 450 insertions(+), 49 deletions(-) > > diff --git a/include/ovn/actions.h b/include/ovn/actions.h > index 88cf4de79..a9af1e38e 100644 > --- a/include/ovn/actions.h > +++ b/include/ovn/actions.h > @@ -338,6 +338,7 @@ struct ovnact_select { > struct ovnact_select_dst *dsts; > size_t n_dsts; > uint8_t ltable; /* Logical table ID of next table. */ > + char *hash_fields; > struct expr_field res_field; > }; > > diff --git a/lib/actions.c b/lib/actions.c > index 37676ef81..ac5d1dbd5 100644 > --- a/lib/actions.c > +++ b/lib/actions.c > @@ -1534,11 +1534,19 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) > struct ovnact_select_dst *dsts = NULL; > size_t allocated_dsts = 0; > size_t n_dsts = 0; > + bool requires_hash_fields = false; > + char *hash_fields = NULL; > > lexer_get(ctx->lexer); /* Skip "select". */ > lexer_get(ctx->lexer); /* Skip '('. */ > > - while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { > + if (lexer_match_id(ctx->lexer, "values")) { > + lexer_force_match(ctx->lexer, LEX_T_EQUALS); > + requires_hash_fields = true; > + } > + > + while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && > + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { This while statement allows for some odd syntax to be accepted. For instance, the following would be allowed: select(values=1=50, 2=100) hash_fields=ip_dst) One way to avoid this is to change which token to look for based on whether require_hash_fields is true. enum lex_type value_end_token = requires_hash_fields ? LEX_T_SEMICOLON : LEX_T_PAREN; while (lexer_match(ctx->lexer, value_end_token) { ... > struct ovnact_select_dst dst; > if (!action_parse_uint16(ctx, &dst.id, "id")) { > free(dsts); > @@ -1574,11 +1582,39 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) > return; > } > > + if (requires_hash_fields) { > + if (!lexer_match_id(ctx->lexer, "hash_fields")) { > + lexer_syntax_error(ctx->lexer, "expecting hash_fields"); > + free(dsts); > + return; > + } > + if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || > + ctx->lexer->token.type != LEX_T_STRING || > + lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { > + lexer_syntax_error(ctx->lexer, "invalid hash_fields"); > + free(dsts); > + return; > + } > + > + hash_fields = xstrdup(ctx->lexer->token.s); > + lexer_get(ctx->lexer); > + if (!lexer_match(ctx->lexer, LEX_T_SEMICOLON)) { > + lexer_get(ctx->lexer); > + } I'm having trouble understanding what this if statement accomplishes. > + } else { > + if (lexer_match_id(ctx->lexer, "hash_fields")) { I don't think there is a specific coding guideline that says you have to do it this way, but it's common to combine the else and if on a single line. This allows the code to be indented one level less. } else if (lexer_match_id(ctx->lexer, "hash_fields")) { > + lexer_syntax_error(ctx->lexer, "hash_fields unexpected"); > + free(dsts); > + return; > + } > + } > + > struct ovnact_select *select = ovnact_put_SELECT(ctx->ovnacts); > select->ltable = ctx->pp->cur_ltable + 1; > select->dsts = dsts; > select->n_dsts = n_dsts; > select->res_field = *res_field; > + select->hash_fields = hash_fields; > } > > static void > @@ -1588,6 +1624,9 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) > ds_put_cstr(s, " = "); > ds_put_cstr(s, "select"); > ds_put_char(s, '('); > + if (select->hash_fields) { > + ds_put_format(s, "values="); > + } > for (size_t i = 0; i < select->n_dsts; i++) { > if (i) { > ds_put_cstr(s, ", "); > @@ -1598,6 +1637,10 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) > ds_put_format(s, "=%"PRIu16, dst->weight); > } > ds_put_char(s, ')'); > + if (select->hash_fields) { > + ds_chomp(s, ')'); > + ds_put_format(s, "; hash_fields=\"%s\")", select->hash_fields); > + } > ds_put_char(s, ';'); > } > > @@ -1612,9 +1655,14 @@ encode_SELECT(const struct ovnact_select *select, > struct ofpact_group *og; > > struct ds ds = DS_EMPTY_INITIALIZER; > - ds_put_format(&ds, "type=select,selection_method=dp_hash"); > + ds_put_format(&ds, "type=select,selection_method=%s", > + select->hash_fields ? "hash": "dp_hash"); > + if (select->hash_fields) { > + ds_put_format(&ds, ",fields(%s)", select->hash_fields); > + } > > - if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT)) { > + if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT) && > + !select->hash_fields) { > /* Select dp-hash l4_symmetric by setting the upper 32bits of > * selection_method_param to value 1 (1 << 32): */ > ds_put_cstr(&ds, ",selection_method_param=0x100000000"); > @@ -1647,6 +1695,7 @@ static void > ovnact_select_free(struct ovnact_select *select) > { > free(select->dsts); > + free(select->hash_fields); > } > > static void > diff --git a/northd/northd.c b/northd/northd.c > index a8a0b6f94..1adc82ba1 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -282,9 +282,9 @@ static bool vxlan_mode; > * same ip_prefix values: > * - connected route overrides static one; > * - static route overrides src-ip route. */ > -#define ROUTE_PRIO_OFFSET_MULTIPLIER 3 > -#define ROUTE_PRIO_OFFSET_STATIC 1 > -#define ROUTE_PRIO_OFFSET_CONNECTED 2 > +#define ROUTE_PRIO_OFFSET_MULTIPLIER 5 > +#define ROUTE_PRIO_OFFSET_STATIC 2 > +#define ROUTE_PRIO_OFFSET_CONNECTED 4 > > /* Returns the type of the datapath to which a flow with the given 'stage' may > * be added. */ > @@ -10263,6 +10263,7 @@ struct parsed_route { > uint32_t route_table_id; > uint32_t hash; > const struct nbrec_logical_router_static_route *route; > + const char *ecmp_selection_fields; > bool ecmp_symmetric_reply; > bool is_discard_route; > }; > @@ -10373,6 +10374,12 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, > pr->ecmp_symmetric_reply = smap_get_bool(&route->options, > "ecmp_symmetric_reply", false); > pr->is_discard_route = is_discard_route; > + pr->ecmp_selection_fields = NULL; > + const char *ecmp_selection_fields = smap_get(&route->options, > + "ecmp_selection_fields"); > + if (ecmp_selection_fields) { > + pr->ecmp_selection_fields = ecmp_selection_fields; > + } smap_get() returns NULL if it fails. The six line addition above could be simplified to: pr->ecmp_selection_fields = smap_get(&route->options, "ecmp_selection_fields"); > ovs_list_insert(routes, &pr->list_node); > return pr; > } > @@ -10402,6 +10409,7 @@ struct ecmp_groups_node { > const char *origin; > uint32_t route_table_id; > uint16_t route_count; > + char *selection_fields; > struct ovs_list route_list; /* Contains ecmp_route_list_node */ > }; > > @@ -10418,6 +10426,34 @@ ecmp_groups_add_route(struct ecmp_groups_node *group, > struct ecmp_route_list_node *er = xmalloc(sizeof *er); > er->route = route; > er->id = ++group->route_count; > + > + if (route->ecmp_selection_fields) { > + if (group->selection_fields) { > + struct sset current_field_set; > + struct sset field_set; > + > + sset_from_delimited_string(¤t_field_set, > + group->selection_fields, ","); > + sset_from_delimited_string(&field_set, > + route->ecmp_selection_fields, ","); > + > + const char *field; > + SSET_FOR_EACH (field, &field_set) { > + sset_add(¤t_field_set, field); > + } This is an interesting behavior. If ECMP routes do not all have the same selection fields, then the result for the group is a set union of the selection fields from all ECMP routes in the group. IMO, all routes in an ECMP group should have the same configured selection fields. If they do not, then that is a configuration error. We can't assume that what the user wants is the union of all fields. On another note, the frequent conversion from string to sset back to string seems wasteful. There's not much reason to store the string version of the selection fields on the parsed_route. Instead, you could always store this as an sset. On the ecmp_group_node, it seems like the sset is used more frequently, and the string version is only really needed when adding the logical flow. So on the ecmp_group_node, you could also store just the sset of selection fields, and then in the function where ECMP group flows are added, that is where you could convert to a string. > + > + group->selection_fields = xasprintf("%s", > + sset_join(¤t_field_set, > + ",", "")); This introduces a memory leak. group->selection_fields needs to be freed before it is reassigned. > + > + sset_destroy(&field_set); > + sset_destroy(¤t_field_set); > + } else { > + group->selection_fields = xasprintf("%s", > + route->ecmp_selection_fields); > + } > + } > + > ovs_list_insert(&group->route_list, &er->list_node); > } > > @@ -10440,6 +10476,7 @@ ecmp_groups_add(struct hmap *ecmp_groups, > eg->is_src_route = route->is_src_route; > eg->origin = smap_get_def(&route->route->options, "origin", ""); > eg->route_table_id = route->route_table_id; > + eg->selection_fields = NULL; > ovs_list_init(&eg->route_list); > ecmp_groups_add_route(eg, route); > > @@ -10542,7 +10579,8 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen) > static void > build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, > const char *network_s, int plen, bool is_src_route, > - bool is_ipv4, struct ds *match, uint16_t *priority, int ofs) > + bool is_ipv4, struct ds *match, uint16_t *priority, int ofs, > + bool has_protocol_match) > { > const char *dir; > /* The priority here is calculated to implement longest-prefix-match > @@ -10554,6 +10592,10 @@ build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, > dir = "dst"; > } > > + if (has_protocol_match) { > + ofs += 1; > + } > + > *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs; > > if (op_inport) { > @@ -10740,7 +10782,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, > static void > build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, > const struct hmap *lr_ports, struct ecmp_groups_node *eg, > - struct lflow_ref *lflow_ref) > + struct lflow_ref *lflow_ref, const char *protocol) > > { > bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix); > @@ -10752,22 +10794,71 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, > int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ? > ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC; > build_route_match(NULL, eg->route_table_id, prefix_s, eg->plen, > - eg->is_src_route, is_ipv4, &route_match, &priority, ofs); > + eg->is_src_route, is_ipv4, &route_match, &priority, ofs, > + protocol != NULL); > free(prefix_s); > > - struct ds actions = DS_EMPTY_INITIALIZER; > - ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > - "; %s = select(", REG_ECMP_GROUP_ID, eg->id, > - REG_ECMP_MEMBER_ID); > + if (eg->selection_fields && protocol) { > + if (!strcmp(protocol, "tcp")) { > + ds_put_format(&route_match, " && tcp"); > + } else if (!strcmp(protocol, "udp")) { > + ds_put_format(&route_match, " && udp"); > + } > + } > > + struct ds values = DS_EMPTY_INITIALIZER; > bool is_first = true; > LIST_FOR_EACH (er, list_node, &eg->route_list) { > if (is_first) { > is_first = false; > } else { > - ds_put_cstr(&actions, ", "); > + ds_put_cstr(&values, ", "); > } > - ds_put_format(&actions, "%"PRIu16, er->id); > + ds_put_format(&values, "%"PRIu16, er->id); > + } > + > + struct ds actions = DS_EMPTY_INITIALIZER; > + if (eg->selection_fields) { > + struct sset current_field_set; > + struct sset field_set = SSET_INITIALIZER(&field_set); > + sset_from_delimited_string(¤t_field_set, > + eg->selection_fields, ","); > + > + const char *field; > + SSET_FOR_EACH (field, ¤t_field_set) { > + if (!strcmp(field, "src_port")) { > + if (protocol) { > + if (!strcmp(protocol, "tcp")) { > + sset_add(&field_set, "tcp_src"); > + } else if (!strcmp(protocol, "udp")) { > + sset_add(&field_set, "udp_src"); > + } > + } > + } else if (!strcmp(field, "dst_port")) { > + if (protocol) { > + if (!strcmp(protocol, "tcp")) { > + sset_add(&field_set, "tcp_dst"); > + } else if (!strcmp(protocol, "udp")) { > + sset_add(&field_set, "udp_dst"); > + } > + } > + } else { > + sset_add(&field_set, field); > + } > + } > + > + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > + "; %s = select(values=%s", REG_ECMP_GROUP_ID, eg->id, > + REG_ECMP_MEMBER_ID, ds_cstr(&values)); > + ds_put_format(&actions, "; hash_fields=\"%s\"", > + sset_join(&field_set, ",", "")); > + > + sset_destroy(¤t_field_set); > + sset_destroy(&field_set); > + } else { > + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 > + "; %s = select(%s", REG_ECMP_GROUP_ID, eg->id, > + REG_ECMP_MEMBER_ID, ds_cstr(&values)); > } > > ds_put_cstr(&actions, ");"); > @@ -10847,7 +10938,7 @@ add_route(struct lflow_table *lflows, struct ovn_datapath *od, > } > } > build_route_match(op_inport, rtb_id, network_s, plen, is_src_route, > - is_ipv4, &match, &priority, ofs); > + is_ipv4, &match, &priority, ofs, false); > > struct ds common_actions = DS_EMPTY_INITIALIZER; > struct ds actions = DS_EMPTY_INITIALIZER; > @@ -12733,7 +12824,13 @@ build_static_route_flows_for_lrouter( > HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { > /* add a flow in IP_ROUTING, and one flow for each member in > * IP_ROUTING_ECMP. */ > - build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref); > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, NULL); > + if (group->selection_fields) { > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, > + "tcp"); > + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, > + "udp"); > + } > } > const struct unique_routes_node *ur; > HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { > diff --git a/ovn-nb.xml b/ovn-nb.xml > index 6376320d3..7576bf7d2 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -3718,6 +3718,23 @@ or > </p> > </column> > > + <column name="options" key="ecmp_selection_fields"> > + <p> > + ECMP routes use OpenFlow groups of type <code>select</code> to > + pick a nexthop among the list of avaible nexthops. s/avaible/available/ > + OVS supports two selection methods: <code>dp_hash</code> and > + <code>hash</code> for hash computatiion and selecting s/computatiion/computation/ > + the buckets of a group. Please see the OVS documentation > + (man ovs-ofctl) for more details on the selection methods. > + </p> > + > + <p> > + OVN by default uses <code>dp_hash</code>. In order to use the > + <code>hash</code> selection method, specify comma-separated > + list of selection fields. > + </p> > + </column> > + > <column name="options" key="origin"> > In case ovn-interconnection has been learned this route, it will have > its origin set: either "connected" or "static". This key is supposed > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 199197f09..3d3046a21 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -6704,9 +6704,9 @@ ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dnl > @@ -6721,9 +6721,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) > @@ -6750,9 +6750,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) > table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) > table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl > table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) > @@ -6768,14 +6768,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 > ovn-sbctl dump-flows lr0 > lr0flows > > AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public > > ovn-sbctl dump-flows lr0 > lr0flows > AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) > ]) > > AT_CLEANUP > @@ -7199,16 +7199,16 @@ AT_CHECK([grep "lr_in_ip_routing_pre" lr0flows | ovn_strip_lflows], [0], [dnl > grep -e "(lr_in_ip_routing ).*outport" lr0flows > > AT_CHECK([grep -e "(lr_in_ip_routing ).*outport" lr0flows | ovn_strip_lflows], [0], [dnl > - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) > + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) > ]) > > AT_CLEANUP > diff --git a/tests/ovn.at b/tests/ovn.at > index b31afbfb3..f45ffdfb5 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -2086,6 +2086,16 @@ reg0 = select(1, 2); > encodes as group:20 > uses group: id(20), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) > > +reg9[[16..31]] = select(values=1=50, 2=100, 3; hash_fields="ip_src,ip_dst" ); > + formats as reg9[[16..31]] = select(values=1=50, 2=100, 3=100; hash_fields="ip_src,ip_dst"); > + encodes as group:21 > + uses group: id(21), name(type=select,selection_method=hash,fields(ip_src,ip_dst),bucket=bucket_id=0,weight:50,actions=load:1->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[[16..31]],resubmit(,oflow_in_table)) > + > +reg0 = select(values=1, 2; hash_fields="ip_dst,ip_src"); > + formats as reg0 = select(values=1=100, 2=100; hash_fields="ip_dst,ip_src"); > + encodes as group:22 > + uses group: id(22), name(type=select,selection_method=hash,fields(ip_dst,ip_src),bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) > + > reg0 = select(1=, 2); > Syntax error at `,' expecting weight. > reg0 = select(1=0, 2); > @@ -2094,6 +2104,14 @@ reg0 = select(1=123456, 2); > Syntax error at `123456' expecting weight. > reg0 = select(123); > Syntax error at `;' expecting at least 2 group members. > +reg0 = select(values=1, 2); > + Syntax error at `;' expecting hash_fields. > +reg0 = select(values=1, 2; hash_fields); > + Syntax error at `)' invalid hash_fields. > +reg0 = select(values=1, 2; hash_fields=); > + Syntax error at `)' invalid hash_fields. > +reg0 = select(1, 2; hash_fields="ip_src"); > + Syntax error at `=' hash_fields unexpected. > ip.proto = select(1, 2, 3); > Field ip.proto is not modifiable. > reg0[[0..14]] = select(1, 2, 3); > @@ -2101,12 +2119,12 @@ reg0[[0..14]] = select(1, 2, 3); > > fwd_group(liveness=true, childports="eth0", "lsp1"); > formats as fwd_group(liveness="true", childports="eth0", "lsp1"); > - encodes as group:21 > - uses group: id(21), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:23 > + uses group: id(23), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(childports="eth0", "lsp1"); > - encodes as group:22 > - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:24 > + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(childports=eth0); > Syntax error at `eth0' expecting logical switch port. > @@ -2115,8 +2133,8 @@ fwd_group(); > Syntax error at `)' expecting `;'. > > fwd_group(childports="eth0", "lsp1"); > - encodes as group:22 > - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > + encodes as group:24 > + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) > > fwd_group(liveness=xyzzy, childports="eth0", "lsp1"); > Syntax error at `xyzzy' expecting true or false. > @@ -26531,6 +26549,218 @@ OVN_CLEANUP([hv1]) > AT_CLEANUP > ]) > > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ECMP static routes - custom hash]) > +ovn_start > + > +# Logical network: > +# ls1 (192.168.1.0/24) - lr1 - ls2 (192.168.2.0/24) > +# lsl has lsp11 (192.168.1.11) and ls2 has lsp21 (192.168.2.21) and lsp22 > +# (192.168.2.22) > +# > +# Static routes on lr1: > +# 10.0.0.0/24 nexthop 192.168.2.21 > +# 10.0.0.0/24 nexthop 192.168.2.22 > +# > +# ECMP hash on ip_proto,src_ip,dst_ip,dst_port > +# > +# Test: > +# lsp11 send packets to 10.0.0.100 with different source ports > +# > +# Expected result: > +# All packets should go out of a either lsp21 or lsp22 > + > +ovn-nbctl lr-add lr1 > + > +ovn-nbctl ls-add ls1 > +ovn-nbctl ls-add ls2 > + > +for i in 1 2; do > + ovn-nbctl lrp-add lr1 lrp-lr1-ls${i} 00:00:00:01:0${i}:01 192.168.${i}.1/24 > + ovn-nbctl lsp-add ls${i} lsp-ls${i}-lr1 -- lsp-set-type lsp-ls${i}-lr1 router \ > + -- lsp-set-options lsp-ls${i}-lr1 router-port=lrp-lr1-ls${i} \ > + -- lsp-set-addresses lsp-ls${i}-lr1 router > +done > + > +#install static routes > +ovn-nbctl --ecmp-selection-fields="ip_proto,ip_src" lr-route-add lr1 10.0.0.0/24 192.168.2.21 > +ovn-nbctl --ecmp --ecmp-selection-fields="ip_proto,ip_src,ip_dst,dst_port" lr-route-add lr1 10.0.0.0/24 192.168.2.22 > + > +# Create logical ports > +ovn-nbctl lsp-add ls1 lsp11 -- \ > + lsp-set-addresses lsp11 "f0:00:00:00:01:11 192.168.1.11" > +ovn-nbctl lsp-add ls2 lsp21 -- \ > + lsp-set-addresses lsp21 "f0:00:00:00:02:21 192.168.2.21" > +ovn-nbctl lsp-add ls2 lsp22 -- \ > + lsp-set-addresses lsp22 "f0:00:00:00:02:22 192.168.2.22" > + > +net_add n1 > +sim_add hv1 > +as hv1 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.1 > + > +sim_add hv2 > +as hv2 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.2 > + > +as hv1 > +ovs-vsctl -- add-port br-int hv1-vif1 -- \ > + set interface hv1-vif1 external-ids:iface-id=lsp11 \ > + options:tx_pcap=hv1/vif1-tx.pcap \ > + options:rxq_pcap=hv1/vif1-rx.pcap \ > + ofport-request=1 > + > +ovs-vsctl -- add-port br-int hv1-vif2 -- \ > + set interface hv1-vif2 external-ids:iface-id=lsp21 \ > + options:tx_pcap=hv1/vif2-tx.pcap \ > + options:rxq_pcap=hv1/vif2-rx.pcap \ > + ofport-request=2 > + > +ovs-vsctl -- add-port br-int hv1-vif3 -- \ > + set interface hv1-vif3 external-ids:iface-id=lsp22 \ > + options:tx_pcap=hv1/vif3-tx.pcap \ > + options:rxq_pcap=hv1/vif3-rx.pcap \ > + ofport-request=3 > + > +# wait for earlier changes to take effect > +check ovn-nbctl --wait=hv sync > +wait_for_ports_up > + > +ovn-sbctl dump-flows > sbflows > +AT_CAPTURE_FILE([sbflows]) > + > +AT_CAPTURE_FILE([ofgroups]) > +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-groups br-int > ofgroups > + grep "selection_method=hash,fields" ofgroups | \ > + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 > +]) > + > +as hv1 ovs-ofctl dump-groups br-int > ofgroups > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 > +]) > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 > +]) > + > +as hv1 ovs-ofctl dump-flows br-int > oflows > +AT_CAPTURE_FILE([oflows]) > + > +for i in $(seq 5001 5010); do > + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && > + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + OVS_WAIT_UNTIL([as hv1 ovs-appctl -t ovn-controller inject-pkt "$packet"]) > + > + for j in 1 2; do > + # Assume all packets go to lsp2${j}. > + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && > + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} > + done > +done > + > +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed > +OVS_WAIT_UNTIL([ > + hv1_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` > + hv1_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` > + echo $hv1_rcv_n1 $hv1_rcv_n2 > + test $(($hv1_rcv_n1 + $hv1_rcv_n2)) -ge 10]) > + > +if test $hv1_rcv_n1 = 0; then > + AT_CHECK([test $hv1_rcv_n2 -ge 10], [0], []) > +else > + AT_CHECK([test $hv1_rcv_n1 -ge 10], [0], []) > +fi > + > +# Move all VIFs to hv2 and send the same packets again > +as hv1 > +ovs-vsctl del-port hv1-vif1 > +ovs-vsctl del-port hv1-vif2 > +ovs-vsctl del-port hv1-vif3 > + > +wait_column "" Port_Binding chassis logical_port=lsp11 > +wait_column "" Port_Binding chassis logical_port=lsp21 > +wait_column "" Port_Binding chassis logical_port=lsp22 > + > +as hv2 > +ovs-vsctl -- add-port br-int hv2-vif1 -- \ > + set interface hv2-vif1 external-ids:iface-id=lsp11 \ > + options:tx_pcap=hv2/vif1-tx.pcap \ > + options:rxq_pcap=hv2/vif1-rx.pcap \ > + ofport-request=1 > + > +ovs-vsctl -- add-port br-int hv2-vif2 -- \ > + set interface hv2-vif2 external-ids:iface-id=lsp21 \ > + options:tx_pcap=hv2/vif2-tx.pcap \ > + options:rxq_pcap=hv2/vif2-rx.pcap \ > + ofport-request=2 > + > +ovs-vsctl -- add-port br-int hv2-vif3 -- \ > + set interface hv2-vif3 external-ids:iface-id=lsp22 \ > + options:tx_pcap=hv2/vif3-tx.pcap \ > + options:rxq_pcap=hv2/vif3-rx.pcap \ > + ofport-request=3 > + > +# wait for earlier changes to take effect > +check ovn-nbctl --wait=hv sync > +wait_for_ports_up > + > +AT_CAPTURE_FILE([ofgroups]) > +OVS_WAIT_FOR_OUTPUT([as hv2 ovs-ofctl dump-groups br-int > ofgroups > + grep "selection_method=hash,fields" ofgroups | \ > + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 > +]) > + > +as hv2 ovs-ofctl dump-groups br-int > ofgroups > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 > +]) > +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 > +]) > + > +as hv2 ovs-ofctl dump-flows br-int > oflows > +AT_CAPTURE_FILE([oflows]) > + > +for i in $(seq 5001 5010); do > + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && > + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + OVS_WAIT_UNTIL([as hv2 ovs-appctl -t ovn-controller inject-pkt "$packet"]) > + > + for j in 1 2; do > + # Assume all packets go to lsp2${j}. > + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && > + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && > + tcp && tcp.src==$i && tcp.dst==80" > + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} > + done > +done > + > +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed > +OVS_WAIT_UNTIL([ > + hv2_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` > + hv2_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` > + echo $hv2_rcv_n1 $hv2_rcv_n2 > + test $(($hv2_rcv_n1 + $hv2_rcv_n2)) -ge 10]) > + > +if test $hv2_rcv_n1 = 0; then > + AT_CHECK([test $hv2_rcv_n2 -ge 10], [0], []) > +else > + AT_CHECK([test $hv2_rcv_n1 -ge 10], [0], []) > +fi > + > +# All packets should of the same port on both hosts > +if test $hv1_rcv_n1 = 0; then > + AT_CHECK([test $hv2_rcv_n1 -eq 0], [0], []) > +else > + AT_CHECK([test $hv2_rcv_n2 -eq 0], [0], []) > +fi > + > +OVN_CLEANUP([hv1], [hv2]) > + > +AT_CLEANUP > +]) > > OVN_FOR_EACH_NORTHD([ > AT_SETUP([route tables -- <main> route table routes]) > diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c > index 679d3f2d9..70e8fb239 100644 > --- a/utilities/ovn-nbctl.c > +++ b/utilities/ovn-nbctl.c > @@ -4720,11 +4720,18 @@ nbctl_lr_route_add(struct ctl_context *ctx) > nbrec_logical_router_static_route_set_route_table(route, route_table); > } > > - if (ecmp_symmetric_reply) { > - const struct smap options = SMAP_CONST1(&options, > - "ecmp_symmetric_reply", > - "true"); > + const char *ecmp_selection_fields = shash_find_data(&ctx->options, > + "--ecmp-selection-fields"); > + if (ecmp_symmetric_reply || ecmp_selection_fields) { > + struct smap options = SMAP_INITIALIZER(&options); > + if (ecmp_symmetric_reply) { > + smap_add(&options, "ecmp_symmetric_reply", "true"); > + } > + if (ecmp_selection_fields) { > + smap_add(&options, "ecmp_selection_fields", ecmp_selection_fields); > + } > nbrec_logical_router_static_route_set_options(route, &options); > + smap_destroy(&options); > } > > nbrec_logical_router_update_static_routes_addvalue(lr, route); > @@ -8057,7 +8064,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { > { "lr-route-add", 3, 4, "ROUTER PREFIX NEXTHOP [PORT]", > nbctl_pre_lr_route_add, nbctl_lr_route_add, NULL, > "--may-exist,--ecmp,--ecmp-symmetric-reply,--policy=," > - "--route-table=,--bfd?", RW }, > + "--route-table=,--bfd?,--ecmp-selection-fields=", RW }, > { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", > nbctl_pre_lr_route_del, nbctl_lr_route_del, NULL, > "--if-exists,--policy=,--route-table=", RW },
diff --git a/include/ovn/actions.h b/include/ovn/actions.h index 88cf4de79..a9af1e38e 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -338,6 +338,7 @@ struct ovnact_select { struct ovnact_select_dst *dsts; size_t n_dsts; uint8_t ltable; /* Logical table ID of next table. */ + char *hash_fields; struct expr_field res_field; }; diff --git a/lib/actions.c b/lib/actions.c index 37676ef81..ac5d1dbd5 100644 --- a/lib/actions.c +++ b/lib/actions.c @@ -1534,11 +1534,19 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) struct ovnact_select_dst *dsts = NULL; size_t allocated_dsts = 0; size_t n_dsts = 0; + bool requires_hash_fields = false; + char *hash_fields = NULL; lexer_get(ctx->lexer); /* Skip "select". */ lexer_get(ctx->lexer); /* Skip '('. */ - while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { + if (lexer_match_id(ctx->lexer, "values")) { + lexer_force_match(ctx->lexer, LEX_T_EQUALS); + requires_hash_fields = true; + } + + while (!lexer_match(ctx->lexer, LEX_T_SEMICOLON) && + !lexer_match(ctx->lexer, LEX_T_RPAREN)) { struct ovnact_select_dst dst; if (!action_parse_uint16(ctx, &dst.id, "id")) { free(dsts); @@ -1574,11 +1582,39 @@ parse_select_action(struct action_context *ctx, struct expr_field *res_field) return; } + if (requires_hash_fields) { + if (!lexer_match_id(ctx->lexer, "hash_fields")) { + lexer_syntax_error(ctx->lexer, "expecting hash_fields"); + free(dsts); + return; + } + if (!lexer_match(ctx->lexer, LEX_T_EQUALS) || + ctx->lexer->token.type != LEX_T_STRING || + lexer_lookahead(ctx->lexer) != LEX_T_RPAREN) { + lexer_syntax_error(ctx->lexer, "invalid hash_fields"); + free(dsts); + return; + } + + hash_fields = xstrdup(ctx->lexer->token.s); + lexer_get(ctx->lexer); + if (!lexer_match(ctx->lexer, LEX_T_SEMICOLON)) { + lexer_get(ctx->lexer); + } + } else { + if (lexer_match_id(ctx->lexer, "hash_fields")) { + lexer_syntax_error(ctx->lexer, "hash_fields unexpected"); + free(dsts); + return; + } + } + struct ovnact_select *select = ovnact_put_SELECT(ctx->ovnacts); select->ltable = ctx->pp->cur_ltable + 1; select->dsts = dsts; select->n_dsts = n_dsts; select->res_field = *res_field; + select->hash_fields = hash_fields; } static void @@ -1588,6 +1624,9 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) ds_put_cstr(s, " = "); ds_put_cstr(s, "select"); ds_put_char(s, '('); + if (select->hash_fields) { + ds_put_format(s, "values="); + } for (size_t i = 0; i < select->n_dsts; i++) { if (i) { ds_put_cstr(s, ", "); @@ -1598,6 +1637,10 @@ format_SELECT(const struct ovnact_select *select, struct ds *s) ds_put_format(s, "=%"PRIu16, dst->weight); } ds_put_char(s, ')'); + if (select->hash_fields) { + ds_chomp(s, ')'); + ds_put_format(s, "; hash_fields=\"%s\")", select->hash_fields); + } ds_put_char(s, ';'); } @@ -1612,9 +1655,14 @@ encode_SELECT(const struct ovnact_select *select, struct ofpact_group *og; struct ds ds = DS_EMPTY_INITIALIZER; - ds_put_format(&ds, "type=select,selection_method=dp_hash"); + ds_put_format(&ds, "type=select,selection_method=%s", + select->hash_fields ? "hash": "dp_hash"); + if (select->hash_fields) { + ds_put_format(&ds, ",fields(%s)", select->hash_fields); + } - if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT)) { + if (ovs_feature_is_supported(OVS_DP_HASH_L4_SYM_SUPPORT) && + !select->hash_fields) { /* Select dp-hash l4_symmetric by setting the upper 32bits of * selection_method_param to value 1 (1 << 32): */ ds_put_cstr(&ds, ",selection_method_param=0x100000000"); @@ -1647,6 +1695,7 @@ static void ovnact_select_free(struct ovnact_select *select) { free(select->dsts); + free(select->hash_fields); } static void diff --git a/northd/northd.c b/northd/northd.c index a8a0b6f94..1adc82ba1 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -282,9 +282,9 @@ static bool vxlan_mode; * same ip_prefix values: * - connected route overrides static one; * - static route overrides src-ip route. */ -#define ROUTE_PRIO_OFFSET_MULTIPLIER 3 -#define ROUTE_PRIO_OFFSET_STATIC 1 -#define ROUTE_PRIO_OFFSET_CONNECTED 2 +#define ROUTE_PRIO_OFFSET_MULTIPLIER 5 +#define ROUTE_PRIO_OFFSET_STATIC 2 +#define ROUTE_PRIO_OFFSET_CONNECTED 4 /* Returns the type of the datapath to which a flow with the given 'stage' may * be added. */ @@ -10263,6 +10263,7 @@ struct parsed_route { uint32_t route_table_id; uint32_t hash; const struct nbrec_logical_router_static_route *route; + const char *ecmp_selection_fields; bool ecmp_symmetric_reply; bool is_discard_route; }; @@ -10373,6 +10374,12 @@ parsed_routes_add(struct ovn_datapath *od, const struct hmap *lr_ports, pr->ecmp_symmetric_reply = smap_get_bool(&route->options, "ecmp_symmetric_reply", false); pr->is_discard_route = is_discard_route; + pr->ecmp_selection_fields = NULL; + const char *ecmp_selection_fields = smap_get(&route->options, + "ecmp_selection_fields"); + if (ecmp_selection_fields) { + pr->ecmp_selection_fields = ecmp_selection_fields; + } ovs_list_insert(routes, &pr->list_node); return pr; } @@ -10402,6 +10409,7 @@ struct ecmp_groups_node { const char *origin; uint32_t route_table_id; uint16_t route_count; + char *selection_fields; struct ovs_list route_list; /* Contains ecmp_route_list_node */ }; @@ -10418,6 +10426,34 @@ ecmp_groups_add_route(struct ecmp_groups_node *group, struct ecmp_route_list_node *er = xmalloc(sizeof *er); er->route = route; er->id = ++group->route_count; + + if (route->ecmp_selection_fields) { + if (group->selection_fields) { + struct sset current_field_set; + struct sset field_set; + + sset_from_delimited_string(¤t_field_set, + group->selection_fields, ","); + sset_from_delimited_string(&field_set, + route->ecmp_selection_fields, ","); + + const char *field; + SSET_FOR_EACH (field, &field_set) { + sset_add(¤t_field_set, field); + } + + group->selection_fields = xasprintf("%s", + sset_join(¤t_field_set, + ",", "")); + + sset_destroy(&field_set); + sset_destroy(¤t_field_set); + } else { + group->selection_fields = xasprintf("%s", + route->ecmp_selection_fields); + } + } + ovs_list_insert(&group->route_list, &er->list_node); } @@ -10440,6 +10476,7 @@ ecmp_groups_add(struct hmap *ecmp_groups, eg->is_src_route = route->is_src_route; eg->origin = smap_get_def(&route->route->options, "origin", ""); eg->route_table_id = route->route_table_id; + eg->selection_fields = NULL; ovs_list_init(&eg->route_list); ecmp_groups_add_route(eg, route); @@ -10542,7 +10579,8 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen) static void build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, const char *network_s, int plen, bool is_src_route, - bool is_ipv4, struct ds *match, uint16_t *priority, int ofs) + bool is_ipv4, struct ds *match, uint16_t *priority, int ofs, + bool has_protocol_match) { const char *dir; /* The priority here is calculated to implement longest-prefix-match @@ -10554,6 +10592,10 @@ build_route_match(const struct ovn_port *op_inport, uint32_t rtb_id, dir = "dst"; } + if (has_protocol_match) { + ofs += 1; + } + *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs; if (op_inport) { @@ -10740,7 +10782,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, static void build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, const struct hmap *lr_ports, struct ecmp_groups_node *eg, - struct lflow_ref *lflow_ref) + struct lflow_ref *lflow_ref, const char *protocol) { bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix); @@ -10752,22 +10794,71 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC; build_route_match(NULL, eg->route_table_id, prefix_s, eg->plen, - eg->is_src_route, is_ipv4, &route_match, &priority, ofs); + eg->is_src_route, is_ipv4, &route_match, &priority, ofs, + protocol != NULL); free(prefix_s); - struct ds actions = DS_EMPTY_INITIALIZER; - ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 - "; %s = select(", REG_ECMP_GROUP_ID, eg->id, - REG_ECMP_MEMBER_ID); + if (eg->selection_fields && protocol) { + if (!strcmp(protocol, "tcp")) { + ds_put_format(&route_match, " && tcp"); + } else if (!strcmp(protocol, "udp")) { + ds_put_format(&route_match, " && udp"); + } + } + struct ds values = DS_EMPTY_INITIALIZER; bool is_first = true; LIST_FOR_EACH (er, list_node, &eg->route_list) { if (is_first) { is_first = false; } else { - ds_put_cstr(&actions, ", "); + ds_put_cstr(&values, ", "); } - ds_put_format(&actions, "%"PRIu16, er->id); + ds_put_format(&values, "%"PRIu16, er->id); + } + + struct ds actions = DS_EMPTY_INITIALIZER; + if (eg->selection_fields) { + struct sset current_field_set; + struct sset field_set = SSET_INITIALIZER(&field_set); + sset_from_delimited_string(¤t_field_set, + eg->selection_fields, ","); + + const char *field; + SSET_FOR_EACH (field, ¤t_field_set) { + if (!strcmp(field, "src_port")) { + if (protocol) { + if (!strcmp(protocol, "tcp")) { + sset_add(&field_set, "tcp_src"); + } else if (!strcmp(protocol, "udp")) { + sset_add(&field_set, "udp_src"); + } + } + } else if (!strcmp(field, "dst_port")) { + if (protocol) { + if (!strcmp(protocol, "tcp")) { + sset_add(&field_set, "tcp_dst"); + } else if (!strcmp(protocol, "udp")) { + sset_add(&field_set, "udp_dst"); + } + } + } else { + sset_add(&field_set, field); + } + } + + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 + "; %s = select(values=%s", REG_ECMP_GROUP_ID, eg->id, + REG_ECMP_MEMBER_ID, ds_cstr(&values)); + ds_put_format(&actions, "; hash_fields=\"%s\"", + sset_join(&field_set, ",", "")); + + sset_destroy(¤t_field_set); + sset_destroy(&field_set); + } else { + ds_put_format(&actions, "ip.ttl--; flags.loopback = 1; %s = %"PRIu16 + "; %s = select(%s", REG_ECMP_GROUP_ID, eg->id, + REG_ECMP_MEMBER_ID, ds_cstr(&values)); } ds_put_cstr(&actions, ");"); @@ -10847,7 +10938,7 @@ add_route(struct lflow_table *lflows, struct ovn_datapath *od, } } build_route_match(op_inport, rtb_id, network_s, plen, is_src_route, - is_ipv4, &match, &priority, ofs); + is_ipv4, &match, &priority, ofs, false); struct ds common_actions = DS_EMPTY_INITIALIZER; struct ds actions = DS_EMPTY_INITIALIZER; @@ -12733,7 +12824,13 @@ build_static_route_flows_for_lrouter( HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { /* add a flow in IP_ROUTING, and one flow for each member in * IP_ROUTING_ECMP. */ - build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref); + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, NULL); + if (group->selection_fields) { + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, + "tcp"); + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, + "udp"); + } } const struct unique_routes_node *ur; HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { diff --git a/ovn-nb.xml b/ovn-nb.xml index 6376320d3..7576bf7d2 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -3718,6 +3718,23 @@ or </p> </column> + <column name="options" key="ecmp_selection_fields"> + <p> + ECMP routes use OpenFlow groups of type <code>select</code> to + pick a nexthop among the list of avaible nexthops. + OVS supports two selection methods: <code>dp_hash</code> and + <code>hash</code> for hash computatiion and selecting + the buckets of a group. Please see the OVS documentation + (man ovs-ofctl) for more details on the selection methods. + </p> + + <p> + OVN by default uses <code>dp_hash</code>. In order to use the + <code>hash</code> selection method, specify comma-separated + list of selection fields. + </p> + </column> + <column name="options" key="origin"> In case ovn-interconnection has been learned this route, it will have its origin set: either "connected" or "static". This key is supposed diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 199197f09..3d3046a21 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -6704,9 +6704,9 @@ ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ]) AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dnl @@ -6721,9 +6721,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ]) AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) @@ -6750,9 +6750,9 @@ AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing ), priority=0 , match=(1), action=(drop;) table=??(lr_in_ip_routing ), priority=10300, match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 192.168.0.1; outport = "lr0-public"; next;) table=??(lr_in_ip_routing ), priority=10550, match=(nd_rs || nd_ra), action=(drop;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 0 && ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lr0-public" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ]) AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl table=??(lr_in_ip_routing_ecmp), priority=0 , match=(1), action=(drop;) @@ -6768,14 +6768,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10 ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | ovn_strip_lflows], [0], [dnl - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ]) check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public ovn-sbctl dump-flows lr0 > lr0flows AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | ovn_strip_lflows], [0], [dnl - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 0 && ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;) ]) AT_CLEANUP @@ -7199,16 +7199,16 @@ AT_CHECK([grep "lr_in_ip_routing_pre" lr0flows | ovn_strip_lflows], [0], [dnl grep -e "(lr_in_ip_routing ).*outport" lr0flows AT_CHECK([grep -e "(lr_in_ip_routing ).*outport" lr0flows | ovn_strip_lflows], [0], [dnl - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=1 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=194 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=73 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=74 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) - table=??(lr_in_ip_routing ), priority=97 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=122 , match=(reg7 == 1 && ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.1.10; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.1.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.1.1; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=124 , match=(ip4.dst == 192.168.2.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.2.1; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=162 , match=(reg7 == 2 && ip4.dst == 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=2 , match=(reg7 == 2 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp0" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp1" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:101; eth.src = 00:00:00:00:01:01; outport = "lrp1"; flags.loopback = 1; next;) + table=??(lr_in_ip_routing ), priority=324 , match=(inport == "lrp2" && ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = ip6.dst; xxreg1 = fe80::200:ff:fe00:201; eth.src = 00:00:00:00:02:01; outport = "lrp2"; flags.loopback = 1; next;) ]) AT_CLEANUP diff --git a/tests/ovn.at b/tests/ovn.at index b31afbfb3..f45ffdfb5 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2086,6 +2086,16 @@ reg0 = select(1, 2); encodes as group:20 uses group: id(20), name(type=select,selection_method=dp_hash,bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) +reg9[[16..31]] = select(values=1=50, 2=100, 3; hash_fields="ip_src,ip_dst" ); + formats as reg9[[16..31]] = select(values=1=50, 2=100, 3=100; hash_fields="ip_src,ip_dst"); + encodes as group:21 + uses group: id(21), name(type=select,selection_method=hash,fields(ip_src,ip_dst),bucket=bucket_id=0,weight:50,actions=load:1->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xreg4[[16..31]],resubmit(,oflow_in_table),bucket=bucket_id=2,weight:100,actions=load:3->xreg4[[16..31]],resubmit(,oflow_in_table)) + +reg0 = select(values=1, 2; hash_fields="ip_dst,ip_src"); + formats as reg0 = select(values=1=100, 2=100; hash_fields="ip_dst,ip_src"); + encodes as group:22 + uses group: id(22), name(type=select,selection_method=hash,fields(ip_dst,ip_src),bucket=bucket_id=0,weight:100,actions=load:1->xxreg0[[96..127]],resubmit(,oflow_in_table),bucket=bucket_id=1,weight:100,actions=load:2->xxreg0[[96..127]],resubmit(,oflow_in_table)) + reg0 = select(1=, 2); Syntax error at `,' expecting weight. reg0 = select(1=0, 2); @@ -2094,6 +2104,14 @@ reg0 = select(1=123456, 2); Syntax error at `123456' expecting weight. reg0 = select(123); Syntax error at `;' expecting at least 2 group members. +reg0 = select(values=1, 2); + Syntax error at `;' expecting hash_fields. +reg0 = select(values=1, 2; hash_fields); + Syntax error at `)' invalid hash_fields. +reg0 = select(values=1, 2; hash_fields=); + Syntax error at `)' invalid hash_fields. +reg0 = select(1, 2; hash_fields="ip_src"); + Syntax error at `=' hash_fields unexpected. ip.proto = select(1, 2, 3); Field ip.proto is not modifiable. reg0[[0..14]] = select(1, 2, 3); @@ -2101,12 +2119,12 @@ reg0[[0..14]] = select(1, 2, 3); fwd_group(liveness=true, childports="eth0", "lsp1"); formats as fwd_group(liveness="true", childports="eth0", "lsp1"); - encodes as group:21 - uses group: id(21), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) + encodes as group:23 + uses group: id(23), name(type=select,selection_method=dp_hash,bucket=watch_port:5,load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=watch_port:17,load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) fwd_group(childports="eth0", "lsp1"); - encodes as group:22 - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) + encodes as group:24 + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) fwd_group(childports=eth0); Syntax error at `eth0' expecting logical switch port. @@ -2115,8 +2133,8 @@ fwd_group(); Syntax error at `)' expecting `;'. fwd_group(childports="eth0", "lsp1"); - encodes as group:22 - uses group: id(22), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) + encodes as group:24 + uses group: id(24), name(type=select,selection_method=dp_hash,bucket=load=0x5->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT),bucket=load=0x17->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_SAVE_INPORT)) fwd_group(liveness=xyzzy, childports="eth0", "lsp1"); Syntax error at `xyzzy' expecting true or false. @@ -26531,6 +26549,218 @@ OVN_CLEANUP([hv1]) AT_CLEANUP ]) +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ECMP static routes - custom hash]) +ovn_start + +# Logical network: +# ls1 (192.168.1.0/24) - lr1 - ls2 (192.168.2.0/24) +# lsl has lsp11 (192.168.1.11) and ls2 has lsp21 (192.168.2.21) and lsp22 +# (192.168.2.22) +# +# Static routes on lr1: +# 10.0.0.0/24 nexthop 192.168.2.21 +# 10.0.0.0/24 nexthop 192.168.2.22 +# +# ECMP hash on ip_proto,src_ip,dst_ip,dst_port +# +# Test: +# lsp11 send packets to 10.0.0.100 with different source ports +# +# Expected result: +# All packets should go out of a either lsp21 or lsp22 + +ovn-nbctl lr-add lr1 + +ovn-nbctl ls-add ls1 +ovn-nbctl ls-add ls2 + +for i in 1 2; do + ovn-nbctl lrp-add lr1 lrp-lr1-ls${i} 00:00:00:01:0${i}:01 192.168.${i}.1/24 + ovn-nbctl lsp-add ls${i} lsp-ls${i}-lr1 -- lsp-set-type lsp-ls${i}-lr1 router \ + -- lsp-set-options lsp-ls${i}-lr1 router-port=lrp-lr1-ls${i} \ + -- lsp-set-addresses lsp-ls${i}-lr1 router +done + +#install static routes +ovn-nbctl --ecmp-selection-fields="ip_proto,ip_src" lr-route-add lr1 10.0.0.0/24 192.168.2.21 +ovn-nbctl --ecmp --ecmp-selection-fields="ip_proto,ip_src,ip_dst,dst_port" lr-route-add lr1 10.0.0.0/24 192.168.2.22 + +# Create logical ports +ovn-nbctl lsp-add ls1 lsp11 -- \ + lsp-set-addresses lsp11 "f0:00:00:00:01:11 192.168.1.11" +ovn-nbctl lsp-add ls2 lsp21 -- \ + lsp-set-addresses lsp21 "f0:00:00:00:02:21 192.168.2.21" +ovn-nbctl lsp-add ls2 lsp22 -- \ + lsp-set-addresses lsp22 "f0:00:00:00:02:22 192.168.2.22" + +net_add n1 +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 + +sim_add hv2 +as hv2 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.2 + +as hv1 +ovs-vsctl -- add-port br-int hv1-vif1 -- \ + set interface hv1-vif1 external-ids:iface-id=lsp11 \ + options:tx_pcap=hv1/vif1-tx.pcap \ + options:rxq_pcap=hv1/vif1-rx.pcap \ + ofport-request=1 + +ovs-vsctl -- add-port br-int hv1-vif2 -- \ + set interface hv1-vif2 external-ids:iface-id=lsp21 \ + options:tx_pcap=hv1/vif2-tx.pcap \ + options:rxq_pcap=hv1/vif2-rx.pcap \ + ofport-request=2 + +ovs-vsctl -- add-port br-int hv1-vif3 -- \ + set interface hv1-vif3 external-ids:iface-id=lsp22 \ + options:tx_pcap=hv1/vif3-tx.pcap \ + options:rxq_pcap=hv1/vif3-rx.pcap \ + ofport-request=3 + +# wait for earlier changes to take effect +check ovn-nbctl --wait=hv sync +wait_for_ports_up + +ovn-sbctl dump-flows > sbflows +AT_CAPTURE_FILE([sbflows]) + +AT_CAPTURE_FILE([ofgroups]) +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-groups br-int > ofgroups + grep "selection_method=hash,fields" ofgroups | \ + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 +]) + +as hv1 ovs-ofctl dump-groups br-int > ofgroups +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 +]) +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 +]) + +as hv1 ovs-ofctl dump-flows br-int > oflows +AT_CAPTURE_FILE([oflows]) + +for i in $(seq 5001 5010); do + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && + tcp && tcp.src==$i && tcp.dst==80" + OVS_WAIT_UNTIL([as hv1 ovs-appctl -t ovn-controller inject-pkt "$packet"]) + + for j in 1 2; do + # Assume all packets go to lsp2${j}. + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && + tcp && tcp.src==$i && tcp.dst==80" + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} + done +done + +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed +OVS_WAIT_UNTIL([ + hv1_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` + hv1_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` + echo $hv1_rcv_n1 $hv1_rcv_n2 + test $(($hv1_rcv_n1 + $hv1_rcv_n2)) -ge 10]) + +if test $hv1_rcv_n1 = 0; then + AT_CHECK([test $hv1_rcv_n2 -ge 10], [0], []) +else + AT_CHECK([test $hv1_rcv_n1 -ge 10], [0], []) +fi + +# Move all VIFs to hv2 and send the same packets again +as hv1 +ovs-vsctl del-port hv1-vif1 +ovs-vsctl del-port hv1-vif2 +ovs-vsctl del-port hv1-vif3 + +wait_column "" Port_Binding chassis logical_port=lsp11 +wait_column "" Port_Binding chassis logical_port=lsp21 +wait_column "" Port_Binding chassis logical_port=lsp22 + +as hv2 +ovs-vsctl -- add-port br-int hv2-vif1 -- \ + set interface hv2-vif1 external-ids:iface-id=lsp11 \ + options:tx_pcap=hv2/vif1-tx.pcap \ + options:rxq_pcap=hv2/vif1-rx.pcap \ + ofport-request=1 + +ovs-vsctl -- add-port br-int hv2-vif2 -- \ + set interface hv2-vif2 external-ids:iface-id=lsp21 \ + options:tx_pcap=hv2/vif2-tx.pcap \ + options:rxq_pcap=hv2/vif2-rx.pcap \ + ofport-request=2 + +ovs-vsctl -- add-port br-int hv2-vif3 -- \ + set interface hv2-vif3 external-ids:iface-id=lsp22 \ + options:tx_pcap=hv2/vif3-tx.pcap \ + options:rxq_pcap=hv2/vif3-rx.pcap \ + ofport-request=3 + +# wait for earlier changes to take effect +check ovn-nbctl --wait=hv sync +wait_for_ports_up + +AT_CAPTURE_FILE([ofgroups]) +OVS_WAIT_FOR_OUTPUT([as hv2 ovs-ofctl dump-groups br-int > ofgroups + grep "selection_method=hash,fields" ofgroups | \ + grep "nw_proto" | grep "ip_src" | grep "ip_dst" | wc -l], [0], [3 +]) + +as hv2 ovs-ofctl dump-groups br-int > ofgroups +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "tcp_dst" | wc -l], [0], [1 +]) +AT_CHECK([grep "nw_proto" ofgroups | grep "ip_src" | grep "ip_dst" | grep "udp_dst" | wc -l], [0], [1 +]) + +as hv2 ovs-ofctl dump-flows br-int > oflows +AT_CAPTURE_FILE([oflows]) + +for i in $(seq 5001 5010); do + packet="inport==\"lsp11\" && eth.src==f0:00:00:00:01:11 && eth.dst==00:00:00:01:01:01 && + ip4 && ip.ttl==64 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && + tcp && tcp.src==$i && tcp.dst==80" + OVS_WAIT_UNTIL([as hv2 ovs-appctl -t ovn-controller inject-pkt "$packet"]) + + for j in 1 2; do + # Assume all packets go to lsp2${j}. + exp_packet="eth.src==00:00:00:01:02:01 && eth.dst==f0:00:00:00:02:2${j} && + ip4 && ip.ttl==63 && ip4.src==192.168.1.11 && ip4.dst==10.0.0.100 && + tcp && tcp.src==$i && tcp.dst==80" + echo $exp_packet | ovstest test-ovn expr-to-packets >> expected_lsp2${j} + done +done + +# All packets should go out of a single port given the hashing is based on ip_proto,ip_src,ip_dst,dst_port which is fixed +OVS_WAIT_UNTIL([ + hv2_rcv_n1=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif2-tx.pcap > lsp21.packets && cat lsp21.packets | wc -l` + hv2_rcv_n2=`$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/vif3-tx.pcap > lsp22.packets && cat lsp22.packets | wc -l` + echo $hv2_rcv_n1 $hv2_rcv_n2 + test $(($hv2_rcv_n1 + $hv2_rcv_n2)) -ge 10]) + +if test $hv2_rcv_n1 = 0; then + AT_CHECK([test $hv2_rcv_n2 -ge 10], [0], []) +else + AT_CHECK([test $hv2_rcv_n1 -ge 10], [0], []) +fi + +# All packets should of the same port on both hosts +if test $hv1_rcv_n1 = 0; then + AT_CHECK([test $hv2_rcv_n1 -eq 0], [0], []) +else + AT_CHECK([test $hv2_rcv_n2 -eq 0], [0], []) +fi + +OVN_CLEANUP([hv1], [hv2]) + +AT_CLEANUP +]) OVN_FOR_EACH_NORTHD([ AT_SETUP([route tables -- <main> route table routes]) diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c index 679d3f2d9..70e8fb239 100644 --- a/utilities/ovn-nbctl.c +++ b/utilities/ovn-nbctl.c @@ -4720,11 +4720,18 @@ nbctl_lr_route_add(struct ctl_context *ctx) nbrec_logical_router_static_route_set_route_table(route, route_table); } - if (ecmp_symmetric_reply) { - const struct smap options = SMAP_CONST1(&options, - "ecmp_symmetric_reply", - "true"); + const char *ecmp_selection_fields = shash_find_data(&ctx->options, + "--ecmp-selection-fields"); + if (ecmp_symmetric_reply || ecmp_selection_fields) { + struct smap options = SMAP_INITIALIZER(&options); + if (ecmp_symmetric_reply) { + smap_add(&options, "ecmp_symmetric_reply", "true"); + } + if (ecmp_selection_fields) { + smap_add(&options, "ecmp_selection_fields", ecmp_selection_fields); + } nbrec_logical_router_static_route_set_options(route, &options); + smap_destroy(&options); } nbrec_logical_router_update_static_routes_addvalue(lr, route); @@ -8057,7 +8064,7 @@ static const struct ctl_command_syntax nbctl_commands[] = { { "lr-route-add", 3, 4, "ROUTER PREFIX NEXTHOP [PORT]", nbctl_pre_lr_route_add, nbctl_lr_route_add, NULL, "--may-exist,--ecmp,--ecmp-symmetric-reply,--policy=," - "--route-table=,--bfd?", RW }, + "--route-table=,--bfd?,--ecmp-selection-fields=", RW }, { "lr-route-del", 1, 4, "ROUTER [PREFIX [NEXTHOP [PORT]]]", nbctl_pre_lr_route_del, nbctl_lr_route_del, NULL, "--if-exists,--policy=,--route-table=", RW },