@@ -1598,6 +1598,9 @@ icmp4 {
port (ingress table <code>ARP Request</code> will generate an ARP
request, if needed, with <code>reg0</code> as the target protocol
address and <code>reg1</code> as the source protocol address).
+ An IP route can be configured with multiple paths to the next hop.
+ If a packet has multiple paths to its destination, OVN stores the port
+ index in <code>reg0</code> to indicate the packet's output port in table 6.
</p>
<p>
@@ -1617,6 +1620,28 @@ icmp4 {
<li>
<p>
+ IPv4/IPv6 multipath routing table. For each route to IPv4/IPv6
+ network <var>N</var> with netmask <var>M</var>, on multipath port
+ <var>P</var> with IP address <var>A</var> and Ethernet
+ address <var>E</var>, a logical flow with match
+ <code>ip4.dst == <var>N</var>/<var>M</var></code>, whose priority
+ is twice the number of 1-bits in <var>M</var>, plus 11,
+ has the following actions:
+ </p>
+
+ <pre>
+ip.ttl--;
+multipath (nw_dst, 0, modulo_n, <var>n_links</var>, 0, reg0);
+reg9[2] = 1;
+next;
+ </pre>
+ <p>
+ <var>n_links</var> is the number of multipath ports.
+ </p>
+ </li>
+
+ <li>
+ <p>
IPv4 routing table. For each route to IPv4 network <var>N</var> with
netmask <var>M</var>, on router port <var>P</var> with IP address
<var>A</var> and Ethernet
@@ -1686,7 +1711,43 @@ next;
</li>
</ul>
- <h3>Ingress Table 6: ARP/ND Resolution</h3>
+ <h3>Ingress Table 6: Multipath</h3>
+ <p>
+ Any packet that reaches this table with <code>reg9[2] = 1</code> is an
+ IP packet that is routed to the corresponding port by the following
+ flows. This table implements dispatching by consuming <code>reg0</code>.
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ For a packet with netmask <var>M</var>, IP address <var>A</var> and
+ <code>reg9[2] = 1</code>, a logical flow whose priority is above 1 has
+ the following actions:
+ </p>
+
+ <pre>
+reg0 = <var>G</var>;
+reg1 = <var>A</var>;
+eth.src = <var>E</var>;
+outport = <var>P</var>;
+flags.loopback = 1;
+next;
+ </pre>
+
+ <p>
+ <var>G</var> is the gateway IP address. <var>A</var>, <var>E</var>
+ and <var>P</var> are the values that were described in multipath
+ routing in table 5.
+ </p>
+
+ <p>
+ A priority-0 logical flow with match <code>1</code> has actions <code>next;</code>.
+ </p>
+ </li>
+ </ul>
+
+ <h3>Ingress Table 7: ARP/ND Resolution</h3>
<p>
Any packet that reaches this table is an IP packet whose next-hop
@@ -1779,7 +1840,7 @@ next;
</li>
</ul>
- <h3>Ingress Table 7: Gateway Redirect</h3>
+ <h3>Ingress Table 8: Gateway Redirect</h3>
<p>
For distributed logical routers where one of the logical router
@@ -1836,7 +1897,7 @@ next;
</li>
</ul>
- <h3>Ingress Table 8: ARP Request</h3>
+ <h3>Ingress Table 9: ARP Request</h3>
<p>
In the common case where the Ethernet destination has been resolved, this
@@ -135,9 +135,10 @@ enum ovn_stage {
PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \
PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \
PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 5, "lr_in_ip_routing") \
- PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 6, "lr_in_arp_resolve") \
- PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 7, "lr_in_gw_redirect") \
- PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 8, "lr_in_arp_request") \
+ PIPELINE_STAGE(ROUTER, IN, MULTIPATH, 6, "lr_in_multipath") \
+ PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 7, "lr_in_arp_resolve") \
+ PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 8, "lr_in_gw_redirect") \
+ PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 9, "lr_in_arp_request") \
\
/* Logical router egress stages. */ \
PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \
@@ -173,6 +174,11 @@ enum ovn_stage {
* one of the logical router's own IP addresses. */
#define REGBIT_EGRESS_LOOPBACK "reg9[1]"
+/* Indicates that the multipath action has processed this packet and stored
+ * the hash result into another regX. The hash result should be consumed to
+ * determine the right output port. */
+#define REGBIT_MULTIPATH "reg9[2]"
+
/* Returns an "enum ovn_stage" built from the arguments. */
static enum ovn_stage
ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
@@ -4142,72 +4148,165 @@ add_route(struct hmap *lflows, const struct ovn_port *op,
}
static void
-build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
- struct hmap *ports,
- const struct nbrec_logical_router_static_route *route)
+add_multipath_route(struct hmap *lflows, uint32_t port_num,
+ struct ovn_port **out_ports,
+ const char **lrp_addr_s,
+ struct ovn_datapath *od,
+ const char *network_s, int plen,
+ const char *gateway, const char *policy)
+{
+ bool is_ipv4 = strchr(network_s, '.') ? true : false;
+ struct ds match = DS_EMPTY_INITIALIZER;
+ const char *dir;
+ uint16_t priority;
+
+ if (policy && !strcmp(policy, "src-ip")) {
+ dir = "src";
+ priority = plen * 2;
+ } else {
+ dir = "dst";
+ priority = (plen * 2) + 1;
+ }
+
+ /* Set higher priority than regular route. */
+ priority += 10;
+
+ ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
+ network_s, plen);
+
+ struct ds actions = DS_EMPTY_INITIALIZER;
+
+ ds_put_format(&actions, "ip.ttl--; ");
+ ds_put_format(&actions,
+ "multipath (nw_dst, 0, modulo_n, %u, 0, reg0); "
+ "%s = 1; "
+ "next;",
+ port_num, REGBIT_MULTIPATH);
+
+ /* Routing-table flow: runs the multipath action with 'port_num' links,
+ * writing its result to reg0, and sets REGBIT_MULTIPATH.
+ * The priority here is calculated to implement longest-prefix-match
+ * routing.
+ * TODO(review): the hash field is always nw_dst, even when 'policy' is
+ * "src-ip" -- confirm this is intended. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, priority,
+ ds_cstr(&match), ds_cstr(&actions));
+
+ for (int i = 0; i < port_num; i++) {
+ struct ds mp_match = DS_EMPTY_INITIALIZER;
+ struct ds mp_actions = DS_EMPTY_INITIALIZER;
+
+ ds_put_format(&mp_match, "%s == 1 && reg0 == %d && ",
+ REGBIT_MULTIPATH, i);
+ ds_put_format(&mp_match, "ip%s.%s == %s/%d",
+ is_ipv4 ? "4" : "6", dir,
+ network_s, plen);
+
+ ds_put_format(&mp_actions, "%sreg0 = ", is_ipv4 ? "" : "xx");
+ if (gateway) {
+ ds_put_cstr(&mp_actions, gateway);
+ } else {
+ ds_put_format(&mp_actions, "ip%s.dst", is_ipv4 ? "4" : "6");
+ }
+
+ ds_put_format(&mp_actions, "; "
+ "%sreg1 = %s; "
+ "eth.src = %s; "
+ "outport = %s; "
+ "flags.loopback = 1; "
+ "next;",
+ is_ipv4 ? "" : "xx",
+ lrp_addr_s[i],
+ out_ports[i]->lrp_networks.ea_s,
+ out_ports[i]->json_key);
+
+ /* Add flow in table 6 to determine the right output port
+ * for this traffic. One flow is added per index i in
+ * 0 .. port_num - 1; it matches reg0 == i and then overwrites
+ * (xx)reg0 and (xx)reg1 and sets eth.src and outport from
+ * lrp_addr_s[i] and out_ports[i].
+ * TODO(review): 'i' is int but 'port_num' is uint32_t. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_MULTIPATH, priority,
+ ds_cstr(&mp_match), ds_cstr(&mp_actions));
+ ds_destroy(&mp_match);
+ ds_destroy(&mp_actions);
+ }
+ ds_destroy(&match);
+ ds_destroy(&actions);
+}
+
+static bool
+verify_nexthop_prefix(const struct nbrec_logical_router_static_route *route,
+ bool *is_ipv4, char **prefix_s, unsigned int *plen)
{
ovs_be32 nexthop;
- const char *lrp_addr_s = NULL;
- unsigned int plen;
- bool is_ipv4;
/* Verify that the next hop is an IP address with an all-ones mask. */
- char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
+ char *error = ip_parse_cidr(route->nexthop, &nexthop, plen);
if (!error) {
- if (plen != 32) {
+ if (*plen != 32) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
- return;
+ return false;
}
- is_ipv4 = true;
+ *is_ipv4 = true;
} else {
free(error);
struct in6_addr ip6;
- error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
+ error = ipv6_parse_cidr(route->nexthop, &ip6, plen);
if (!error) {
- if (plen != 128) {
+ if (*plen != 128) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
- return;
+ return false;
}
- is_ipv4 = false;
+ *is_ipv4 = false;
} else {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
free(error);
- return;
+ return false;
}
}
- char *prefix_s;
- if (is_ipv4) {
+ if (*is_ipv4) {
ovs_be32 prefix;
/* Verify that ip prefix is a valid IPv4 address. */
- error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
+ error = ip_parse_cidr(route->ip_prefix, &prefix, plen);
if (error) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
route->ip_prefix);
free(error);
- return;
+ return false;
}
- prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
+ *prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix
+ & be32_prefix_mask(*plen)));
} else {
/* Verify that ip prefix is a valid IPv6 address. */
struct in6_addr prefix;
- error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
+ error = ipv6_parse_cidr(route->ip_prefix, &prefix, plen);
if (error) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
route->ip_prefix);
free(error);
- return;
+ return false;
}
- struct in6_addr mask = ipv6_create_mask(plen);
+ struct in6_addr mask = ipv6_create_mask(*plen);
struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
- prefix_s = xmalloc(INET6_ADDRSTRLEN);
- inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
+ *prefix_s = xmalloc(INET6_ADDRSTRLEN);
+ inet_ntop(AF_INET6, &network, *prefix_s, INET6_ADDRSTRLEN);
+ }
+
+ return true;
+}
+
+static void
+build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
+ struct hmap *ports,
+ const struct nbrec_logical_router_static_route *route)
+{
+ const char *lrp_addr_s = NULL;
+ unsigned int plen;
+ bool is_ipv4;
+ char *prefix_s = NULL;
+
+ if (!verify_nexthop_prefix(route, &is_ipv4, &prefix_s, &plen)) {
+ return;
}
/* Find the outgoing port. */
@@ -4270,7 +4369,75 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
policy);
free_prefix_s:
- free(prefix_s);
+ if (prefix_s) {
+ free(prefix_s);
+ }
+}
+
+static void
+build_multipath_flow(struct hmap *lflows, struct ovn_datapath *od,
+ struct hmap *ports,
+ const struct nbrec_logical_router_static_route *route)
+{
+ unsigned int plen;
+ bool is_ipv4;
+ char *prefix_s = NULL;
+
+ if (!verify_nexthop_prefix(route, &is_ipv4, &prefix_s, &plen)) {
+ return;
+ }
+
+ /* Find the outgoing port for every entry of route->multipath_port,
+ * together with the address string to use on it: the result of
+ * find_lrp_member_ip() for the next hop, or else the port's first
+ * IPv4/IPv6 address. If any port or address cannot be found, a
+ * warning is logged and no multipath flow is added for this route. */
+ struct ovn_port **out_ports = xmalloc(route->n_multipath_port *
+ sizeof(struct ovn_port *));
+ const char **lrp_addr_s = xmalloc(route->n_multipath_port *
+ sizeof(const char *));
+ for (int i = 0; i < route->n_multipath_port; i++) {
+ // TODO: May need to consider the case where some ports are not found?
+ out_ports[i] = ovn_port_find(ports, route->multipath_port[i]);
+ if (!out_ports[i]) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
+ route->multipath_port[i], route->ip_prefix);
+ goto free_ports_lrp_addr;
+ }
+
+ lrp_addr_s[i] = find_lrp_member_ip(out_ports[i], route->nexthop);
+ if (!lrp_addr_s[i]) {
+ if (is_ipv4) {
+ if (out_ports[i]->lrp_networks.n_ipv4_addrs) {
+ lrp_addr_s[i] = out_ports[i]->
+ lrp_networks.ipv4_addrs[0].addr_s;
+ }
+ } else {
+ if (out_ports[i]->lrp_networks.n_ipv6_addrs) {
+ lrp_addr_s[i] = out_ports[i]->
+ lrp_networks.ipv6_addrs[0].addr_s;
+ }
+ }
+ }
+ if (!lrp_addr_s[i]) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl,
+ "%s has no path for static route %s; next hop %s",
+ route->multipath_port[i], route->ip_prefix,
+ route->nexthop);
+ goto free_ports_lrp_addr;
+ }
+ }
+
+
+ char *policy = route->policy ? route->policy : "dst-ip";
+ add_multipath_route(lflows, route->n_multipath_port,
+ out_ports, lrp_addr_s, od,
+ prefix_s, plen, route->nexthop, policy);
+
+free_ports_lrp_addr:
+ free(out_ports);
+ free(lrp_addr_s);
+ if (prefix_s) {
+ free(prefix_s);
+ }
}
static void
@@ -5344,7 +5511,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
}
}
- /* Convert the static routes to flows. */
+ /* Convert the static routes and multipath routes to flows. */
HMAP_FOR_EACH (od, key_node, datapaths) {
if (!od->nbr) {
continue;
@@ -5355,12 +5522,24 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
route = od->nbr->static_routes[i];
build_static_route_flow(lflows, od, ports, route);
+ /* Logical router ingress table 5-6: Multipath Routing.
+ *
+ * If the router is configured so that traffic has multiple paths
+ * to its destination, the right output port is figured
+ * out from the IP packet's header. */
+ if (route->n_multipath_port > 1) {
+ /* Generate multipath routes in table 5,6 for
+ * dedicated traffic */
+ build_multipath_flow(lflows, od, ports, route);
+ }
}
+ /* Packets are allowed by default in table 6. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_MULTIPATH, 0, "1", "next;");
}
/* XXX destination unreachable */
- /* Local router ingress table 6: ARP Resolution.
+ /* Local router ingress table 7: ARP Resolution.
*
* Any packet that reaches this table is an IP packet whose next-hop IP
* address is in reg0. (ip4.dst is the final destination.) This table
@@ -5555,7 +5734,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
"get_nd(outport, xxreg0); next;");
}
- /* Logical router ingress table 7: Gateway redirect.
+ /* Logical router ingress table 8: Gateway redirect.
*
* For traffic with outport equal to the l3dgw_port
* on a distributed router, this table redirects a subset
@@ -5595,7 +5774,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
}
- /* Local router ingress table 8: ARP request.
+ /* Local router ingress table 9: ARP request.
*
* In the common case where the Ethernet destination has been resolved,
* this table outputs the packet (priority 0). Otherwise, it composes
@@ -1,7 +1,7 @@
{
"name": "OVN_Northbound",
"version": "5.8.0",
- "cksum": "2812300190 16766",
+ "cksum": "1967092589 16903",
"tables": {
"NB_Global": {
"columns": {
@@ -235,7 +235,9 @@
"dst-ip"]]},
"min": 0, "max": 1}},
"nexthop": {"type": "string"},
- "output_port": {"type": {"key": "string", "min": 0, "max": 1}}},
+ "output_port": {"type": {"key": "string", "min": 0, "max": 1}},
+ "multipath_port": {"type": {"key": "string", "min": 0,
+ "max": "unlimited"}}},
"isRoot": false},
"NAT": {
"columns": {
@@ -1487,6 +1487,15 @@
address as the one via which the <ref column="nexthop"/> is reachable.
</p>
</column>
+ <column name="multipath_port">
+ <p>
+ The names of the <ref table="Logical_Router_Port"/> rows via which the
+ packet may be sent out. When it contains more than one port, the
+ packet has multiple candidate output ports. OVN uses the packet header
+ to determine which port the packet is delivered to.
+ Currently, OVN uses the destination IP address to choose the port.
+ </p>
+ </column>
</table>
<table name="NAT" title="NAT rules">
1. ovn-nb.ovsschema was updated to add new field multipath_port. 2. Add multipath feature in ovn-northd part. northd generates multipath flows to dispatch traffic by using packet's IP dst address if user set Logical_Router_Static_Route's multipath_port with ports. 3. Add new table(lr_in_multipath) in ovn-northd's router ingress stages to dispatch traffic to ports. 4. Add multipath flow in Table 5(lr_in_ip_routing) and store hash result into reg0. reg9[2] was used to indicate packet which need dispatching. 5. Add multipath feature description in ovn/northd/ovn-northd.8.xml and ovn/ovn-nb.xml Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com> --- ovn/northd/ovn-northd.8.xml | 67 +++++++++++- ovn/northd/ovn-northd.c | 245 ++++++++++++++++++++++++++++++++++++++------ ovn/ovn-nb.ovsschema | 6 +- ovn/ovn-nb.xml | 9 ++ 4 files changed, 289 insertions(+), 38 deletions(-)