@@ -284,7 +284,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 27);
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -354,6 +354,7 @@ enum ovs_key_attr {
OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking labels */
OVS_KEY_ATTR_PACKET_ETHERTYPE, /* be16 Ethernet type for packet
* execution. */
+ OVS_KEY_ATTR_NEXT_BASE_LAYER, /* base layer of encapsulated packet */
#ifdef __KERNEL__
/* Only used within kernel data path. */
@@ -23,7 +23,7 @@
/* This sequence number should be incremented whenever anything involving flows
* or the wildcarding of flows changes. This will cause build assertion
* failures in places which likely need to be updated. */
-#define FLOW_WC_SEQ 36
+#define FLOW_WC_SEQ 37
/* Number of Open vSwitch extension 32-bit registers. */
#define FLOW_N_REGS 8
@@ -132,6 +132,10 @@ struct flow {
ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port/ICMP code. */
ovs_be32 igmp_group_ip4; /* IGMP group IPv4 address.
* Keep last for BUILD_ASSERT_DECL below. */
+
+ uint8_t next_base_layer; /* Fields of encapsulated packet, if any,
+ * start at this layer */
+ uint8_t pad4[7];
};
BUILD_ASSERT_DECL(sizeof(struct flow) % sizeof(uint64_t) == 0);
BUILD_ASSERT_DECL(sizeof(struct flow_tnl) % sizeof(uint64_t) == 0);
@@ -141,7 +145,7 @@ BUILD_ASSERT_DECL(sizeof(struct flow_tnl) % sizeof(uint64_t) == 0);
/* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t)
== sizeof(struct flow_tnl) + 216
- && FLOW_WC_SEQ == 36);
+ && FLOW_WC_SEQ == 37);
/* Incremental points at which flow classification may be performed in
* segments.
@@ -124,7 +124,7 @@ struct mf_ctx {
* away. Some GCC versions gave warnings on ALWAYS_INLINE, so these are
* defined as macros. */
-#if (FLOW_WC_SEQ != 36)
+#if (FLOW_WC_SEQ != 37)
#define MINIFLOW_ASSERT(X) ovs_assert(X)
BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
"assertions enabled. Consider updating FLOW_WC_SEQ after "
@@ -821,6 +821,20 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
miniflow_push_be16(mf, tp_dst, htons(icmp->icmp6_code));
miniflow_pad_to_64(mf, tp_dst);
}
+ } else if (OVS_LIKELY(nw_proto == IPPROTO_GRE)) {
+ if (OVS_LIKELY(size >= sizeof(struct gre_base_hdr))) {
+ const struct gre_base_hdr *gre = data_pull(&data, &size,
+ sizeof *gre);
+ if (gre->protocol == htons(ETH_TYPE_TEB)) {
+ /* No need to store a zero value for next_base_layer
+ * in the miniflow which would cost an extra word of
+ * storage. */
+ BUILD_ASSERT(LAYER_2 == 0);
+ } else {
+ miniflow_push_uint8(mf, next_base_layer, LAYER_3);
+ miniflow_pad_to_64(mf, next_base_layer);
+ }
+ }
}
}
out:
@@ -859,7 +873,7 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
{
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37);
match_init_catchall(flow_metadata);
if (flow->tunnel.tun_id != htonll(0)) {
@@ -1269,7 +1283,7 @@ void flow_wildcards_init_for_packet(struct flow_wildcards *wc,
memset(&wc->masks, 0x0, sizeof wc->masks);
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37);
if (flow_tnl_dst_is_set(&flow->tunnel)) {
if (flow->tunnel.flags & FLOW_TNL_F_KEY) {
@@ -1389,7 +1403,7 @@ void
flow_wc_map(const struct flow *flow, struct flowmap *map)
{
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37);
flowmap_init(map);
@@ -1437,6 +1451,8 @@ flow_wc_map(const struct flow *flow, struct flowmap *map)
if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_IGMP)) {
FLOWMAP_SET(map, igmp_group_ip4);
+ } else if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_GRE)) {
+ FLOWMAP_SET(map, next_base_layer);
} else {
FLOWMAP_SET(map, tcp_flags);
}
@@ -1455,6 +1471,8 @@ flow_wc_map(const struct flow *flow, struct flowmap *map)
FLOWMAP_SET(map, nd_target);
FLOWMAP_SET(map, arp_sha);
FLOWMAP_SET(map, arp_tha);
+ } else if (OVS_UNLIKELY(flow->nw_proto == IPPROTO_GRE)) {
+ FLOWMAP_SET(map, next_base_layer);
} else {
FLOWMAP_SET(map, tcp_flags);
}
@@ -1476,7 +1494,7 @@ void
flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc)
{
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37);
memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata);
memset(&wc->masks.regs, 0, sizeof wc->masks.regs);
@@ -2104,7 +2122,7 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type,
flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label));
/* Clear all L3 and L4 fields and dp_hash. */
- BUILD_ASSERT(FLOW_WC_SEQ == 36);
+ BUILD_ASSERT(FLOW_WC_SEQ == 37);
memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0,
sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT);
flow->dp_hash = 0;
@@ -1067,7 +1067,7 @@ match_format(const struct match *match, struct ds *s, int priority)
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37);
if (priority != OFP_DEFAULT_PRIORITY) {
ds_put_format(s, "%spriority=%s%d,",
@@ -1343,6 +1343,10 @@ match_format(const struct match *match, struct ds *s, int priority)
TCP_FLAGS(OVS_BE16_MAX));
}
+ if (wc->masks.next_base_layer) {
+ ds_put_format(s, "next_base_layer=%"PRIu8",", f->next_base_layer);
+ }
+
if (s->length > start_len) {
ds_chomp(s, ',');
}
@@ -5484,7 +5484,8 @@ get_etheraddr(const char *netdev_name, struct eth_addr *ea)
return error;
}
hwaddr_family = ifr.ifr_hwaddr.sa_family;
- if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) {
+ if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER &&
+ hwaddr_family != ARPHRD_IPGRE) {
VLOG_INFO("%s device has unknown hardware address family %d",
netdev_name, hwaddr_family);
return EINVAL;
@@ -144,9 +144,13 @@ netdev_vport_is_patch(const struct netdev *netdev)
bool
netdev_vport_is_layer3(const struct netdev *dev)
{
- const char *type = netdev_get_type(dev);
+ if (is_vport_class(netdev_get_class(dev))) {
+ struct netdev_vport *vport = netdev_vport_cast(dev);
+
+ return vport->tnl_cfg.is_layer3;
+ }
- return (!strcmp("lisp", type));
+ return false;
}
static bool
@@ -459,13 +463,14 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
struct netdev_vport *dev = netdev_vport_cast(dev_);
const char *name = netdev_get_name(dev_);
const char *type = netdev_get_type(dev_);
- bool ipsec_mech_set, needs_dst_port, has_csum;
+ bool ipsec_mech_set, needs_dst_port, has_csum, optional_layer3;
uint16_t dst_proto = 0, src_proto = 0;
struct netdev_tunnel_config tnl_cfg;
struct smap_node *node;
has_csum = strstr(type, "gre") || strstr(type, "geneve") ||
strstr(type, "stt") || strstr(type, "vxlan");
+ optional_layer3 = !strcmp(type, "gre");
ipsec_mech_set = false;
memset(&tnl_cfg, 0, sizeof tnl_cfg);
@@ -480,6 +485,7 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
if (!strcmp(type, "lisp")) {
tnl_cfg.dst_port = htons(LISP_DST_PORT);
+ tnl_cfg.is_layer3 = true;
}
if (!strcmp(type, "stt")) {
@@ -591,6 +597,10 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
}
free(str);
+ } else if (!strcmp(node->key, "layer3") && optional_layer3) {
+ if (!strcmp(node->value, "true")) {
+ tnl_cfg.is_layer3 = true;
+ }
} else {
VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->key);
}
@@ -671,6 +681,7 @@ static int
get_tunnel_config(const struct netdev *dev, struct smap *args)
{
struct netdev_vport *netdev = netdev_vport_cast(dev);
+ const char *type = netdev_get_type(dev);
struct netdev_tunnel_config tnl_cfg;
ovs_mutex_lock(&netdev->mutex);
@@ -724,7 +735,6 @@ get_tunnel_config(const struct netdev *dev, struct smap *args)
if (tnl_cfg.dst_port) {
uint16_t dst_port = ntohs(tnl_cfg.dst_port);
- const char *type = netdev_get_type(dev);
if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) ||
(!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
@@ -738,6 +748,10 @@ get_tunnel_config(const struct netdev *dev, struct smap *args)
smap_add(args, "csum", "true");
}
+ if (tnl_cfg.is_layer3 && !strcmp("gre", type)) {
+ smap_add(args, "layer3", "true");
+ }
+
if (!tnl_cfg.dont_fragment) {
smap_add(args, "df_default", "false");
}
@@ -944,12 +958,17 @@ ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
return l4;
}
+static ovs_be16
+header_eth_type(const void *header)
+{
+ const struct eth_header *eth = header;
+ return eth->eth_type;
+}
+
static bool
is_header_ipv6(const void *header)
{
- const struct eth_header *eth;
- eth = header;
- return eth->eth_type == htons(ETH_TYPE_IPV6);
+ return header_eth_type(header) == htons(ETH_TYPE_IPV6);
}
/* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
@@ -974,6 +993,9 @@ push_ip_header(struct dp_packet *packet,
memcpy(eth, header, size);
+ dp_packet_reset_offsets(packet);
+ packet->l3_ofs = sizeof (struct eth_header);
+
if (is_header_ipv6(header)) {
ip6 = ipv6_hdr(eth);
*ip_tot_size -= IPV6_HEADER_LEN;
@@ -1137,10 +1159,6 @@ parse_gre_header(struct dp_packet *packet,
return -EINVAL;
}
- if (greh->protocol != htons(ETH_TYPE_TEB)) {
- return -EINVAL;
- }
-
hlen = ulen + gre_header_len(greh->flags);
if (hlen > dp_packet_size(packet)) {
return -EINVAL;
@@ -1170,6 +1188,12 @@ parse_gre_header(struct dp_packet *packet,
options++;
}
+ if (greh->protocol == htons(ETH_TYPE_TEB)) {
+ packet->md.packet_ethertype = htons(0);
+ } else {
+ packet->md.packet_ethertype = greh->protocol;
+ }
+
return hlen;
}
@@ -1204,6 +1228,12 @@ netdev_gre_pop_header(struct dp_packet *packet)
dp_packet_reset_packet(packet, hlen);
+ if (eth_type_mpls(packet->md.packet_ethertype)) {
+ packet->l2_5_ofs = 0;
+ } else if (packet->md.packet_ethertype) {
+ packet->l3_ofs = 0;
+ }
+
return 0;
}
@@ -1220,6 +1250,7 @@ netdev_gre_push_header(struct dp_packet *packet,
ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
*csum_opt = csum(greh, ip_tot_size);
}
+ packet->md.packet_ethertype = header_eth_type(data->header);
}
static int
@@ -1252,7 +1283,11 @@ netdev_gre_build_header(const struct netdev *netdev,
greh = (struct gre_base_hdr *) (ip + 1);
}
- greh->protocol = htons(ETH_TYPE_TEB);
+ if (tnl_cfg->is_layer3) {
+ greh->protocol = tnl_flow->dl_type;
+ } else {
+ greh->protocol = htons(ETH_TYPE_TEB);
+ }
greh->flags = 0;
options = (ovs_16aligned_be32 *) (greh + 1);
@@ -98,6 +98,7 @@ struct netdev_tunnel_config {
bool csum;
bool ipsec;
bool dont_fragment;
+ bool is_layer3;
};
void netdev_run(void);
@@ -918,7 +918,7 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match,
int match_len;
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37);
/* Metadata. */
if (match->wc.masks.dp_hash) {
@@ -340,6 +340,7 @@ odp_execute_set_action(struct dp_packet *packet, const struct nlattr *a)
case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
+ case OVS_KEY_ATTR_NEXT_BASE_LAYER:
case __OVS_KEY_ATTR_MAX:
default:
OVS_NOT_REACHED();
@@ -444,6 +445,7 @@ odp_execute_masked_set_action(struct dp_packet *packet,
case OVS_KEY_ATTR_ICMP:
case OVS_KEY_ATTR_ICMPV6:
case OVS_KEY_ATTR_TCP_FLAGS:
+ case OVS_KEY_ATTR_NEXT_BASE_LAYER:
case __OVS_KEY_ATTR_MAX:
default:
OVS_NOT_REACHED();
@@ -166,6 +166,7 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize)
case OVS_KEY_ATTR_DP_HASH: return "dp_hash";
case OVS_KEY_ATTR_RECIRC_ID: return "recirc_id";
case OVS_KEY_ATTR_PACKET_ETHERTYPE: return "pkt_eth";
+ case OVS_KEY_ATTR_NEXT_BASE_LAYER: return "next_base_layer";
case __OVS_KEY_ATTR_MAX:
default:
@@ -1821,6 +1822,7 @@ static const struct attr_len_tbl ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] =
[OVS_KEY_ATTR_CT_MARK] = { .len = 4 },
[OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
[OVS_KEY_ATTR_PACKET_ETHERTYPE] = { .len = 2 },
+ [OVS_KEY_ATTR_NEXT_BASE_LAYER] = { .len = 1 },
};
/* Returns the correct length of the payload for a flow key attribute of the
@@ -2950,6 +2952,13 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
ds_chomp(ds, ',');
break;
}
+
+ case OVS_KEY_ATTR_NEXT_BASE_LAYER: {
+ const uint8_t *mask = ma ? nl_attr_get(ma) : NULL;
+ format_u8u(ds, "type", nl_attr_get_u8(a), mask, verbose);
+ break;
+ }
+
case OVS_KEY_ATTR_UNSPEC:
case __OVS_KEY_ATTR_MAX:
default:
@@ -4425,6 +4434,11 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms,
sctp_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_SCTP,
sizeof *sctp_key);
get_tp_key(data, sctp_key);
+ } else if (flow->nw_proto == IPPROTO_GRE) {
+ if (!export_mask || data->next_base_layer == 0xff) {
+ nl_msg_put_u8(buf, OVS_KEY_ATTR_NEXT_BASE_LAYER,
+ data->next_base_layer);
+ }
} else if (flow->dl_type == htons(ETH_TYPE_IP)
&& flow->nw_proto == IPPROTO_ICMP) {
struct ovs_key_icmp *icmp_key;
@@ -4998,6 +5012,13 @@ parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
put_tp_key(sctp_key, flow);
expected_bit = OVS_KEY_ATTR_SCTP;
}
+ } else if (src_flow->nw_proto == IPPROTO_GRE
+ && (src_flow->dl_type == htons(ETH_TYPE_IP) ||
+ src_flow->dl_type == htons(ETH_TYPE_IPV6))
+ && !(src_flow->nw_frag & FLOW_NW_FRAG_LATER)) {
+ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_NEXT_BASE_LAYER)) {
+ flow->next_base_layer = nl_attr_get_u8(attrs[OVS_KEY_ATTR_NEXT_BASE_LAYER]);
+ }
} else if (src_flow->nw_proto == IPPROTO_ICMP
&& src_flow->dl_type == htons(ETH_TYPE_IP)
&& !(src_flow->nw_frag & FLOW_NW_FRAG_LATER)) {
@@ -141,7 +141,7 @@ void odp_portno_names_destroy(struct hmap *portno_names);
* add another field and forget to adjust this value.
*/
#define ODPUTIL_FLOW_KEY_BYTES 640
-BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
+BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37);
/* A buffer with sufficient size and alignment to hold an nlattr-formatted flow
* key. An array of "struct nlattr" might not, in theory, be sufficiently
@@ -100,7 +100,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask)
void
ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc)
{
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37);
/* Initialize most of wc. */
flow_wildcards_init_catchall(wc);
@@ -27,6 +27,7 @@
#include "hash.h"
#include "openvswitch/list.h"
#include "netdev.h"
+#include "netdev-vport.h"
#include "openvswitch/ofpbuf.h"
#include "ovs-thread.h"
#include "odp-util.h"
@@ -52,6 +53,7 @@ static struct ovs_list addr_list;
struct tnl_port {
odp_port_t port;
ovs_be16 udp_port;
+ bool is_layer3;
char dev_name[IFNAMSIZ];
struct ovs_list node;
};
@@ -61,6 +63,7 @@ static struct ovs_list port_list;
struct tnl_port_in {
struct cls_rule cr;
odp_port_t portno;
+ bool match_base_layer;
struct ovs_refcount ref_cnt;
char dev_name[IFNAMSIZ];
};
@@ -82,7 +85,7 @@ tnl_port_free(struct tnl_port_in *p)
static void
tnl_port_init_flow(struct flow *flow, struct eth_addr mac,
- struct in6_addr *addr, ovs_be16 udp_port)
+ struct in6_addr *addr, ovs_be16 udp_port, bool is_layer3)
{
memset(flow, 0, sizeof *flow);
@@ -99,20 +102,21 @@ tnl_port_init_flow(struct flow *flow, struct eth_addr mac,
flow->nw_proto = IPPROTO_UDP;
} else {
flow->nw_proto = IPPROTO_GRE;
+ flow->next_base_layer = is_layer3 ? LAYER_3 : LAYER_2;
}
flow->tp_dst = udp_port;
}
static void
map_insert(odp_port_t port, struct eth_addr mac, struct in6_addr *addr,
- ovs_be16 udp_port, const char dev_name[])
+ ovs_be16 udp_port, const char dev_name[], bool is_layer3)
{
const struct cls_rule *cr;
struct tnl_port_in *p;
struct match match;
memset(&match, 0, sizeof match);
- tnl_port_init_flow(&match.flow, mac, addr, udp_port);
+ tnl_port_init_flow(&match.flow, mac, addr, udp_port, is_layer3);
do {
cr = classifier_lookup(&cls, CLS_MAX_VERSION, &match.flow, NULL);
@@ -133,6 +137,12 @@ map_insert(odp_port_t port, struct eth_addr mac, struct in6_addr *addr,
* doesn't make sense to match on UDP port numbers. */
if (udp_port) {
match.wc.masks.tp_dst = OVS_BE16_MAX;
+ } else {
+ /* Match base layer for GRE tunnels as it may
+ * be used to differentiate them.
+ */
+ match.wc.masks.next_base_layer = UINT8_MAX;
+ p->match_base_layer = true;
}
if (IN6_IS_ADDR_V4MAPPED(addr)) {
match.wc.masks.nw_dst = OVS_BE32_MAX;
@@ -152,28 +162,29 @@ map_insert(odp_port_t port, struct eth_addr mac, struct in6_addr *addr,
static void
map_insert_ipdev__(struct ip_device *ip_dev, char dev_name[],
- odp_port_t port, ovs_be16 udp_port)
+ odp_port_t port, ovs_be16 udp_port,
+ bool is_layer3)
{
if (ip_dev->n_addr) {
int i;
for (i = 0; i < ip_dev->n_addr; i++) {
map_insert(port, ip_dev->mac, &ip_dev->addr[i],
- udp_port, dev_name);
+ udp_port, dev_name, is_layer3);
}
}
}
void
-tnl_port_map_insert(odp_port_t port,
- ovs_be16 udp_port, const char dev_name[])
+tnl_port_map_insert(odp_port_t port, ovs_be16 udp_port,
+ const char dev_name[], bool is_layer3)
{
struct tnl_port *p;
struct ip_device *ip_dev;
ovs_mutex_lock(&mutex);
LIST_FOR_EACH(p, node, &port_list) {
- if (udp_port == p->udp_port) {
+ if ((udp_port == p->udp_port && udp_port)/* || port == p->port XXX */) {
goto out;
}
}
@@ -181,11 +192,13 @@ tnl_port_map_insert(odp_port_t port,
p = xzalloc(sizeof *p);
p->port = port;
p->udp_port = udp_port;
+ p->is_layer3 = is_layer3;
ovs_strlcpy(p->dev_name, dev_name, sizeof p->dev_name);
ovs_list_insert(&port_list, &p->node);
LIST_FOR_EACH(ip_dev, node, &addr_list) {
- map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->udp_port);
+ map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->udp_port,
+ p->is_layer3);
}
out:
@@ -205,12 +218,13 @@ tnl_port_unref(const struct cls_rule *cr)
}
static void
-map_delete(struct eth_addr mac, struct in6_addr *addr, ovs_be16 udp_port)
+map_delete(struct eth_addr mac, struct in6_addr *addr, ovs_be16 udp_port,
+ bool is_layer3)
{
const struct cls_rule *cr;
struct flow flow;
- tnl_port_init_flow(&flow, mac, addr, udp_port);
+ tnl_port_init_flow(&flow, mac, addr, udp_port, is_layer3);
cr = classifier_lookup(&cls, CLS_MAX_VERSION, &flow, NULL);
tnl_port_unref(cr);
@@ -219,11 +233,13 @@ map_delete(struct eth_addr mac, struct in6_addr *addr, ovs_be16 udp_port)
static void
ipdev_map_delete(struct ip_device *ip_dev, ovs_be16 udp_port)
{
+ bool is_layer3 = netdev_vport_is_layer3(ip_dev->dev);
+
if (ip_dev->n_addr) {
int i;
for (i = 0; i < ip_dev->n_addr; i++) {
- map_delete(ip_dev->mac, &ip_dev->addr[i], udp_port);
+ map_delete(ip_dev->mac, &ip_dev->addr[i], udp_port, is_layer3);
}
}
}
@@ -316,7 +332,7 @@ tnl_port_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
struct ds ds = DS_EMPTY_INITIALIZER;
- struct tnl_port *p;
+ struct tnl_port *p, *q;
ds_put_format(&ds, "Listening ports:\n");
ovs_mutex_lock(&mutex);
@@ -328,7 +344,22 @@ tnl_port_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
}
LIST_FOR_EACH(p, node, &port_list) {
- ds_put_format(&ds, "%s (%"PRIu32")\n", p->dev_name, p->port);
+ bool skip = false;
+
+ /* Skip ports with duplicate 'port' field */
+ LIST_FOR_EACH(q, node, &port_list) {
+ if (p == q) {
+ break;
+ }
+ if (p->port == q->port) {
+ skip = true;
+ break;
+ }
+ }
+
+ if (!skip) {
+ ds_put_format(&ds, "%s (%"PRIu32")\n", p->dev_name, p->port);
+ }
}
out:
@@ -343,7 +374,8 @@ map_insert_ipdev(struct ip_device *ip_dev)
struct tnl_port *p;
LIST_FOR_EACH(p, node, &port_list) {
- map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->udp_port);
+ map_insert_ipdev__(ip_dev, p->dev_name, p->port, p->udp_port,
+ p->is_layer3);
}
}
@@ -27,7 +27,7 @@
odp_port_t tnl_port_map_lookup(struct flow *flow, struct flow_wildcards *wc);
void tnl_port_map_insert(odp_port_t port, ovs_be16 udp_port,
- const char dev_name[]);
+ const char dev_name[], bool is_layer3);
void tnl_port_map_delete(ovs_be16 udp_port);
void tnl_port_map_insert_ipdev(const char dev[]);
@@ -99,7 +99,7 @@ struct rule;
/* Metadata for restoring pipeline context after recirculation. Helpers
* are inlined below to keep them together with the definition for easier
* updates. */
-BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
+BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37);
struct frozen_metadata {
/* Metadata in struct flow. */
@@ -1037,6 +1037,7 @@ sflow_read_set_action(const struct nlattr *attr,
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
case OVS_KEY_ATTR_UNSPEC:
+ case OVS_KEY_ATTR_NEXT_BASE_LAYER:
case __OVS_KEY_ATTR_MAX:
default:
break;
@@ -2949,7 +2949,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
/* If 'struct flow' gets additional metadata, we'll need to zero it out
* before traversing a patch port. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 37);
memset(&flow_tnl, 0, sizeof flow_tnl);
if (!xport) {
@@ -26,6 +26,7 @@
#include "hash.h"
#include "hmap.h"
#include "netdev.h"
+#include "netdev-vport.h"
#include "odp-util.h"
#include "openvswitch/ofpbuf.h"
#include "packets.h"
@@ -194,7 +195,7 @@ tnl_port_add__(const struct ofport_dpif *ofport, const struct netdev *netdev,
tnl_port_mod_log(tnl_port, "adding");
if (native_tnl) {
- tnl_port_map_insert(odp_port, cfg->dst_port, name);
+ tnl_port_map_insert(odp_port, cfg->dst_port, name, cfg->is_layer3);
}
return true;
}
@@ -12,6 +12,8 @@ AT_CHECK([ovs-vsctl add-port int-br t2 -- set Interface t2 type=vxlan \
options:remote_ip=2001:cafe::93 options:out_key=flow options:csum=true ofport_request=4\
-- add-port int-br t4 -- set Interface t4 type=geneve \
options:remote_ip=flow options:key=123 ofport_request=5\
+ -- add-port int-br t5 -- set Interface t5 type=gre \
+ options:remote_ip=2001:cafe::92 options:key=455 options:layer3=true ofport_request=6\
], [0])
AT_CHECK([ovs-appctl dpif/show], [0], [dnl
@@ -25,6 +27,7 @@ dummy@ovs-dummy: hit:0 missed:0
t2 2/4789: (vxlan: key=123, remote_ip=2001:cafe::92)
t3 4/4789: (vxlan: csum=true, out_key=flow, remote_ip=2001:cafe::93)
t4 5/6081: (geneve: key=123, remote_ip=flow)
+ t5 6/3: (gre: key=455, layer3=true, remote_ip=2001:cafe::92)
])
dnl First setup dummy interface IP address, then add the route
@@ -132,12 +135,12 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 3'], [0], [dnl
port 3: rx pkts=1, bytes=98, drop=0, errs=0, frame=0, over=0, crc=0
])
-dnl Check GRE only accepts encapsulated Ethernet frames
-AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6486dd60000000006a2f402001cafe0000000000000000000000922001cafe00000000000000000000008820000800000001c8fe71d883724fbeb6f4e1494a080045000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
+dnl Check decapsulation of L3GRE packet
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6486dd60000000005a2f402001cafe0000000000000000000000922001cafe00000000000000000000008820000800000001c745000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
ovs-appctl time/warp 1000
-AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 3'], [0], [dnl
- port 3: rx pkts=1, bytes=98, drop=0, errs=0, frame=0, over=0, crc=0
+AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 6'], [0], [dnl
+ port 6: rx pkts=1, bytes=84, drop=0, errs=0, frame=0, over=0, crc=0
])
dnl Check decapsulation of Geneve packet with options
@@ -12,6 +12,8 @@ AT_CHECK([ovs-vsctl add-port int-br t2 -- set Interface t2 type=vxlan \
options:remote_ip=1.1.2.93 options:out_key=flow options:csum=true ofport_request=4\
-- add-port int-br t4 -- set Interface t4 type=geneve \
options:remote_ip=flow options:key=123 ofport_request=5\
+ -- add-port int-br t5 -- set Interface t5 type=gre \
+ options:remote_ip=1.1.2.92 options:key=455 options:layer3=true ofport_request=6\
], [0])
AT_CHECK([ovs-appctl dpif/show], [0], [dnl
@@ -25,6 +27,7 @@ dummy@ovs-dummy: hit:0 missed:0
t2 2/4789: (vxlan: key=123, remote_ip=1.1.2.92)
t3 4/4789: (vxlan: csum=true, out_key=flow, remote_ip=1.1.2.93)
t4 5/6081: (geneve: key=123, remote_ip=flow)
+ t5 6/3: (gre: key=455, layer3=true, remote_ip=1.1.2.92)
])
dnl First setup dummy interface IP address, then add the route
@@ -105,8 +108,14 @@ AT_CHECK([tail -1 stdout], [0],
dnl Check GRE tunnel push
AT_CHECK([ovs-ofctl add-flow int-br action=3])
AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0], [Datapath actions: tnl_push(tnl_port(3),header(size=42,type=3,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x2000,proto=0x6558),key=0x1c8)),out_port(100))
+])
+
+dnl Check L3GRE tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br action=6])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
AT_CHECK([tail -1 stdout], [0],
- [Datapath actions: tnl_push(tnl_port(3),header(size=42,type=3,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x2000,proto=0x6558),key=0x1c8)),out_port(100))
+ [Datapath actions: pop_eth,tnl_push(tnl_port(3),header(size=42,type=3,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=47,tos=0,ttl=64,frag=0x4000),gre((flags=0x2000,proto=0x800),key=0x1c7)),out_port(100))
])
dnl Check Geneve tunnel push
@@ -132,12 +141,20 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 3'], [0], [dnl
port 3: rx pkts=1, bytes=98, drop=0, errs=0, frame=0, over=0, crc=0
])
-dnl Check GRE only accepts encapsulated Ethernet frames
-AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007e79464000402fba550101025c0101025820000800000001c8fe71d883724fbeb6f4e1494a080045000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
+dnl Check decapsulation of L3GRE packet
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c745000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
ovs-appctl time/warp 1000
-AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 3'], [0], [dnl
+AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 6'], [0], [dnl
+ port 6: rx pkts=1, bytes=84, drop=0, errs=0, frame=0, over=0, crc=0
+])
+
+dnl Check L3GRE does not accept a packet whose payload is an Ethernet frame
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007e79464000402fba550101025c0101025820000800000001c7fe71d883724fbeb6f4e1494a080045000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
+
+AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port [[36]]' | sort], [0], [dnl
port 3: rx pkts=1, bytes=98, drop=0, errs=0, frame=0, over=0, crc=0
+ port 6: rx pkts=1, bytes=84, drop=0, errs=0, frame=0, over=0, crc=0
])
dnl Check decapsulation of Geneve packet with options
@@ -2259,6 +2259,19 @@
</column>
</group>
+ <group title="Tunnel Options: gre">
+ <p>
+ <code>gre</code> interfaces support these options.
+ </p>
+
+ <column name="options" key="layer3" type='{"type": "boolean"}'>
+ <p>
+ Optional. Packets are sent and received without an Ethernet
+ header present.
+ </p>
+ </column>
+ </group>
+
<group title="Tunnel Options: ipsec_gre only">
<p>
Only <code>ipsec_gre</code> interfaces support these options.
Add support for layer 3 GRE vports (non-tap aka non-VTEP). This makes use of a vport mode configuration for the existing (tap/VTEP) GRE vports. In order to differentiate packets for two different types of GRE vports a new flow key attribute, OVS_KEY_ATTR_NEXT_BASE_LAYER, is used. It is intended that this attribute is only used in userspace as there appears to be no need for it to be used in the kernel datapath. It is envisaged that this attribute may be used for other encapsulation protocols that support both layer3 and layer2 inner-packets. Signed-off-by: Simon Horman <simon.horman@netronome.com> --- v10 * Use a mode for layer3 ports rather than a new port type * Update BUILD_BUG_ON() call in ovs_key_attr_size() * Don't update tnl_port_map_lookup() to always match on next_base_layer: the implementation didn't actually do that and thus was a lot of code change for no behavioural change. v9 * New patch --- datapath/flow_netlink.c | 2 +- datapath/linux/compat/include/linux/openvswitch.h | 1 + include/openvswitch/flow.h | 8 ++- lib/flow.c | 30 ++++++++--- lib/match.c | 6 ++- lib/netdev-linux.c | 3 +- lib/netdev-vport.c | 59 ++++++++++++++++----- lib/netdev.h | 1 + lib/nx-match.c | 2 +- lib/odp-execute.c | 2 + lib/odp-util.c | 21 ++++++++ lib/odp-util.h | 2 +- lib/ofp-util.c | 2 +- lib/tnl-ports.c | 62 +++++++++++++++++------ lib/tnl-ports.h | 2 +- ofproto/ofproto-dpif-rid.h | 2 +- ofproto/ofproto-dpif-sflow.c | 1 + ofproto/ofproto-dpif-xlate.c | 2 +- ofproto/tunnel.c | 3 +- tests/tunnel-push-pop-ipv6.at | 11 ++-- tests/tunnel-push-pop.at | 25 +++++++-- vswitchd/vswitch.xml | 13 +++++ 22 files changed, 207 insertions(+), 53 deletions(-)