@@ -187,6 +187,10 @@ enum ovs_packet_cmd {
* %OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute, which is sent only if the
* output port is actually a tunnel port. Contains the output tunnel key
* extracted from the packet as nested %OVS_TUNNEL_KEY_ATTR_* attributes.
+ * @OVS_PACKET_ATTR_MRU: Present for an %OVS_PACKET_CMD_ACTION and
+ * %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment
+ * size.
+ *
* These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_PACKET_* commands.
*/
@@ -202,6 +206,7 @@ enum ovs_packet_attr {
OVS_PACKET_ATTR_UNUSED2,
OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe,
error logging should be suppressed. */
+ OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */
__OVS_PACKET_ATTR_MAX
};
@@ -1547,6 +1547,9 @@ dpif_netlink_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec,
if (d_exec->probe) {
nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
}
+ if (d_exec->mtu) {
+ nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
+ }
}
/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
@@ -1965,6 +1968,7 @@ parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
[OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
[OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
[OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
+ [OVS_PACKET_ATTR_MRU] = { .type = NL_A_BE16, .optional = true }
};
struct ovs_header *ovs_header;
@@ -2002,6 +2006,7 @@ parse_odp_packet(const struct dpif_netlink *dpif, struct ofpbuf *buf,
upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
+ upcall->mru = a[OVS_PACKET_ATTR_MRU];
/* Allow overwriting the netlink attribute header without reallocating. */
dp_packet_use_stub(&upcall->packet,
@@ -1126,6 +1126,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet **packets, int cnt,
execute.packet = packet;
execute.needs_help = false;
execute.probe = false;
+ execute.mtu = 0;
aux->error = dpif_execute(aux->dpif, &execute);
log_execute_message(aux->dpif, &execute, true, aux->error);
@@ -1693,6 +1694,7 @@ log_execute_message(struct dpif *dpif, const struct dpif_execute *execute,
ds_put_format(&ds, " failed (%s)", ovs_strerror(error));
}
ds_put_format(&ds, " on packet %s", packet);
+ ds_put_format(&ds, " mtu %d", execute->mtu);
vlog(THIS_MODULE, error ? VLL_WARN : VLL_DBG, "%s", ds_cstr(&ds));
ds_destroy(&ds);
free(packet);
@@ -697,6 +697,8 @@ struct dpif_execute {
size_t actions_len; /* Length of 'actions' in bytes. */
bool needs_help;
bool probe; /* Suppress error messages. */
+ unsigned int mtu; /* Maximum transmission unit to fragment.
+ 0 if not a fragmented packet */
/* Input, but possibly modified as a side effect of execution. */
struct dp_packet *packet; /* Packet to execute. */
@@ -780,6 +782,7 @@ struct dpif_upcall {
struct nlattr *key; /* Flow key. */
size_t key_len; /* Length of 'key' in bytes. */
ovs_u128 ufid; /* Unique flow identifier for 'key'. */
+ struct nlattr *mru; /* Maximum receive unit. */
/* DPIF_UC_ACTION only. */
struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */
@@ -183,6 +183,8 @@ struct upcall {
unsigned pmd_id; /* Datapath poll mode driver id. */
const struct dp_packet *packet; /* Packet associated with this upcall. */
ofp_port_t in_port; /* OpenFlow in port, or OFPP_NONE. */
+ uint16_t mru; /* If !0, Maximum receive unit of
+ fragmented IP packet */
enum dpif_upcall_type type; /* Datapath type of the upcall. */
const struct nlattr *userdata; /* Userdata for DPIF_UC_ACTION Upcalls. */
@@ -329,6 +331,7 @@ static enum upcall_type classify_upcall(enum dpif_upcall_type type,
static int upcall_receive(struct upcall *, const struct dpif_backer *,
const struct dp_packet *packet, enum dpif_upcall_type,
const struct nlattr *userdata, const struct flow *,
+ const unsigned int mru,
const ovs_u128 *ufid, const unsigned pmd_id);
static void upcall_uninit(struct upcall *);
@@ -716,6 +719,7 @@ recv_upcalls(struct handler *handler)
struct dpif_upcall *dupcall = &dupcalls[n_upcalls];
struct upcall *upcall = &upcalls[n_upcalls];
struct flow *flow = &flows[n_upcalls];
+ unsigned int mru;
int error;
ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls],
@@ -730,8 +734,14 @@ recv_upcalls(struct handler *handler)
goto free_dupcall;
}
+ if (dupcall->mru) {
+ mru = nl_attr_get_u16(dupcall->mru);
+ } else {
+ mru = 0;
+ }
+
error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
- dupcall->type, dupcall->userdata, flow,
+ dupcall->type, dupcall->userdata, flow, mru,
&dupcall->ufid, PMD_ID_NULL);
if (error) {
if (error == ENODEV) {
@@ -977,6 +987,7 @@ static int
upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
const struct dp_packet *packet, enum dpif_upcall_type type,
const struct nlattr *userdata, const struct flow *flow,
+ const unsigned int mru,
const ovs_u128 *ufid, const unsigned pmd_id)
{
int error;
@@ -1006,6 +1017,7 @@ upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
upcall->ukey = NULL;
upcall->key = NULL;
upcall->key_len = 0;
+ upcall->mru = mru;
upcall->out_tun_key = NULL;
upcall->actions = NULL;
@@ -1133,7 +1145,7 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
error = upcall_receive(&upcall, udpif->backer, packet, type, userdata,
- flow, ufid, pmd_id);
+ flow, 0, ufid, pmd_id);
if (error) {
return error;
}
@@ -1355,6 +1367,7 @@ handle_upcalls(struct udpif *udpif, struct upcall *upcalls,
op->dop.u.execute.actions_len = upcall->odp_actions.size;
op->dop.u.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0;
op->dop.u.execute.probe = false;
+ op->dop.u.execute.mtu = upcall->mru;
}
}
@@ -1116,6 +1116,7 @@ check_variable_length_userdata(struct dpif_backer *backer)
execute.packet = &packet;
execute.needs_help = false;
execute.probe = true;
+ execute.mtu = 0;
error = dpif_execute(backer->dpif, &execute);
@@ -1214,6 +1215,7 @@ check_masked_set_action(struct dpif_backer *backer)
execute.packet = &packet;
execute.needs_help = false;
execute.probe = true;
+ execute.mtu = 0;
error = dpif_execute(backer->dpif, &execute);
@@ -3722,6 +3724,7 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
execute.packet = packet;
execute.needs_help = (xout.slow & SLOW_ACTION) != 0;
execute.probe = false;
+ execute.mtu = 0;
/* Fix up in_port. */
in_port = flow->in_port.ofp_port;
@@ -927,3 +927,224 @@ TIME_WAIT src=10.1.1.1 dst=10.1.1.2 sport=<cleared> dport=<cleared> src=10.1.1.2
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([conntrack - IPv4 fragmentation ])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 secure -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+dnl Sending ping through conntrack
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=100,in_port=1,icmp,action=ct(commit,zone=9),2
+priority=100,in_port=2,ct_state=-trk,icmp,action=ct(table=0,zone=9)
+priority=100,in_port=2,ct_state=+trk+est-new,icmp,action=1
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Basic connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Ipv4 fragmentation connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Ipv4 larger fragmentation connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - IPv4 fragmentation + vlan])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 secure -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+ADD_VLAN(p0, at_ns0, 100, "10.2.2.1/24")
+ADD_VLAN(p1, at_ns1, 100, "10.2.2.2/24")
+
+dnl Sending ping through conntrack
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=100,in_port=1,icmp,action=ct(commit,zone=9),2
+priority=100,in_port=2,ct_state=-trk,icmp,action=ct(table=0,zone=9)
+priority=100,in_port=2,ct_state=+trk+est-new,icmp,action=1
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Basic connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.2.2.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Ipv4 fragmentation connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.2.2.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Ipv4 larger fragmentation connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.2.2.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - IPv6 fragmentation])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 secure -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "fc00::1/96")
+ADD_VETH(p1, at_ns1, br0, "fc00::2/96")
+
+dnl Sending ping through conntrack
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,in_port=1,ipv6,action=ct(commit,zone=9),2
+priority=10,in_port=2,ct_state=-trk,ipv6,action=ct(table=0,zone=9)
+priority=10,in_port=2,ct_state=+trk+est-new,ipv6,action=1
+priority=100,icmp6,icmp_type=135,action=normal
+priority=100,icmp6,icmp_type=136,action=normal
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Without this sleep, we get occasional failures due to the following error:
+dnl "connect: Cannot assign requested address"
+sleep 2;
+
+dnl Basic connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping6 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Ipv4 fragmentation connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping6 -s 1600 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Ipv4 larger fragmentation connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping6 -s 3200 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - IPv6 fragmentation + vlan])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 secure -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "fc00::1/96")
+ADD_VETH(p1, at_ns1, br0, "fc00::2/96")
+
+ADD_VLAN(p0, at_ns0, 100, "fc00:1::3/96")
+ADD_VLAN(p1, at_ns1, 100, "fc00:1::4/96")
+
+dnl Sending ping through conntrack
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,in_port=1,ipv6,action=ct(commit,zone=9),2
+priority=10,in_port=2,ct_state=-trk,ipv6,action=ct(table=0,zone=9)
+priority=10,in_port=2,ct_state=+trk+est-new,ipv6,action=1
+priority=100,icmp6,icmp_type=135,action=normal
+priority=100,icmp6,icmp_type=136,action=normal
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Without this sleep, we get occasional failures due to the following error:
+dnl "connect: Cannot assign requested address"
+sleep 2;
+
+dnl Basic connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping6 -q -c 3 -i 0.3 -w 2 fc00:1::4 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Ipv4 fragmentation connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping6 -s 1600 -q -c 3 -i 0.3 -w 2 fc00:1::4 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Ipv4 larger fragmentation connectivity check.
+NS_CHECK_EXEC([at_ns0], [ping6 -s 3200 -q -c 3 -i 0.3 -w 2 fc00:1::4 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - Fragmentation over vxlan])
+AT_SKIP_IF([! ip link help 2>&1 | grep vxlan >/dev/null])
+CHECK_CONNTRACK()
+
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 standalone --])
+ADD_BR([br-underlay], [set-fail-mode br-underlay standalone])
+ADD_NAMESPACES(at_ns0)
+
+dnl Sending ping through conntrack
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=100,in_port=1,icmp,action=ct(commit,zone=9),LOCAL
+priority=100,in_port=LOCAL,ct_state=-trk,icmp,action=ct(table=0,zone=9)
+priority=100,in_port=LOCAL,ct_state=+trk+est,icmp,action=1
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Set up underlay link from host into the namespace using veth pair.
+ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24")
+AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"])
+AT_CHECK([ip link set dev br-underlay up])
+
+dnl Set up tunnel endpoints on OVS outside the namespace and with a native
+dnl linux device inside the namespace.
+ADD_OVS_TUNNEL([vxlan], [br0], [at_ns0], [172.31.1.1], [10.1.1.100/24])
+ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.1.1.1/24],
+ [id 0 dstport 4789])
+
+dnl First, check the underlay
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Okay, now check the overlay with different packet sizes
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP