@@ -209,10 +209,10 @@ dnl Configure Linux tc compat.
AC_DEFUN([OVS_CHECK_LINUX_TC], [
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([#include <linux/pkt_cls.h>], [
- int x = TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+ int x = TCA_POLICE_PKTRATE64;
])],
- [AC_DEFINE([HAVE_TCA_FLOWER_KEY_CT_FLAGS_REPLY], [1],
- [Define to 1 if TCA_FLOWER_KEY_CT_FLAGS_REPLY is available.])])
+ [AC_DEFINE([HAVE_TCA_POLICE_PKTRATE64], [1],
+ [Define to 1 if TCA_POLICE_PKTRATE64 is available.])])
AC_CHECK_MEMBERS([struct tcf_t.firstuse], [], [], [#include <linux/pkt_cls.h>])
@@ -1,7 +1,7 @@
#ifndef __LINUX_PKT_CLS_WRAPPER_H
#define __LINUX_PKT_CLS_WRAPPER_H 1
-#if defined(__KERNEL__) || defined(HAVE_TCA_FLOWER_KEY_CT_FLAGS_REPLY)
+#if defined(__KERNEL__) || defined(HAVE_TCA_POLICE_PKTRATE64)
#include_next <linux/pkt_cls.h>
#else
@@ -104,6 +104,10 @@ enum {
TCA_POLICE_RESULT,
TCA_POLICE_TM,
TCA_POLICE_PAD,
+ TCA_POLICE_RATE64,
+ TCA_POLICE_PEAKRATE64,
+ TCA_POLICE_PKTRATE64,
+ TCA_POLICE_PKTBURST64,
__TCA_POLICE_MAX
#define TCA_POLICE_RESULT TCA_POLICE_RESULT
};
@@ -3389,7 +3389,9 @@ netdev_dpdk_policer_construct(uint32_t rate, uint32_t burst)
static int
netdev_dpdk_set_policing(struct netdev* netdev, uint32_t policer_rate,
- uint32_t policer_burst)
+ uint32_t policer_burst,
+ uint32_t policer_kpkts_rate OVS_UNUSED,
+ uint32_t policer_kpkts_burst OVS_UNUSED)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
struct ingress_policer *policer;
@@ -75,8 +75,10 @@ struct netdev_linux {
int mtu;
unsigned int ifi_flags;
long long int carrier_resets;
- uint32_t kbits_rate; /* Policing data. */
+ uint32_t kbits_rate; /* Policing data - kbps */
uint32_t kbits_burst;
+ uint32_t kpkts_rate; /* Policing data - kpps */
+ uint32_t kpkts_burst;
int vport_stats_error; /* Cached error code from vport_get_stats().
0 or an errno value. */
int netdev_mtu_error; /* Cached error code from SIOCGIFMTU
@@ -485,8 +485,10 @@ static struct tcmsg *netdev_linux_tc_make_request(const struct netdev *,
int type,
unsigned int flags,
struct ofpbuf *);
-static int tc_add_policer(struct netdev *,
- uint32_t kbits_rate, uint32_t kbits_burst);
+
+static int tc_add_policer(struct netdev *, uint32_t kbits_rate,
+ uint32_t kbits_burst, uint32_t kpkts_rate,
+ uint32_t kpkts_burst);
static int tc_parse_qdisc(const struct ofpbuf *, const char **kind,
struct nlattr **options);
@@ -547,6 +549,12 @@ is_tap_netdev(const struct netdev *netdev)
return netdev_get_class(netdev) == &netdev_tap_class;
}
+enum {
+ OVS_TC_QOS_TYPE_BPS,
+ OVS_TC_QOS_TYPE_PPS,
+ OVS_TC_QOS_TYPE_MAX,
+};
+
static int
netdev_linux_netnsid_update__(struct netdev_linux *netdev)
{
@@ -2593,24 +2601,50 @@ tc_matchall_fill_police(uint32_t kbits_rate, uint32_t kbits_burst)
}
static void
-nl_msg_put_act_police(struct ofpbuf *request, struct tc_police police)
+nl_msg_put_act_police(struct ofpbuf *request, struct tc_police police,
+ uint32_t kpkts_rate, uint32_t kpkts_burst)
{
- size_t offset;
+ size_t offset, act_offset;
+ uint32_t i = 0, prio = 0;
- nl_msg_put_string(request, TCA_ACT_KIND, "police");
- offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
- nl_msg_put_unspec(request, TCA_POLICE_TBF, &police, sizeof police);
- tc_put_rtab(request, TCA_POLICE_RATE, &police.rate);
- nl_msg_put_u32(request, TCA_POLICE_RESULT, TC_ACT_UNSPEC);
- nl_msg_end_nested(request, offset);
+ for (i = 0; i < OVS_TC_QOS_TYPE_MAX; i++) {
+ if (i == OVS_TC_QOS_TYPE_BPS && !police.rate.rate) {
+ continue;
+ }
+ if (i == OVS_TC_QOS_TYPE_PPS && !kpkts_rate) {
+ continue;
+ }
+ act_offset = nl_msg_start_nested(request, ++prio);
+ nl_msg_put_string(request, TCA_ACT_KIND, "police");
+ offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+ if (i == OVS_TC_QOS_TYPE_BPS && police.rate.rate) {
+ tc_put_rtab(request, TCA_POLICE_RATE, &police.rate);
+ } else if (i == OVS_TC_QOS_TYPE_PPS && kpkts_rate) {
+ unsigned int pkt_burst_ticks, pps_rate;
+ /* for PPS, set rate as 0 to act as a single action */
+ police.rate.rate = 0;
+ police.burst = 0;
+ police.rate.cell_log = 0;
+ pps_rate = kpkts_rate * 1000;
+ pkt_burst_ticks = tc_bytes_to_ticks(pps_rate,
+ MIN(UINT32_MAX / 1024, kpkts_burst) * 1024);
+ nl_msg_put_u64(request, TCA_POLICE_PKTRATE64, (uint64_t)pps_rate);
+ nl_msg_put_u64(request, TCA_POLICE_PKTBURST64, (uint64_t)pkt_burst_ticks);
+ }
+ nl_msg_put_unspec(request, TCA_POLICE_TBF, &police, sizeof police);
+ nl_msg_put_u32(request, TCA_POLICE_RESULT, TC_ACT_PIPE);
+ nl_msg_end_nested(request, offset);
+ nl_msg_end_nested(request, act_offset);
+ }
}
static int
tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
- uint32_t kbits_burst)
+ uint32_t kbits_burst, uint32_t kpkts_rate,
+ uint32_t kpkts_burst)
{
uint16_t eth_type = (OVS_FORCE uint16_t) htons(ETH_P_ALL);
- size_t basic_offset, action_offset, inner_offset;
+ size_t basic_offset, action_offset;
uint16_t prio = TC_RESERVED_PRIORITY_POLICE;
int ifindex, err = 0;
struct tc_police pol_act;
@@ -2634,9 +2668,7 @@ tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
nl_msg_put_string(&request, TCA_KIND, "matchall");
basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
action_offset = nl_msg_start_nested(&request, TCA_MATCHALL_ACT);
- inner_offset = nl_msg_start_nested(&request, 1);
- nl_msg_put_act_police(&request, pol_act);
- nl_msg_end_nested(&request, inner_offset);
+ nl_msg_put_act_police(&request, pol_act, kpkts_rate, kpkts_burst);
nl_msg_end_nested(&request, action_offset);
nl_msg_end_nested(&request, basic_offset);
@@ -2676,8 +2708,9 @@ tc_del_matchall_policer(struct netdev *netdev)
/* Attempts to set input rate limiting (policing) policy. Returns 0 if
* successful, otherwise a positive errno value. */
static int
-netdev_linux_set_policing(struct netdev *netdev_,
- uint32_t kbits_rate, uint32_t kbits_burst)
+netdev_linux_set_policing(struct netdev *netdev_, uint32_t kbits_rate,
+ uint32_t kbits_burst, uint32_t kpkts_rate,
+ uint32_t kpkts_burst)
{
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
const char *netdev_name = netdev_get_name(netdev_);
@@ -2688,6 +2721,10 @@ netdev_linux_set_policing(struct netdev *netdev_,
: !kbits_burst ? 8000 /* Default to 8000 kbits if 0. */
: kbits_burst); /* Stick with user-specified value. */
+ kpkts_burst = (!kpkts_rate ? 0 /* Force to 0 if no rate specified. */
+ : !kpkts_burst ? 16 /* Default to 16000 packets if 0. */
+ : kpkts_burst); /* Stick with user-specified value. */
+
ovs_mutex_lock(&netdev->mutex);
if (netdev_linux_netnsid_is_remote(netdev)) {
error = EOPNOTSUPP;
@@ -2697,7 +2734,9 @@ netdev_linux_set_policing(struct netdev *netdev_,
if (netdev->cache_valid & VALID_POLICING) {
error = netdev->netdev_policing_error;
if (error || (netdev->kbits_rate == kbits_rate &&
- netdev->kbits_burst == kbits_burst)) {
+ netdev->kpkts_rate == kpkts_rate &&
+ netdev->kbits_burst == kbits_burst &&
+ netdev->kpkts_burst == kpkts_burst)) {
/* Assume that settings haven't changed since we last set them. */
goto out;
}
@@ -2709,8 +2748,9 @@ netdev_linux_set_policing(struct netdev *netdev_,
/* Use matchall for policing when offloadling ovs with tc-flower. */
if (netdev_is_flow_api_enabled()) {
error = tc_del_matchall_policer(netdev_);
- if (kbits_rate) {
- error = tc_add_matchall_policer(netdev_, kbits_rate, kbits_burst);
+ if (kbits_rate || kpkts_rate) {
+ error = tc_add_matchall_policer(netdev_, kbits_rate, kbits_burst,
+ kpkts_rate, kpkts_burst);
}
ovs_mutex_unlock(&netdev->mutex);
return error;
@@ -2729,7 +2769,7 @@ netdev_linux_set_policing(struct netdev *netdev_,
goto out;
}
- if (kbits_rate) {
+ if (kbits_rate || kpkts_rate) {
error = tc_add_del_qdisc(ifindex, true, 0, TC_INGRESS);
if (error) {
VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s",
@@ -2737,7 +2777,8 @@ netdev_linux_set_policing(struct netdev *netdev_,
goto out;
}
- error = tc_add_policer(netdev_, kbits_rate, kbits_burst);
+ error = tc_add_policer(netdev_, kbits_rate, kbits_burst,
+ kpkts_rate, kpkts_burst);
if (error){
VLOG_WARN_RL(&rl, "%s: adding policing action failed: %s",
netdev_name, ovs_strerror(error));
@@ -2747,6 +2788,8 @@ netdev_linux_set_policing(struct netdev *netdev_,
netdev->kbits_rate = kbits_rate;
netdev->kbits_burst = kbits_burst;
+ netdev->kpkts_rate = kpkts_rate;
+ netdev->kpkts_burst = kpkts_burst;
out:
if (!error || error == ENODEV) {
@@ -5523,7 +5566,7 @@ netdev_linux_tc_make_request(const struct netdev *netdev, int type,
}
/* Adds a policer to 'netdev' with a rate of 'kbits_rate' and a burst size
- * of 'kbits_burst'.
+ * of 'kbits_burst', with a rate of 'kpkts_rate' and a burst size of 'kpkts_burst'.
*
* This function is equivalent to running:
* /sbin/tc filter add dev <devname> parent ffff: protocol all prio 49
@@ -5536,14 +5579,13 @@ netdev_linux_tc_make_request(const struct netdev *netdev, int type,
* Returns 0 if successful, otherwise a positive errno value.
*/
static int
-tc_add_policer(struct netdev *netdev,
- uint32_t kbits_rate, uint32_t kbits_burst)
+tc_add_policer(struct netdev *netdev, uint32_t kbits_rate,
+ uint32_t kbits_burst, uint32_t kpkts_rate, uint32_t kpkts_burst)
{
+ size_t basic_offset, police_offset;
struct tc_police tc_police;
struct ofpbuf request;
struct tcmsg *tcmsg;
- size_t basic_offset;
- size_t police_offset;
int error;
int mtu = 65535;
@@ -5561,7 +5603,6 @@ tc_add_policer(struct netdev *netdev,
* tc's point of view. Whatever. */
tc_police.burst = tc_bytes_to_ticks(
tc_police.rate.rate, MIN(UINT32_MAX / 1024, kbits_burst) * 1024 / 8);
-
tcmsg = netdev_linux_tc_make_request(netdev, RTM_NEWTFILTER,
NLM_F_EXCL | NLM_F_CREATE, &request);
if (!tcmsg) {
@@ -5570,12 +5611,10 @@ tc_add_policer(struct netdev *netdev,
tcmsg->tcm_parent = tc_make_handle(0xffff, 0);
tcmsg->tcm_info = tc_make_handle(49,
(OVS_FORCE uint16_t) htons(ETH_P_ALL));
-
nl_msg_put_string(&request, TCA_KIND, "basic");
basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
- police_offset = nl_msg_start_nested(&request, TCA_BASIC_POLICE);
- nl_msg_put_unspec(&request, TCA_POLICE_TBF, &tc_police, sizeof tc_police);
- tc_put_rtab(&request, TCA_POLICE_RATE, &tc_police.rate);
+ police_offset = nl_msg_start_nested(&request, TCA_BASIC_ACT);
+ nl_msg_put_act_police(&request, tc_police, kpkts_rate, kpkts_burst);
nl_msg_end_nested(&request, police_offset);
nl_msg_end_nested(&request, basic_offset);
@@ -514,13 +514,16 @@ struct netdev_class {
* NETDEV_PT_LEGACY_L2. */
enum netdev_pt_mode (*get_pt_mode)(const struct netdev *netdev);
- /* Attempts to set input rate limiting (policing) policy, such that up to
- * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative
- * burst size of 'kbits' kb.
+ /* Attempts to set input rate limiting (policing) policy, such that:
+ * - up to 'kbits_rate' kbps of traffic is accepted, with a maximum
+ * accumulative burst size of 'kbits' kb; and
+ * - up to 'kpkts' kpps of traffic is accepted, with a maximum
+ * accumulative burst size of 'kpkts' kilo packets.
*
* This function may be set to null if policing is not supported. */
int (*set_policing)(struct netdev *netdev, unsigned int kbits_rate,
- unsigned int kbits_burst);
+ unsigned int kbits_burst, unsigned int kpkts_rate,
+ unsigned int kpkts_burst);
/* Adds to 'types' all of the forms of QoS supported by 'netdev', or leaves
* it empty if 'netdev' does not support QoS. Any names added to 'types'
@@ -1619,17 +1619,20 @@ netdev_get_custom_stats(const struct netdev *netdev,
return error;
}
-
-/* Attempts to set input rate limiting (policing) policy, such that up to
- * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative burst
- * size of 'kbits' kb. */
+/* Attempts to set input rate limiting (policing) policy, such that:
+ * - up to 'kbits_rate' kbps of traffic is accepted, with a maximum
+ * accumulative burst size of 'kbits' kb; and
+ * - up to 'kpkts' kpps of traffic is accepted, with a maximum
+ * accumulative burst size of 'kpkts' kilo packets.
+ */
int
netdev_set_policing(struct netdev *netdev, uint32_t kbits_rate,
- uint32_t kbits_burst)
+ uint32_t kbits_burst, uint32_t kpkts_rate,
+ uint32_t kpkts_burst)
{
return (netdev->netdev_class->set_policing
? netdev->netdev_class->set_policing(netdev,
- kbits_rate, kbits_burst)
+ kbits_rate, kbits_burst, kpkts_rate, kpkts_burst)
: EOPNOTSUPP);
}
@@ -289,7 +289,8 @@ struct netdev_queue_stats {
};
int netdev_set_policing(struct netdev *, uint32_t kbits_rate,
- uint32_t kbits_burst);
+ uint32_t kbits_burst, uint32_t kpkts_rate,
+ uint32_t kpkts_burst);
int netdev_get_qos_types(const struct netdev *, struct sset *types);
int netdev_get_qos_capabilities(const struct netdev *,
@@ -4896,7 +4896,9 @@ iface_configure_qos(struct iface *iface, const struct ovsrec_qos *qos)
netdev_set_policing(iface->netdev,
MIN(UINT32_MAX, iface->cfg->ingress_policing_rate),
- MIN(UINT32_MAX, iface->cfg->ingress_policing_burst));
+ MIN(UINT32_MAX, iface->cfg->ingress_policing_burst),
+ MIN(UINT32_MAX, iface->cfg->ingress_policing_kpkts_rate),
+ MIN(UINT32_MAX, iface->cfg->ingress_policing_kpkts_burst));
ofpbuf_uninit(&queues_buf);
}
@@ -1,6 +1,6 @@
{"name": "Open_vSwitch",
- "version": "8.2.0",
- "cksum": "1076640191 26427",
+ "version": "8.3.0",
+ "cksum": "3781850481 26690",
"tables": {
"Open_vSwitch": {
"columns": {
@@ -242,6 +242,12 @@
"ingress_policing_burst": {
"type": {"key": {"type": "integer",
"minInteger": 0}}},
+ "ingress_policing_kpkts_rate": {
+ "type": {"key": {"type": "integer",
+ "minInteger": 0}}},
+ "ingress_policing_kpkts_burst": {
+ "type": {"key": {"type": "integer",
+ "minInteger": 0}}},
"mac_in_use": {
"type": {"key": {"type": "string"},
"min": 0, "max": 1},
@@ -3654,8 +3654,18 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
table="Queue"/> tables).
</p>
<p>
- Policing is currently implemented on Linux and OVS with DPDK. Both
- implementations use a simple ``token bucket'' approach:
+ Policing settings can be set with byte rate or packet rate, and they
+ can be configured together, in which case they take effect together,
+ that means the smaller speed limit of them is in effect.
+ </p>
+ <p>
+ Currently, byte rate policing is implemented on Linux and OVS with
+ DPDK, while packet rate policing is only implemented on Linux. Both
+ Linux and OVS DPDK implementations use a simple ``token bucket’’
+ approach:
+ </p>
+ <p>
+ Byte rate policing:
</p>
<ul>
<li>
@@ -3673,6 +3683,26 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
rate specified by <ref column="ingress_policing_rate"/>.
</li>
</ul>
+ <p>
+ Packet rate policing:
+ </p>
+ <ul>
+ <li>
+ The size of the bucket corresponds to <ref
+ column="ingress_policing_kpkts_burst"/>. Initially the bucket is
+ full.
+ </li>
+ <li>
+ Whenever a packet is received, it will comsume one token from the
+ current bucket. If the token is available in the bucket, it's
+ removed and the packet is forwarded. Otherwise, the packet is
+ dropped.
+ </li>
+ <li>
+ Whenever it is not full, the bucket is refilled with tokens at the
+ rate specified by <ref column="ingress_policing_kpkts_rate"/>.
+ </li>
+ </ul>
<p>
Policing interacts badly with some network protocols, and especially
with fragmented IP packets. Suppose that there is enough network
@@ -3698,6 +3728,14 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
</p>
</column>
+ <column name="ingress_policing_kpkts_rate">
+ <p>
+ Maximum rate for data received on this interface, in kpps. Data
+ received faster than this rate is dropped. Set to <code>0</code>
+ (the default) to disable policing.
+ </p>
+ </column>
+
<column name="ingress_policing_burst">
<p>Maximum burst size for data received on this interface, in kb. The
default burst size if set to <code>0</code> is 8000 kbit. This value
@@ -3712,6 +3750,24 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
closer to achieving the full rate.
</p>
</column>
+
+ <column name="ingress_policing_kpkts_burst">
+ <p>
+ Maximum burst size for data received on this interface, in
+ kilo-packets. The default burst size if set to <code>0</code> is
+ 16 kilo-packets. This value has no effect if <ref
+ column="ingress_policing_kpkts_rate"/> is <code>0</code>.</p>
+ <p>
+ Specifying a larger burst size lets the algorithm be more
+ forgiving, which is important for protocols like TCP that react
+ severely to dropped packets. The burst size should be at least
+ the size of the interface's MTU. Specifying a value that is
+ numerically at least as large as 80% of <ref
+ column="ingress_policing_kpkts_rate"/> helps TCP come closer to
+ achieving the full rate.
+ </p>
+ </column>
+
</group>
<group title="Bidirectional Forwarding Detection (BFD)">