@@ -214,3 +214,37 @@ implementation ::
Compile OVS in debug mode to have `ovs_assert` statements error out if
there is a mis-match in the DPCLS lookup implementation.
+
+Datapath Interface Performance
+------------------------------
+
+The datapath interface (DPIF) or dp_netdev_input() is responsible for taking
+packets through the major components of the userspace datapath; such as
+miniflow_extract, EMC, SMC and DPCLS lookups, and a lot of the performance
+stats associated with the datapath.
+
+Just like with the SIMD DPCLS feature above, SIMD can be applied to the DPIF to
+improve performance.
+
+By default, dpif_scalar is used. The DPIF implementation can be selected by
+name ::
+
+ $ ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512
+ DPIF implementation set to dpif_avx512.
+
+ $ ovs-appctl dpif-netdev/dpif-impl-set dpif_scalar
+ DPIF implementation set to dpif_scalar.
+
+Running Unit Tests with AVX512 DPIF
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Since the AVX512 DPIF is disabled by default, a compile time option is
+available in order to test it with the OVS unit test suite. When building with
+a CPU that supports AVX512, use the following configure option ::
+
+ $ ./configure --enable-dpif-default-avx512
+
+The following line should be seen in the configure output when the above option
+is used ::
+
+ checking whether DPIF AVX512 is default implementation... yes
@@ -30,6 +30,21 @@ AC_DEFUN([OVS_CHECK_DPCLS_AUTOVALIDATOR], [
fi
])
+dnl Set OVS DPIF default implementation at configure time for running the unit
+dnl tests on the whole codebase without modifying tests per DPIF impl
+AC_DEFUN([OVS_CHECK_DPIF_AVX512_DEFAULT], [
+ AC_ARG_ENABLE([dpif-default-avx512],
+ [AC_HELP_STRING([--enable-dpif-default-avx512], [Enable DPIF AVX512 implementation as default.])],
+ [dpifavx512=yes],[dpifavx512=no])
+ AC_MSG_CHECKING([whether DPIF AVX512 is default implementation])
+ if test "$dpifavx512" != yes; then
+ AC_MSG_RESULT([no])
+ else
+ OVS_CFLAGS="$OVS_CFLAGS -DDPIF_AVX512_DEFAULT"
+ AC_MSG_RESULT([yes])
+ fi
+])
+
dnl OVS_ENABLE_WERROR
AC_DEFUN([OVS_ENABLE_WERROR],
[AC_ARG_ENABLE(
@@ -185,6 +185,7 @@ OVS_ENABLE_WERROR
OVS_ENABLE_SPARSE
OVS_CTAGS_IDENTIFIERS
OVS_CHECK_DPCLS_AUTOVALIDATOR
+OVS_CHECK_DPIF_AVX512_DEFAULT
OVS_CHECK_BINUTILS_AVX512
AC_ARG_VAR(KARCH, [Kernel Architecture String])
@@ -116,6 +116,7 @@ lib_libopenvswitch_la_SOURCES = \
lib/dpif-netdev-private-dfc.c \
lib/dpif-netdev-private-dfc.h \
lib/dpif-netdev-private-dpcls.h \
+ lib/dpif-netdev-private-dpif.c \
lib/dpif-netdev-private-dpif.h \
lib/dpif-netdev-private-flow.h \
lib/dpif-netdev-private-thread.h \
@@ -24,6 +24,7 @@
#include "dpif-netdev-perf.h"
#include "dpif-netdev-private.h"
+#include <errno.h>
#include <immintrin.h>
#include "dp-packet.h"
@@ -57,6 +58,19 @@ struct dpif_userdata {
struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST];
};
+int32_t
+dp_netdev_input_outer_avx512_probe(void)
+{
+ bool avx512f_available = dpdk_get_cpu_has_isa("x86_64", "avx512f");
+ bool bmi2_available = dpdk_get_cpu_has_isa("x86_64", "bmi2");
+
+ if (!avx512f_available || !bmi2_available) {
+ return -ENOTSUP;
+ }
+
+ return 0;
+}
+
int32_t
dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
new file mode 100644
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Intel Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "dpif-netdev-private-dpif.h"
+#include "dpif-netdev-private-thread.h"
+
+#include <errno.h>
+#include <string.h>
+
+#include "openvswitch/dynamic-string.h"
+#include "openvswitch/vlog.h"
+#include "util.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_netdev_impl);
+
+enum dpif_netdev_impl_info_idx {
+ DPIF_NETDEV_IMPL_SCALAR,
+ DPIF_NETDEV_IMPL_AVX512
+};
+
+/* Actual list of implementations goes here. */
+static struct dpif_netdev_impl_info_t dpif_impls[] = {
+ /* The default scalar C code implementation. */
+ [DPIF_NETDEV_IMPL_SCALAR] = { .input_func = dp_netdev_input,
+ .probe = NULL,
+ .name = "dpif_scalar", },
+
+#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
+ /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
+ [DPIF_NETDEV_IMPL_AVX512] = { .input_func = dp_netdev_input_outer_avx512,
+ .probe = dp_netdev_input_outer_avx512_probe,
+ .name = "dpif_avx512", },
+#endif
+};
+
+static dp_netdev_input_func default_dpif_func;
+
+dp_netdev_input_func
+dp_netdev_impl_get_default(void)
+{
+ /* For the first call, this will be NULL. Compute the compile time default.
+ */
+ if (!default_dpif_func) {
+ int dpif_idx = DPIF_NETDEV_IMPL_SCALAR;
+
+/* Configure-time overriding to run test suite on all implementations. */
+#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
+#ifdef DPIF_AVX512_DEFAULT
+ dp_netdev_input_func_probe probe;
+
+ /* Check if the compiled default is compatible. */
+ probe = dpif_impls[DPIF_NETDEV_IMPL_AVX512].probe;
+ if (!probe || !probe()) {
+ dpif_idx = DPIF_NETDEV_IMPL_AVX512;
+ }
+#endif
+#endif
+
+ VLOG_INFO("Default DPIF implementation is %s.\n",
+ dpif_impls[dpif_idx].name);
+ default_dpif_func = dpif_impls[dpif_idx].input_func;
+ }
+
+ return default_dpif_func;
+}
+
+/* This function checks all available DPIF implementations, and selects the
+ * returns the function pointer to the one requested by "name".
+ */
+static int32_t
+dp_netdev_impl_get_by_name(const char *name, dp_netdev_input_func *out_func)
+{
+ ovs_assert(name);
+ ovs_assert(out_func);
+
+ uint32_t i;
+
+ for (i = 0; i < ARRAY_SIZE(dpif_impls); i++) {
+ if (strcmp(dpif_impls[i].name, name) == 0) {
+ /* Probe function is optional - so check it is set before exec. */
+ if (dpif_impls[i].probe) {
+ int probe_err = dpif_impls[i].probe();
+ if (probe_err) {
+ *out_func = NULL;
+ return probe_err;
+ }
+ }
+ *out_func = dpif_impls[i].input_func;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+int32_t
+dp_netdev_impl_set_default_by_name(const char *name)
+{
+ dp_netdev_input_func new_default;
+
+ int32_t err = dp_netdev_impl_get_by_name(name, &new_default);
+
+ if (!err) {
+ default_dpif_func = new_default;
+ }
+
+ return err;
+
+}
@@ -23,7 +23,48 @@
struct dp_netdev_pmd_thread;
struct dp_packet_batch;
+/* Typedef for DPIF functions.
+ * Returns whether all packets were processed successfully.
+ */
+typedef int32_t (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ odp_port_t port_no);
+
+/* Probe a DPIF implementation. This allows the implementation to validate CPU
+ * ISA availability. Returns -ENOTSUP if not available, returns 0 if valid to
+ * use.
+ */
+typedef int32_t (*dp_netdev_input_func_probe)(void);
+
+/* Structure describing each available DPIF implementation. */
+struct dpif_netdev_impl_info_t {
+ /* Function pointer to execute to have this DPIF implementation run. */
+ dp_netdev_input_func input_func;
+ /* Function pointer to execute to check the CPU ISA is available to run. If
+ * not necessary, it must be set to NULL which implies that it is always
+ * valid to use. */
+ dp_netdev_input_func_probe probe;
+ /* Name used to select this DPIF implementation. */
+ const char *name;
+};
+
+/* Returns the default DPIF which is first ./configure selected, but can be
+ * overridden at runtime. */
+dp_netdev_input_func dp_netdev_impl_get_default(void);
+
+/* Overrides the default DPIF with the user set DPIF. */
+int32_t dp_netdev_impl_set_default_by_name(const char *name);
+
/* Available DPIF implementations below. */
+int32_t
+dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ odp_port_t in_port);
+
+/* AVX512 enabled DPIF implementation and probe functions. */
+int32_t
+dp_netdev_input_outer_avx512_probe(void);
+
int32_t
dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
@@ -50,16 +50,6 @@ struct dp_netdev_pmd_thread_ctx {
bool smc_enable_db;
};
-/* Forward declaration for typedef. */
-struct dp_netdev_pmd_thread;
-
-/* Typedef for DPIF functions.
- * Returns a bitmask of packets to handle, possibly including upcall/misses.
- */
-typedef int32_t (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t port_no);
-
/* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
* the performance overhead of interrupt processing. Therefore netdev can
* not implement rx-wait for these devices. dpif-netdev needs to poll
@@ -226,3 +226,6 @@ recirculation (only in balance-tcp mode).
When this is the case, the above command prints the load-balancing information
of the bonds configured in datapath \fIdp\fR showing the interface associated
with each bucket (hash).
+.
+.IP "\fBdpif-netdev/dpif-impl-set\fR \fIdpif_impl\fR"
+Sets the DPIF to be used to \fIdpif_impl\fR. By default "dpif_scalar" is used.
@@ -469,8 +469,6 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
const struct flow *flow,
const struct nlattr *actions,
size_t actions_len);
-static int32_t dp_netdev_input(struct dp_netdev_pmd_thread *,
- struct dp_packet_batch *, odp_port_t port_no);
static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
struct dp_packet_batch *);
@@ -967,6 +965,66 @@ dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc OVS_UNUSED,
ds_destroy(&reply);
}
+static void
+dpif_netdev_impl_set(struct unixctl_conn *conn, int argc OVS_UNUSED,
+ const char *argv[], void *aux OVS_UNUSED)
+{
+ /* This function requires just one parameter, the DPIF name. */
+ const char *dpif_name = argv[1];
+ struct shash_node *node;
+
+ static const char *error_description[2] = {
+ "Unknown DPIF implementation",
+ "CPU doesn't support the required instruction for",
+ };
+
+ ovs_mutex_lock(&dp_netdev_mutex);
+ int32_t err = dp_netdev_impl_set_default_by_name(dpif_name);
+
+ if (err) {
+ struct ds reply = DS_EMPTY_INITIALIZER;
+ ds_put_format(&reply, "DPIF implementation not available: %s %s.\n",
+ error_description[ (err == -ENOTSUP) ], dpif_name);
+ const char *reply_str = ds_cstr(&reply);
+ unixctl_command_reply_error(conn, reply_str);
+ VLOG_ERR("%s", reply_str);
+ ds_destroy(&reply);
+ ovs_mutex_unlock(&dp_netdev_mutex);
+ return;
+ }
+
+ SHASH_FOR_EACH (node, &dp_netdevs) {
+ struct dp_netdev *dp = node->data;
+
+ /* Get PMD threads list, required to get DPCLS instances. */
+ size_t n;
+ struct dp_netdev_pmd_thread **pmd_list;
+ sorted_poll_thread_list(dp, &pmd_list, &n);
+
+ for (size_t i = 0; i < n; i++) {
+ struct dp_netdev_pmd_thread *pmd = pmd_list[i];
+ if (pmd->core_id == NON_PMD_CORE_ID) {
+ continue;
+ }
+
+ /* Initialize DPIF function pointer to the newly configured
+ * default. */
+ dp_netdev_input_func default_func = dp_netdev_impl_get_default();
+ atomic_uintptr_t *pmd_func = (void *) &pmd->netdev_input_func;
+ atomic_store_relaxed(pmd_func, (uintptr_t) default_func);
+ };
+ }
+ ovs_mutex_unlock(&dp_netdev_mutex);
+
+ /* Reply with success to command. */
+ struct ds reply = DS_EMPTY_INITIALIZER;
+ ds_put_format(&reply, "DPIF implementation set to %s.\n", dpif_name);
+ const char *reply_str = ds_cstr(&reply);
+ unixctl_command_reply(conn, reply_str);
+ VLOG_INFO("%s", reply_str);
+ ds_destroy(&reply);
+}
+
static void
dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
const char *argv[], void *aux OVS_UNUSED)
@@ -1189,6 +1247,10 @@ dpif_netdev_init(void)
unixctl_command_register("dpif-netdev/subtable-lookup-prio-get", "",
0, 0, dpif_netdev_subtable_lookup_get,
NULL);
+ unixctl_command_register("dpif-netdev/dpif-impl-set",
+ "dpif_implementation_name",
+ 1, 1, dpif_netdev_impl_set,
+ NULL);
return 0;
}
@@ -6126,8 +6188,10 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
hmap_init(&pmd->send_port_cache);
cmap_init(&pmd->tx_bonds);
- /* Initialize the DPIF function pointer to the default scalar version. */
- pmd->netdev_input_func = dp_netdev_input;
+ /* Initialize DPIF function pointer to the default configured version. */
+ dp_netdev_input_func default_func = dp_netdev_impl_get_default();
+ atomic_uintptr_t *pmd_func = (void *) &pmd->netdev_input_func;
+ atomic_init(pmd_func, (uintptr_t) default_func);
/* init the 'flow_cache' since there is no
* actual thread created for NON_PMD_CORE_ID. */
@@ -7100,7 +7164,7 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
}
}
-static int32_t
+int32_t
dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
odp_port_t port_no)