@@ -21,6 +21,8 @@ Post-v2.15.0
using ct(src=0.0.0.0), the source port will be replaced with another
non-colliding port in the ephemeral range (1024, 65535).
* Refactor lib/dpif-netdev.c to multiple header files.
+   * Add avx512 implementation of dpif which can process non-recirculated
+ packets. It supports partial HWOL, EMC, SMC and DPCLS lookups.
- ovs-ctl:
* New option '--no-record-hostname' to disable hostname configuration
in ovsdb on startup.
@@ -33,11 +33,13 @@ lib_libopenvswitchavx512_la_CFLAGS = \
-mavx512f \
-mavx512bw \
-mavx512dq \
+ -mbmi \
-mbmi2 \
-fPIC \
$(AM_CFLAGS)
lib_libopenvswitchavx512_la_SOURCES = \
- lib/dpif-netdev-lookup-avx512-gather.c
+ lib/dpif-netdev-lookup-avx512-gather.c \
+ lib/dpif-netdev-avx512.c
lib_libopenvswitchavx512_la_LDFLAGS = \
-static
endif
@@ -114,6 +116,7 @@ lib_libopenvswitch_la_SOURCES = \
lib/dpif-netdev-private-dfc.c \
lib/dpif-netdev-private-dfc.h \
lib/dpif-netdev-private-dpcls.h \
+ lib/dpif-netdev-private-dpif.h \
lib/dpif-netdev-private-flow.h \
lib/dpif-netdev-private-thread.h \
lib/dpif-netdev-private.h \
new file mode 100644
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 2021 Intel Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __x86_64__
+/* Sparse cannot handle the AVX512 instructions. */
+#if !defined(__CHECKER__)
+
+#include <config.h>
+
+#include "dpif-netdev.h"
+#include "dpif-netdev-perf.h"
+#include "dpif-netdev-private.h"
+
+#include <immintrin.h>
+
+#include "dp-packet.h"
+#include "netdev.h"
+#include "netdev-offload.h"
+
+/* Each AVX512 register (zmm register in assembly notation) can contain up to
+ * 512 bits, which is equivalent to 8 uint64_t variables. This is the maximum
+ * number of miniflow blocks that can be processed in a single pass of the
+ * AVX512 code at a time.
+ */
+#define NUM_U64_IN_ZMM_REG (8)
+
+/* Structure to contain per-packet metadata that must be attributed to the
+ * dp netdev flow. This is unfortunate to have to track per packet, however
+ * it's a bit awkward to maintain them in a performant way. This structure
+ * helps to keep two variables on a single cache line per packet.
+ */
+struct pkt_flow_meta {
+ uint16_t bytes;
+ uint16_t tcp_flags;
+};
+
+/* Structure of heap allocated memory for DPIF internals. */
+struct dpif_userdata {
+ OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
+ struct netdev_flow_key keys[NETDEV_MAX_BURST];
+ OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
+ struct netdev_flow_key *key_ptrs[NETDEV_MAX_BURST];
+ OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
+ struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST];
+};
+
+int32_t
+dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ odp_port_t in_port)
+{
+ /* Allocate DPIF userdata. */
+ if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
+ pmd->netdev_input_func_userdata =
+ xmalloc_pagealign(sizeof(struct dpif_userdata));
+ }
+
+ struct dpif_userdata *ud = pmd->netdev_input_func_userdata;
+ struct netdev_flow_key *keys = ud->keys;
+ struct netdev_flow_key **key_ptrs = ud->key_ptrs;
+ struct pkt_flow_meta *pkt_meta = ud->pkt_meta;
+
+ /* The AVX512 DPIF implementation handles rules in a way that is optimized
+ * for reducing data-movement between HWOL/EMC/SMC and DPCLS. This is
+ * achieved by separating the rule arrays. Bitmasks are kept for each
+ * packet, indicating if it matched in the HWOL/EMC/SMC array or DPCLS
+ * array. Later the two arrays are merged by AVX-512 expand instructions.
+ */
+
+ /* Stores the computed output: a rule pointer for each packet. */
+ /* Used initially for HWOL/EMC/SMC. */
+ struct dpcls_rule *rules[NETDEV_MAX_BURST];
+ /* Used for DPCLS. */
+ struct dpcls_rule *dpcls_rules[NETDEV_MAX_BURST];
+
+ uint32_t dpcls_key_idx = 0;
+
+ for (uint32_t i = 0; i < NETDEV_MAX_BURST; i += NUM_U64_IN_ZMM_REG) {
+ _mm512_storeu_si512(&rules[i], _mm512_setzero_si512());
+ _mm512_storeu_si512(&dpcls_rules[i], _mm512_setzero_si512());
+ }
+
+ const size_t batch_size = dp_packet_batch_size(packets);
+
+ /* Prefetch 2 packets ahead when processing. This was found to perform best
+ * through testing. */
+ const uint32_t prefetch_ahead = 2;
+ const uint32_t initial_prefetch = MIN(prefetch_ahead, batch_size);
+ for (int i = 0; i < initial_prefetch; i++) {
+ struct dp_packet *packet = packets->packets[i];
+ OVS_PREFETCH(dp_packet_data(packet));
+ pkt_metadata_prefetch_init(&packet->md);
+ }
+
+ /* Check if EMC or SMC are enabled. */
+ struct dfc_cache *cache = &pmd->flow_cache;
+ const uint32_t hwol_enabled = netdev_is_flow_api_enabled();
+ const uint32_t emc_enabled = pmd->ctx.emc_insert_min != 0;
+ const uint32_t smc_enabled = pmd->ctx.smc_enable_db;
+
+ uint32_t emc_hits = 0;
+ uint32_t smc_hits = 0;
+
+ /* A 1 bit in this mask indicates a hit, so no DPCLS lookup on the pkt. */
+ uint32_t hwol_emc_smc_hitmask = 0;
+ uint32_t smc_hitmask = 0;
+
+ /* The below while loop is based on the 'iter' variable which has a number
+ * of bits set representing packets that we want to process
+ * (HWOL->MFEX->EMC->SMC). As each packet is processed, we clear (set to 0)
+ * the bit representing that packet using '_blsr_u64()'. The
+ * 'raw_ctz()' will give us the correct index into the 'packets',
+ * 'pkt_meta', 'keys' and 'rules' arrays.
+ *
+ * For one iteration of the while loop, here's some pseudocode as an
+ * example where 'iter' is represented in binary:
+ *
+ * while (iter) { // iter = 1100
+ * uint32_t i = raw_ctz(iter); // i = 2
+ * iter = _blsr_u64(iter); // iter = 1000
+ * // do all processing (HWOL->MFEX->EMC->SMC)
+ * }
+ */
+ uint32_t lookup_pkts_bitmask = (1ULL << batch_size) - 1;
+ uint32_t iter = lookup_pkts_bitmask;
+ while (iter) {
+ uint32_t i = raw_ctz(iter);
+ iter = _blsr_u64(iter);
+
+ if (i + prefetch_ahead < batch_size) {
+ struct dp_packet **dp_packets = packets->packets;
+ /* Prefetch next packet data and metadata. */
+ OVS_PREFETCH(dp_packet_data(dp_packets[i + prefetch_ahead]));
+ pkt_metadata_prefetch_init(&dp_packets[i + prefetch_ahead]->md);
+ }
+
+ /* Get packet pointer from bitmask and packet md. */
+ struct dp_packet *packet = packets->packets[i];
+ pkt_metadata_init(&packet->md, in_port);
+
+ struct dp_netdev_flow *f = NULL;
+
+ /* Check for a partial hardware offload match. */
+ if (hwol_enabled) {
+ if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, in_port, packet, &f))) {
+ /* Packet restoration failed and it was dropped, do not
+ * continue processing. */
+ continue;
+ }
+ if (f) {
+ rules[i] = &f->cr;
+ pkt_meta[i].tcp_flags = parse_tcp_flags(packet);
+ pkt_meta[i].bytes = dp_packet_size(packet);
+ hwol_emc_smc_hitmask |= (1 << i);
+ continue;
+ }
+ }
+
+ /* Do miniflow extract into keys. */
+ struct netdev_flow_key *key = &keys[i];
+ miniflow_extract(packet, &key->mf);
+
+ /* Cache TCP and byte values for all packets. */
+ pkt_meta[i].bytes = dp_packet_size(packet);
+ pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(&key->mf);
+
+ key->len = netdev_flow_key_size(miniflow_n_values(&key->mf));
+ key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, &key->mf);
+
+ if (emc_enabled) {
+ f = emc_lookup(&cache->emc_cache, key);
+
+ if (f) {
+ rules[i] = &f->cr;
+ emc_hits++;
+ hwol_emc_smc_hitmask |= (1 << i);
+ continue;
+ }
+ }
+
+ if (smc_enabled) {
+ f = smc_lookup_single(pmd, packet, key);
+ if (f) {
+ rules[i] = &f->cr;
+ smc_hits++;
+ smc_hitmask |= (1 << i);
+ continue;
+ }
+ }
+
+ /* The flow pointer was not found in HWOL/EMC/SMC, so add it to the
+ * dpcls input keys array for batch lookup later.
+ */
+ key_ptrs[dpcls_key_idx] = &keys[i];
+ dpcls_key_idx++;
+ }
+
+ hwol_emc_smc_hitmask |= smc_hitmask;
+ uint32_t hwol_emc_smc_missmask = ~hwol_emc_smc_hitmask;
+
+ /* DPCLS handles any packets missed by HWOL/EMC/SMC. It operates on the
+ * key_ptrs[] for input miniflows to match, storing results in the
+ * dpcls_rules[] array.
+ */
+ if (dpcls_key_idx > 0) {
+ struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
+ if (OVS_UNLIKELY(!cls)) {
+ return -1;
+ }
+ bool any_miss =
+ !dpcls_lookup(cls, (const struct netdev_flow_key **) key_ptrs,
+ dpcls_rules, dpcls_key_idx, NULL);
+ if (OVS_UNLIKELY(any_miss)) {
+ return -1;
+ }
+
+ /* Merge DPCLS rules and HWOL/EMC/SMC rules. */
+ uint32_t dpcls_idx = 0;
+ for (int i = 0; i < NETDEV_MAX_BURST; i += NUM_U64_IN_ZMM_REG) {
+ /* Indexing here is somewhat complicated due to DPCLS output rule
+ * load index depending on the hitmask of HWOL/EMC/SMC. More
+ * packets from HWOL/EMC/SMC bitmask means less DPCLS rules are
+ * used.
+ */
+ __m512i v_cache_rules = _mm512_loadu_si512(&rules[i]);
+ __m512i v_merged_rules =
+ _mm512_mask_expandloadu_epi64(v_cache_rules,
+ ~hwol_emc_smc_hitmask,
+ &dpcls_rules[dpcls_idx]);
+ _mm512_storeu_si512(&rules[i], v_merged_rules);
+
+ /* Update DPCLS load index and bitmask for HWOL/EMC/SMC hits.
+ * There are NUM_U64_IN_ZMM_REG output pointers per register,
+ * subtract the HWOL/EMC/SMC lanes equals the number of DPCLS rules
+ * consumed.
+ */
+ uint32_t hitmask_FF = (hwol_emc_smc_hitmask & 0xFF);
+ dpcls_idx += NUM_U64_IN_ZMM_REG - __builtin_popcountll(hitmask_FF);
+ hwol_emc_smc_hitmask =
+ (hwol_emc_smc_hitmask >> NUM_U64_IN_ZMM_REG);
+ }
+ }
+
+ /* At this point we have a 1:1 pkt to rules mapping, so update EMC/SMC
+ * if required.
+ */
+ /* Insert SMC and DPCLS hits into EMC. */
+ if (emc_enabled) {
+ uint32_t emc_insert_mask = smc_hitmask | hwol_emc_smc_missmask;
+ emc_insert_mask &= lookup_pkts_bitmask;
+ emc_probabilistic_insert_batch(pmd, keys, &rules[0], emc_insert_mask);
+ }
+ /* Insert DPCLS hits into SMC. */
+ if (smc_enabled) {
+ uint32_t smc_insert_mask = hwol_emc_smc_missmask;
+ smc_insert_mask &= lookup_pkts_bitmask;
+ smc_insert_batch(pmd, keys, &rules[0], smc_insert_mask);
+ }
+
+ /* At this point we don't return error anymore, so commit stats here. */
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_RECV, batch_size);
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, emc_hits);
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, smc_hits);
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
+ dpcls_key_idx);
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_LOOKUP,
+ dpcls_key_idx);
+
+ /* Initialize the "Action Batch" for each flow handled below. */
+ struct dp_packet_batch action_batch;
+ action_batch.trunc = 0;
+
+ while (lookup_pkts_bitmask) {
+ uint32_t rule_pkt_idx = raw_ctz(lookup_pkts_bitmask);
+ uint64_t needle = (uintptr_t) rules[rule_pkt_idx];
+
+        /* Parallel compare NUM_U64_IN_ZMM_REG flow pointers to the needle,
+         * creating a hit bitmask.
+         */
+ uint32_t batch_bitmask = 0;
+ for (uint32_t j = 0; j < NETDEV_MAX_BURST; j += NUM_U64_IN_ZMM_REG) {
+ /* Pre-calculate store addr. */
+ uint32_t num_pkts_in_batch = __builtin_popcountll(batch_bitmask);
+ void *store_addr = &action_batch.packets[num_pkts_in_batch];
+
+ /* Search for identical flow* in burst, update bitmask. */
+ __m512i v_needle = _mm512_set1_epi64(needle);
+ __m512i v_hay = _mm512_loadu_si512(&rules[j]);
+ __mmask8 k_cmp_bits = _mm512_cmpeq_epi64_mask(v_needle, v_hay);
+ uint32_t cmp_bits = k_cmp_bits;
+ batch_bitmask |= cmp_bits << j;
+
+ /* Compress and store the batched packets. */
+ struct dp_packet **packets_ptrs = &packets->packets[j];
+ __m512i v_pkt_ptrs = _mm512_loadu_si512(packets_ptrs);
+ _mm512_mask_compressstoreu_epi64(store_addr, cmp_bits, v_pkt_ptrs);
+ }
+
+ /* Strip all packets in this batch from the lookup_pkts_bitmask. */
+ lookup_pkts_bitmask &= (~batch_bitmask);
+ action_batch.count = __builtin_popcountll(batch_bitmask);
+
+ /* Loop over all packets in this batch, to gather the byte and tcp_flag
+ * values, and pass them to the execute function. It would be nice to
+ * optimize this away, however it is not easy to refactor in dpif.
+ */
+ uint32_t bytes = 0;
+ uint16_t tcp_flags = 0;
+ uint32_t bitmask_iter = batch_bitmask;
+ for (int i = 0; i < action_batch.count; i++) {
+ uint32_t idx = raw_ctz(bitmask_iter);
+ bitmask_iter = _blsr_u64(bitmask_iter);
+
+ bytes += pkt_meta[idx].bytes;
+ tcp_flags |= pkt_meta[idx].tcp_flags;
+ }
+
+ dp_netdev_batch_execute(pmd, &action_batch, rules[rule_pkt_idx],
+ bytes, tcp_flags);
+ }
+
+ return 0;
+}
+
+#endif
+#endif
@@ -81,6 +81,14 @@ extern "C" {
#define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX / \
DEFAULT_EM_FLOW_INSERT_INV_PROB)
+/* Forward declaration for SMC function prototype that requires access to
+ * 'struct dp_netdev_pmd_thread'. */
+struct dp_netdev_pmd_thread;
+
+/* Forward declaration for EMC and SMC batch insert function prototypes that
+ * require access to 'struct dpcls_rule'. */
+struct dpcls_rule;
+
struct emc_entry {
struct dp_netdev_flow *flow;
struct netdev_flow_key key; /* key.hash used for emc hash value. */
@@ -156,6 +164,23 @@ emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
return NULL;
}
+/* Insert a batch of keys/flows into the EMC and SMC caches. */
+void
+emc_probabilistic_insert_batch(struct dp_netdev_pmd_thread *pmd,
+ const struct netdev_flow_key *keys,
+ struct dpcls_rule **rules,
+ uint32_t emc_insert_mask);
+
+void
+smc_insert_batch(struct dp_netdev_pmd_thread *pmd,
+ const struct netdev_flow_key *keys,
+ struct dpcls_rule **rules,
+ uint32_t smc_insert_mask);
+
+struct dp_netdev_flow *
+smc_lookup_single(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet *packet,
+ struct netdev_flow_key *key);
#ifdef __cplusplus
}
@@ -33,6 +33,7 @@ extern "C" {
/* Forward declaration for lookup_func typedef. */
struct dpcls_subtable;
struct dpcls_rule;
+struct dpcls;
/* Must be public as it is instantiated in subtable struct below. */
struct netdev_flow_key {
@@ -121,6 +122,12 @@ dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet *packet,
return hash;
}
+/* Allow other implementations to call dpcls_lookup() for subtable search. */
+bool
+dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
+ struct dpcls_rule **rules, const size_t cnt,
+ int *num_lookups_p);
+
#ifdef __cplusplus
}
#endif
new file mode 100644
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Intel Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DPIF_NETDEV_PRIVATE_DPIF_H
+#define DPIF_NETDEV_PRIVATE_DPIF_H 1
+
+#include "openvswitch/types.h"
+
+/* Forward declarations to avoid including files. */
+struct dp_netdev_pmd_thread;
+struct dp_packet_batch;
+
+/* Available DPIF implementations below. */
+int32_t
+dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ odp_port_t in_port);
+
+#endif /* DPIF_NETDEV_PRIVATE_DPIF_H */
@@ -21,6 +21,7 @@
#include "dpif.h"
#include "dpif-netdev-perf.h"
#include "dpif-netdev-private-dfc.h"
+#include "dpif-netdev-private-dpif.h"
#include <stdbool.h>
#include <stdint.h>
@@ -45,14 +46,19 @@ struct dp_netdev_pmd_thread_ctx {
struct dp_netdev_rxq *last_rxq;
/* EMC insertion probability context for the current processing cycle. */
uint32_t emc_insert_min;
+ /* Enable the SMC cache from ovsdb config. */
+ bool smc_enable_db;
};
/* Forward declaration for typedef. */
struct dp_netdev_pmd_thread;
-typedef void (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t port_no);
+/* Typedef for DPIF functions. Returns 0 on success, or a negative value if
+ * the batch was not processed and the caller should fall back to the scalar
+ * dp_netdev_input() implementation. */
+typedef int32_t (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ odp_port_t port_no);
/* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
* the performance overhead of interrupt processing. Therefore netdev can
@@ -111,6 +117,9 @@ struct dp_netdev_pmd_thread {
/* Function pointer to call for dp_netdev_input() functionality. */
ATOMIC(dp_netdev_input_func) netdev_input_func;
+ /* Pointer for per-DPIF implementation scratch space. */
+ void *netdev_input_func_userdata;
+
struct seq *reload_seq;
uint64_t last_reload_seq;
@@ -31,4 +31,23 @@
#include "dpif-netdev-private-dfc.h"
#include "dpif-netdev-private-thread.h"
-#endif /* netdev-private.h */
+/* Allow other implementations to lookup the DPCLS instances. */
+struct dpcls *
+dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
+ odp_port_t in_port);
+
+/* Allow other implementations to execute actions on a batch. */
+void
+dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ struct dpcls_rule *rule,
+ uint32_t bytes,
+ uint16_t tcp_flags);
+
+int
+dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
+ odp_port_t port_no,
+ struct dp_packet *packet,
+ struct dp_netdev_flow **flow);
+
+#endif /* dpif-netdev-private.h */
@@ -182,10 +182,6 @@ static uint32_t dpcls_subtable_lookup_reprobe(struct dpcls *cls);
static void dpcls_insert(struct dpcls *, struct dpcls_rule *,
const struct netdev_flow_key *mask);
static void dpcls_remove(struct dpcls *, struct dpcls_rule *);
-static bool dpcls_lookup(struct dpcls *cls,
- const struct netdev_flow_key *keys[],
- struct dpcls_rule **rules, size_t cnt,
- int *num_lookups_p);
/* Set of supported meter flags */
#define DP_SUPPORTED_METER_FLAGS_MASK \
@@ -473,7 +469,7 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
const struct flow *flow,
const struct nlattr *actions,
size_t actions_len);
-static void dp_netdev_input(struct dp_netdev_pmd_thread *,
+static int32_t dp_netdev_input(struct dp_netdev_pmd_thread *,
struct dp_packet_batch *, odp_port_t port_no);
static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
struct dp_packet_batch *);
@@ -545,7 +541,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
bool purge);
static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
struct tx_port *tx);
-static inline struct dpcls *
+inline struct dpcls *
dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
odp_port_t in_port);
@@ -1935,7 +1931,7 @@ void dp_netdev_flow_unref(struct dp_netdev_flow *flow)
}
}
-static inline struct dpcls *
+inline struct dpcls *
dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
odp_port_t in_port)
{
@@ -2767,13 +2763,46 @@ smc_insert(struct dp_netdev_pmd_thread *pmd,
bucket->flow_idx[i] = index;
}
+inline void
+emc_probabilistic_insert_batch(struct dp_netdev_pmd_thread *pmd,
+ const struct netdev_flow_key *keys,
+ struct dpcls_rule **rules,
+ uint32_t emc_insert_mask)
+{
+ while (emc_insert_mask) {
+ uint32_t i = raw_ctz(emc_insert_mask);
+ emc_insert_mask &= emc_insert_mask - 1;
+        /* Get the required parameters for EMC/SMC from the rule */
+ struct dp_netdev_flow *flow = dp_netdev_flow_cast(rules[i]);
+ /* Insert the key into EMC/SMC. */
+ emc_probabilistic_insert(pmd, &keys[i], flow);
+ }
+}
+
+inline void
+smc_insert_batch(struct dp_netdev_pmd_thread *pmd,
+ const struct netdev_flow_key *keys,
+ struct dpcls_rule **rules,
+ uint32_t smc_insert_mask)
+{
+ while (smc_insert_mask) {
+ uint32_t i = raw_ctz(smc_insert_mask);
+ smc_insert_mask &= smc_insert_mask - 1;
+        /* Get the required parameters for EMC/SMC from the rule */
+ struct dp_netdev_flow *flow = dp_netdev_flow_cast(rules[i]);
+ uint32_t hash = dp_netdev_flow_hash(&flow->ufid);
+ /* Insert the key into EMC/SMC. */
+ smc_insert(pmd, &keys[i], hash);
+ }
+}
+
static struct dp_netdev_flow *
dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread *pmd,
const struct netdev_flow_key *key,
int *lookup_num_p)
{
struct dpcls *cls;
- struct dpcls_rule *rule;
+ struct dpcls_rule *rule = NULL;
odp_port_t in_port = u32_to_odp(MINIFLOW_GET_U32(&key->mf,
in_port.odp_port));
struct dp_netdev_flow *netdev_flow = NULL;
@@ -4288,7 +4317,10 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
}
/* Process packet batch. */
- pmd->netdev_input_func(pmd, &batch, port_no);
+ int ret = pmd->netdev_input_func(pmd, &batch, port_no);
+ if (ret) {
+ dp_netdev_input(pmd, &batch, port_no);
+ }
/* Assign processing cycles to rx queue. */
cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
@@ -5306,6 +5338,8 @@ dpif_netdev_run(struct dpif *dpif)
non_pmd->ctx.emc_insert_min = 0;
}
+ non_pmd->ctx.smc_enable_db = dp->smc_enable_db;
+
for (i = 0; i < port->n_rxq; i++) {
if (!netdev_rxq_enabled(port->rxqs[i].rx)) {
@@ -5577,6 +5611,8 @@ reload:
pmd->ctx.emc_insert_min = 0;
}
+ pmd->ctx.smc_enable_db = pmd->dp->smc_enable_db;
+
process_packets =
dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
poll_list[i].port_no);
@@ -6474,6 +6510,24 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
actions->actions, actions->size);
}
+void
+dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ struct dpcls_rule *rule,
+ uint32_t bytes,
+ uint16_t tcp_flags)
+{
+ /* Gets action* from the rule. */
+ struct dp_netdev_flow *flow = dp_netdev_flow_cast(rule);
+ struct dp_netdev_actions *actions = dp_netdev_flow_get_actions(flow);
+
+ dp_netdev_flow_used(flow, dp_packet_batch_size(packets), bytes,
+ tcp_flags, pmd->ctx.now / 1000);
+ const uint32_t steal = 1;
+ dp_netdev_execute_actions(pmd, packets, steal, &flow->flow,
+ actions->actions, actions->size);
+}
+
static inline void
dp_netdev_queue_batches(struct dp_packet *pkt,
struct dp_netdev_flow *flow, uint16_t tcp_flags,
@@ -6578,10 +6632,34 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, n_smc_hit);
}
+struct dp_netdev_flow *
+smc_lookup_single(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet *packet,
+ struct netdev_flow_key *key)
+{
+ const struct cmap_node *flow_node = smc_entry_get(pmd, key->hash);
+
+ if (OVS_LIKELY(flow_node != NULL)) {
+ struct dp_netdev_flow *flow = NULL;
+
+ CMAP_NODE_FOR_EACH (flow, node, flow_node) {
+            /* Since we don't have per-port megaflow to check the port
+ * number, we need to verify that the input ports match. */
+ if (OVS_LIKELY(dpcls_rule_matches_key(&flow->cr, key) &&
+ flow->flow.in_port.odp_port == packet->md.in_port.odp_port)) {
+
+ return (void *) flow;
+ }
+ }
+ }
+
+ return NULL;
+}
+
static struct tx_port * pmd_send_port_cache_lookup(
const struct dp_netdev_pmd_thread *pmd, odp_port_t port_no);
-static inline int
+inline int
dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
odp_port_t port_no,
struct dp_packet *packet,
@@ -7022,12 +7100,13 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
}
}
-static void
+static int32_t
dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
odp_port_t port_no)
{
dp_netdev_input__(pmd, packets, false, port_no);
+ return 0;
}
static void
@@ -8478,7 +8557,7 @@ dpcls_flow_key_gen_masks(const struct netdev_flow_key *tbl,
/* Returns true if 'target' satisfies 'key' in 'mask', that is, if each 1-bit
* in 'mask' the values in 'key' and 'target' are the same. */
-bool
+inline bool
dpcls_rule_matches_key(const struct dpcls_rule *rule,
const struct netdev_flow_key *target)
{
@@ -8504,7 +8583,7 @@ dpcls_rule_matches_key(const struct dpcls_rule *rule,
* priorities, instead returning any rule which matches the flow.
*
* Returns true if all miniflows found a corresponding rule. */
-static bool
+bool
dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
struct dpcls_rule **rules, const size_t cnt,
int *num_lookups_p)