@@ -21,6 +21,8 @@ Post-v2.15.0
using ct(src=0.0.0.0), the source port will be replaced with another
non-colliding port in the ephemeral range (1024, 65535).
* Refactor lib/dpif-netdev.c to multiple header files.
+   * Add avx512 implementation of dpif which can process non-recirculated
+ packets. It supports partial HWOL, EMC, SMC and DPCLS lookups.
- ovs-ctl:
* New option '--no-record-hostname' to disable hostname configuration
in ovsdb on startup.
@@ -33,11 +33,13 @@ lib_libopenvswitchavx512_la_CFLAGS = \
-mavx512f \
-mavx512bw \
-mavx512dq \
+ -mbmi \
-mbmi2 \
-fPIC \
$(AM_CFLAGS)
lib_libopenvswitchavx512_la_SOURCES = \
- lib/dpif-netdev-lookup-avx512-gather.c
+ lib/dpif-netdev-lookup-avx512-gather.c \
+ lib/dpif-netdev-avx512.c
lib_libopenvswitchavx512_la_LDFLAGS = \
-static
endif
@@ -114,6 +116,7 @@ lib_libopenvswitch_la_SOURCES = \
lib/dpif-netdev-private-dfc.c \
lib/dpif-netdev-private-dfc.h \
lib/dpif-netdev-private-dpcls.h \
+ lib/dpif-netdev-private-dpif.h \
lib/dpif-netdev-private-flow.h \
lib/dpif-netdev-private-thread.h \
lib/dpif-netdev-private.h \
new file mode 100644
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 2021 Intel Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __x86_64__
+/* Sparse cannot handle the AVX512 instructions. */
+#if !defined(__CHECKER__)
+
+#include <config.h>
+
+#include "dpif-netdev.h"
+#include "dpif-netdev-perf.h"
+#include "dpif-netdev-private.h"
+
+#include <immintrin.h>
+
+#include "dp-packet.h"
+#include "netdev.h"
+#include "netdev-offload.h"
+
+/* Each AVX512 register (zmm register in assembly notation) can contain up to
+ * 512 bits, which is equivalent to 8 uint64_t variables. This is the maximum
+ * number of miniflow blocks that can be processed in a single pass of the
+ * AVX512 code at a time.
+ */
+#define NUM_U64_IN_ZMM_REG (8)
+
+/* Structure to contain per-packet metadata that must be attributed to the
+ * dp netdev flow. This is unfortunate to have to track per packet, however
+ * it's a bit awkward to maintain them in a performant way. This structure
+ * helps to keep two variables on a single cache line per packet.
+ */
+struct pkt_flow_meta {
+ uint16_t bytes;
+ uint16_t tcp_flags;
+};
+
+/* Structure of heap allocated memory for DPIF internals. */
+struct dpif_userdata {
+ OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
+ struct netdev_flow_key keys[NETDEV_MAX_BURST];
+ OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
+ struct netdev_flow_key *key_ptrs[NETDEV_MAX_BURST];
+ OVS_ALIGNED_VAR(CACHE_LINE_SIZE)
+ struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST];
+};
+
+int32_t
+dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ odp_port_t in_port)
+{
+ /* Allocate DPIF userdata. */
+ if (OVS_UNLIKELY(!pmd->netdev_input_func_userdata)) {
+ pmd->netdev_input_func_userdata =
+ xmalloc_pagealign(sizeof(struct dpif_userdata));
+ }
+
+ struct dpif_userdata *ud = pmd->netdev_input_func_userdata;
+ struct netdev_flow_key *keys = ud->keys;
+ struct netdev_flow_key **key_ptrs = ud->key_ptrs;
+ struct pkt_flow_meta *pkt_meta = ud->pkt_meta;
+
+ /* The AVX512 DPIF implementation handles rules in a way that is optimized
+ * for reducing data-movement between HWOL/EMC/SMC and DPCLS. This is
+ * achieved by separating the rule arrays. Bitmasks are kept for each
+ * packet, indicating if it matched in the HWOL/EMC/SMC array or DPCLS
+ * array. Later the two arrays are merged by AVX-512 expand instructions.
+ */
+
+ /* Stores the computed output: a rule pointer for each packet. */
+ /* Used initially for HWOL/EMC/SMC. */
+ struct dpcls_rule *rules[NETDEV_MAX_BURST];
+ /* Used for DPCLS. */
+ struct dpcls_rule *dpcls_rules[NETDEV_MAX_BURST];
+
+ uint32_t dpcls_key_idx = 0;
+
+ for (uint32_t i = 0; i < NETDEV_MAX_BURST; i += NUM_U64_IN_ZMM_REG) {
+ _mm512_storeu_si512(&rules[i], _mm512_setzero_si512());
+ _mm512_storeu_si512(&dpcls_rules[i], _mm512_setzero_si512());
+ }
+
+ const size_t batch_size = dp_packet_batch_size(packets);
+
+ /* Prefetch 2 packets ahead when processing. This was found to perform best
+ * through testing. */
+ const uint32_t prefetch_ahead = 2;
+ const uint32_t initial_prefetch = MIN(prefetch_ahead, batch_size);
+ for (int i = 0; i < initial_prefetch; i++) {
+ struct dp_packet *packet = packets->packets[i];
+ OVS_PREFETCH(dp_packet_data(packet));
+ pkt_metadata_prefetch_init(&packet->md);
+ }
+
+ /* Check if EMC or SMC are enabled. */
+ struct dfc_cache *cache = &pmd->flow_cache;
+ const uint32_t hwol_enabled = netdev_is_flow_api_enabled();
+ const uint32_t emc_enabled = pmd->ctx.emc_insert_min != 0;
+ const uint32_t smc_enabled = pmd->ctx.smc_enable_db;
+
+ uint32_t emc_hits = 0;
+ uint32_t smc_hits = 0;
+
+ /* A 1 bit in this mask indicates a hit, so no DPCLS lookup on the pkt. */
+ uint32_t hwol_emc_smc_hitmask = 0;
+ uint32_t smc_hitmask = 0;
+
+ /* The below while loop is based on the 'iter' variable which has a number
+ * of bits set representing packets that we want to process
+ * (HWOL->MFEX->EMC->SMC). As each packet is processed, we clear (set to 0)
+ * the bit representing that packet using '_blsr_u64()'. The
+ * 'raw_ctz()' will give us the correct index into the 'packets',
+ * 'pkt_meta', 'keys' and 'rules' arrays.
+ *
+ * For one iteration of the while loop, here's some pseudocode as an
+ * example where 'iter' is represented in binary:
+ *
+ * while (iter) { // iter = 1100
+ * uint32_t i = raw_ctz(iter); // i = 2
+ * iter = _blsr_u64(iter); // iter = 1000
+ * // do all processing (HWOL->MFEX->EMC->SMC)
+ * }
+ */
+ uint32_t lookup_pkts_bitmask = (1ULL << batch_size) - 1;
+ uint32_t iter = lookup_pkts_bitmask;
+ while (iter) {
+ uint32_t i = raw_ctz(iter);
+ iter = _blsr_u64(iter);
+
+ if (i + prefetch_ahead < batch_size) {
+ struct dp_packet **dp_packets = packets->packets;
+ /* Prefetch next packet data and metadata. */
+ OVS_PREFETCH(dp_packet_data(dp_packets[i + prefetch_ahead]));
+ pkt_metadata_prefetch_init(&dp_packets[i + prefetch_ahead]->md);
+ }
+
+ /* Get packet pointer from bitmask and packet md. */
+ struct dp_packet *packet = packets->packets[i];
+ pkt_metadata_init(&packet->md, in_port);
+
+ struct dp_netdev_flow *f = NULL;
+
+ /* Check for a partial hardware offload match. */
+ if (hwol_enabled) {
+ if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, in_port, packet, &f))) {
+ /* Packet restoration failed and it was dropped, do not
+ * continue processing. */
+ continue;
+ }
+ if (f) {
+ rules[i] = &f->cr;
+ pkt_meta[i].tcp_flags = parse_tcp_flags(packet);
+ pkt_meta[i].bytes = dp_packet_size(packet);
+ hwol_emc_smc_hitmask |= (1 << i);
+ continue;
+ }
+ }
+
+ /* Do miniflow extract into keys. */
+ struct netdev_flow_key *key = &keys[i];
+ miniflow_extract(packet, &key->mf);
+
+ /* Cache TCP and byte values for all packets. */
+ pkt_meta[i].bytes = dp_packet_size(packet);
+ pkt_meta[i].tcp_flags = miniflow_get_tcp_flags(&key->mf);
+
+ key->len = netdev_flow_key_size(miniflow_n_values(&key->mf));
+ key->hash = dpif_netdev_packet_get_rss_hash_orig_pkt(packet, &key->mf);
+
+ if (emc_enabled) {
+ f = emc_lookup(&cache->emc_cache, key);
+
+ if (f) {
+ rules[i] = &f->cr;
+ emc_hits++;
+ hwol_emc_smc_hitmask |= (1 << i);
+ continue;
+ }
+ }
+
+ if (smc_enabled) {
+ f = smc_lookup_single(pmd, packet, key);
+ if (f) {
+ rules[i] = &f->cr;
+ smc_hits++;
+ smc_hitmask |= (1 << i);
+ continue;
+ }
+ }
+
+ /* The flow pointer was not found in HWOL/EMC/SMC, so add it to the
+ * dpcls input keys array for batch lookup later.
+ */
+ key_ptrs[dpcls_key_idx] = &keys[i];
+ dpcls_key_idx++;
+ }
+
+ hwol_emc_smc_hitmask |= smc_hitmask;
+ uint32_t hwol_emc_smc_missmask = ~hwol_emc_smc_hitmask;
+
+ /* DPCLS handles any packets missed by HWOL/EMC/SMC. It operates on the
+ * key_ptrs[] for input miniflows to match, storing results in the
+ * dpcls_rules[] array.
+ */
+ if (dpcls_key_idx > 0) {
+ struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port);
+ if (OVS_UNLIKELY(!cls)) {
+ return -1;
+ }
+ bool any_miss =
+ !dpcls_lookup(cls, (const struct netdev_flow_key **) key_ptrs,
+ dpcls_rules, dpcls_key_idx, NULL);
+ if (OVS_UNLIKELY(any_miss)) {
+ return -1;
+ }
+
+ /* Merge DPCLS rules and HWOL/EMC/SMC rules. */
+ uint32_t dpcls_idx = 0;
+ for (int i = 0; i < NETDEV_MAX_BURST; i += NUM_U64_IN_ZMM_REG) {
+ /* Indexing here is somewhat complicated due to DPCLS output rule
+ * load index depending on the hitmask of HWOL/EMC/SMC. More
+ * packets from HWOL/EMC/SMC bitmask means less DPCLS rules are
+ * used.
+ */
+ __m512i v_cache_rules = _mm512_loadu_si512(&rules[i]);
+ __m512i v_merged_rules =
+ _mm512_mask_expandloadu_epi64(v_cache_rules,
+ ~hwol_emc_smc_hitmask,
+ &dpcls_rules[dpcls_idx]);
+ _mm512_storeu_si512(&rules[i], v_merged_rules);
+
+ /* Update DPCLS load index and bitmask for HWOL/EMC/SMC hits.
+ * There are NUM_U64_IN_ZMM_REG output pointers per register,
+ * subtract the HWOL/EMC/SMC lanes equals the number of DPCLS rules
+ * consumed.
+ */
+ uint32_t hitmask_FF = (hwol_emc_smc_hitmask & 0xFF);
+ dpcls_idx += NUM_U64_IN_ZMM_REG - __builtin_popcountll(hitmask_FF);
+ hwol_emc_smc_hitmask =
+ (hwol_emc_smc_hitmask >> NUM_U64_IN_ZMM_REG);
+ }
+ }
+
+ /* At this point we have a 1:1 pkt to rules mapping, so update EMC/SMC
+ * if required.
+ */
+ /* Insert SMC and DPCLS hits into EMC. */
+ if (emc_enabled) {
+ uint32_t emc_insert_mask = smc_hitmask | hwol_emc_smc_missmask;
+ emc_insert_mask &= lookup_pkts_bitmask;
+ emc_probabilistic_insert_batch(pmd, keys, &rules[0], emc_insert_mask);
+ }
+ /* Insert DPCLS hits into SMC. */
+ if (smc_enabled) {
+ uint32_t smc_insert_mask = hwol_emc_smc_missmask;
+ smc_insert_mask &= lookup_pkts_bitmask;
+ smc_insert_batch(pmd, keys, &rules[0], smc_insert_mask);
+ }
+
+ /* At this point we don't return error anymore, so commit stats here. */
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_RECV, batch_size);
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_EXACT_HIT, emc_hits);
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, smc_hits);
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_HIT,
+ dpcls_key_idx);
+ pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_MASKED_LOOKUP,
+ dpcls_key_idx);
+
+ /* Initialize the "Action Batch" for each flow handled below. */
+ struct dp_packet_batch action_batch;
+ action_batch.trunc = 0;
+
+ while (lookup_pkts_bitmask) {
+ uint32_t rule_pkt_idx = raw_ctz(lookup_pkts_bitmask);
+ uint64_t needle = (uintptr_t) rules[rule_pkt_idx];
+
+        /* Parallel compare NUM_U64_IN_ZMM_REG flow pointers to the needle,
+         * creating a hit bitmask.
+         */
+ uint32_t batch_bitmask = 0;
+ for (uint32_t j = 0; j < NETDEV_MAX_BURST; j += NUM_U64_IN_ZMM_REG) {
+ /* Pre-calculate store addr. */
+ uint32_t num_pkts_in_batch = __builtin_popcountll(batch_bitmask);
+ void *store_addr = &action_batch.packets[num_pkts_in_batch];
+
+ /* Search for identical flow* in burst, update bitmask. */
+ __m512i v_needle = _mm512_set1_epi64(needle);
+ __m512i v_hay = _mm512_loadu_si512(&rules[j]);
+ __mmask8 k_cmp_bits = _mm512_cmpeq_epi64_mask(v_needle, v_hay);
+ uint32_t cmp_bits = k_cmp_bits;
+ batch_bitmask |= cmp_bits << j;
+
+ /* Compress and store the batched packets. */
+ struct dp_packet **packets_ptrs = &packets->packets[j];
+ __m512i v_pkt_ptrs = _mm512_loadu_si512(packets_ptrs);
+ _mm512_mask_compressstoreu_epi64(store_addr, cmp_bits, v_pkt_ptrs);
+ }
+
+ /* Strip all packets in this batch from the lookup_pkts_bitmask. */
+ lookup_pkts_bitmask &= (~batch_bitmask);
+ action_batch.count = __builtin_popcountll(batch_bitmask);
+
+ /* Loop over all packets in this batch, to gather the byte and tcp_flag
+ * values, and pass them to the execute function. It would be nice to
+ * optimize this away, however it is not easy to refactor in dpif.
+ */
+ uint32_t bytes = 0;
+ uint16_t tcp_flags = 0;
+ uint32_t bitmask_iter = batch_bitmask;
+ for (int i = 0; i < action_batch.count; i++) {
+ uint32_t idx = raw_ctz(bitmask_iter);
+ bitmask_iter = _blsr_u64(bitmask_iter);
+
+ bytes += pkt_meta[idx].bytes;
+ tcp_flags |= pkt_meta[idx].tcp_flags;
+ }
+
+ dp_netdev_batch_execute(pmd, &action_batch, rules[rule_pkt_idx],
+ bytes, tcp_flags);
+ }
+
+ return 0;
+}
+
+#endif
+#endif
@@ -81,6 +81,14 @@ extern "C" {
#define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX / \
DEFAULT_EM_FLOW_INSERT_INV_PROB)
+/* Forward declaration for SMC function prototype that requires access to
+ * 'struct dp_netdev_pmd_thread'. */
+struct dp_netdev_pmd_thread;
+
+/* Forward declaration for EMC and SMC batch insert function prototypes that
+ * require access to 'struct dpcls_rule'. */
+struct dpcls_rule;
+
struct emc_entry {
struct dp_netdev_flow *flow;
struct netdev_flow_key key; /* key.hash used for emc hash value. */
@@ -156,6 +164,23 @@ emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
return NULL;
}
+/* Insert a batch of keys/flows into the EMC and SMC caches. */
+void
+emc_probabilistic_insert_batch(struct dp_netdev_pmd_thread *pmd,
+ const struct netdev_flow_key *keys,
+ struct dpcls_rule **rules,
+ uint32_t emc_insert_mask);
+
+void
+smc_insert_batch(struct dp_netdev_pmd_thread *pmd,
+ const struct netdev_flow_key *keys,
+ struct dpcls_rule **rules,
+ uint32_t smc_insert_mask);
+
+struct dp_netdev_flow *
+smc_lookup_single(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet *packet,
+ struct netdev_flow_key *key);
#ifdef __cplusplus
}
@@ -33,6 +33,7 @@ extern "C" {
/* Forward declaration for lookup_func typedef. */
struct dpcls_subtable;
struct dpcls_rule;
+struct dpcls;
/* Must be public as it is instantiated in subtable struct below. */
struct netdev_flow_key {
@@ -121,6 +122,12 @@ dpif_netdev_packet_get_rss_hash_orig_pkt(struct dp_packet *packet,
return hash;
}
+/* Allow other implementations to call dpcls_lookup() for subtable search. */
+bool
+dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
+ struct dpcls_rule **rules, const size_t cnt,
+ int *num_lookups_p);
+
#ifdef __cplusplus
}
#endif
new file mode 100644
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Intel Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DPIF_NETDEV_PRIVATE_DPIF_H
+#define DPIF_NETDEV_PRIVATE_DPIF_H 1
+
+#include "openvswitch/types.h"
+
+/* Forward declarations to avoid including files. */
+struct dp_netdev_pmd_thread;
+struct dp_packet_batch;
+
+/* Available DPIF implementations below. */
+int32_t
+dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ odp_port_t in_port);
+
+#endif /* DPIF_NETDEV_PRIVATE_DPIF_H */
@@ -21,6 +21,7 @@
#include "dpif.h"
#include "dpif-netdev-perf.h"
#include "dpif-netdev-private-dfc.h"
+#include "dpif-netdev-private-dpif.h"
#include <stdbool.h>
#include <stdint.h>
@@ -45,14 +46,19 @@ struct dp_netdev_pmd_thread_ctx {
struct dp_netdev_rxq *last_rxq;
/* EMC insertion probability context for the current processing cycle. */
uint32_t emc_insert_min;
+ /* Enable the SMC cache from ovsdb config. */
+ bool smc_enable_db;
};
/* Forward declaration for typedef. */
struct dp_netdev_pmd_thread;
-typedef void (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
- struct dp_packet_batch *packets,
- odp_port_t port_no);
+/* Typedef for DPIF functions. Returns 0 on success, or a negative value if
+ * the batch was not processed and the caller should fall back to the scalar
+ * dp_netdev_input() implementation. */
+typedef int32_t (*dp_netdev_input_func)(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ odp_port_t port_no);
/* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
* the performance overhead of interrupt processing. Therefore netdev can
@@ -111,6 +117,9 @@ struct dp_netdev_pmd_thread {
/* Function pointer to call for dp_netdev_input() functionality. */
ATOMIC(dp_netdev_input_func) netdev_input_func;
+ /* Pointer for per-DPIF implementation scratch space. */
+ void *netdev_input_func_userdata;
+
struct seq *reload_seq;
uint64_t last_reload_seq;
@@ -31,4 +31,23 @@
#include "dpif-netdev-private-dfc.h"
#include "dpif-netdev-private-thread.h"
-#endif /* netdev-private.h */
+/* Allow other implementations to lookup the DPCLS instances. */
+struct dpcls *
+dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
+ odp_port_t in_port);
+
+/* Allow other implementations to execute actions on a batch. */
+void
+dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ struct dpcls_rule *rule,
+ uint32_t bytes,
+ uint16_t tcp_flags);
+
+int
+dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
+ odp_port_t port_no,
+ struct dp_packet *packet,
+ struct dp_netdev_flow **flow);
+
+#endif /* dpif-netdev-private.h */
@@ -182,10 +182,6 @@ static uint32_t dpcls_subtable_lookup_reprobe(struct dpcls *cls);
static void dpcls_insert(struct dpcls *, struct dpcls_rule *,
const struct netdev_flow_key *mask);
static void dpcls_remove(struct dpcls *, struct dpcls_rule *);
-static bool dpcls_lookup(struct dpcls *cls,
- const struct netdev_flow_key *keys[],
- struct dpcls_rule **rules, size_t cnt,
- int *num_lookups_p);
/* Set of supported meter flags */
#define DP_SUPPORTED_METER_FLAGS_MASK \
@@ -473,7 +469,7 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
const struct flow *flow,
const struct nlattr *actions,
size_t actions_len);
-static void dp_netdev_input(struct dp_netdev_pmd_thread *,
+static int32_t dp_netdev_input(struct dp_netdev_pmd_thread *,
struct dp_packet_batch *, odp_port_t port_no);
static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
struct dp_packet_batch *);
@@ -545,7 +541,7 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
bool purge);
static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
struct tx_port *tx);
-static inline struct dpcls *
+inline struct dpcls *
dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
odp_port_t in_port);
@@ -1935,7 +1931,7 @@ void dp_netdev_flow_unref(struct dp_netdev_flow *flow)
}
}
-static inline struct dpcls *
+inline struct dpcls *
dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
odp_port_t in_port)
{
@@ -2767,13 +2763,46 @@ smc_insert(struct dp_netdev_pmd_thread *pmd,
bucket->flow_idx[i] = index;
}
+inline void
+emc_probabilistic_insert_batch(struct dp_netdev_pmd_thread *pmd,
+ const struct netdev_flow_key *keys,
+ struct dpcls_rule **rules,
+ uint32_t emc_insert_mask)
+{
+ while (emc_insert_mask) {
+ uint32_t i = raw_ctz(emc_insert_mask);
+ emc_insert_mask &= emc_insert_mask - 1;
+        /* Get the required parameters for EMC/SMC from the rule */
+ struct dp_netdev_flow *flow = dp_netdev_flow_cast(rules[i]);
+ /* Insert the key into EMC/SMC. */
+ emc_probabilistic_insert(pmd, &keys[i], flow);
+ }
+}
+
+inline void
+smc_insert_batch(struct dp_netdev_pmd_thread *pmd,
+ const struct netdev_flow_key *keys,
+ struct dpcls_rule **rules,
+ uint32_t smc_insert_mask)
+{
+ while (smc_insert_mask) {
+ uint32_t i = raw_ctz(smc_insert_mask);
+ smc_insert_mask &= smc_insert_mask - 1;
+        /* Get the required parameters for EMC/SMC from the rule */
+ struct dp_netdev_flow *flow = dp_netdev_flow_cast(rules[i]);
+ uint32_t hash = dp_netdev_flow_hash(&flow->ufid);
+ /* Insert the key into EMC/SMC. */
+ smc_insert(pmd, &keys[i], hash);
+ }
+}
+
static struct dp_netdev_flow *
dp_netdev_pmd_lookup_flow(struct dp_netdev_pmd_thread *pmd,
const struct netdev_flow_key *key,
int *lookup_num_p)
{
struct dpcls *cls;
- struct dpcls_rule *rule;
+ struct dpcls_rule *rule = NULL;
odp_port_t in_port = u32_to_odp(MINIFLOW_GET_U32(&key->mf,
in_port.odp_port));
struct dp_netdev_flow *netdev_flow = NULL;
@@ -4288,7 +4317,10 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
}
/* Process packet batch. */
- pmd->netdev_input_func(pmd, &batch, port_no);
+ int ret = pmd->netdev_input_func(pmd, &batch, port_no);
+ if (ret) {
+ dp_netdev_input(pmd, &batch, port_no);
+ }
/* Assign processing cycles to rx queue. */
cycles = cycle_timer_stop(&pmd->perf_stats, &timer);
@@ -5306,6 +5338,8 @@ dpif_netdev_run(struct dpif *dpif)
non_pmd->ctx.emc_insert_min = 0;
}
+ non_pmd->ctx.smc_enable_db = dp->smc_enable_db;
+
for (i = 0; i < port->n_rxq; i++) {
if (!netdev_rxq_enabled(port->rxqs[i].rx)) {
@@ -5577,6 +5611,8 @@ reload:
pmd->ctx.emc_insert_min = 0;
}
+ pmd->ctx.smc_enable_db = pmd->dp->smc_enable_db;
+
process_packets =
dp_netdev_process_rxq_port(pmd, poll_list[i].rxq,
poll_list[i].port_no);
@@ -6474,6 +6510,24 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
actions->actions, actions->size);
}
+void
+dp_netdev_batch_execute(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets,
+ struct dpcls_rule *rule,
+ uint32_t bytes,
+ uint16_t tcp_flags)
+{
+ /* Gets action* from the rule. */
+ struct dp_netdev_flow *flow = dp_netdev_flow_cast(rule);
+ struct dp_netdev_actions *actions = dp_netdev_flow_get_actions(flow);
+
+ dp_netdev_flow_used(flow, dp_packet_batch_size(packets), bytes,
+ tcp_flags, pmd->ctx.now / 1000);
+ const uint32_t steal = 1;
+ dp_netdev_execute_actions(pmd, packets, steal, &flow->flow,
+ actions->actions, actions->size);
+}
+
static inline void
dp_netdev_queue_batches(struct dp_packet *pkt,
struct dp_netdev_flow *flow, uint16_t tcp_flags,
@@ -6578,10 +6632,34 @@ smc_lookup_batch(struct dp_netdev_pmd_thread *pmd,
pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SMC_HIT, n_smc_hit);
}
+struct dp_netdev_flow *
+smc_lookup_single(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet *packet,
+ struct netdev_flow_key *key)
+{
+ const struct cmap_node *flow_node = smc_entry_get(pmd, key->hash);
+
+ if (OVS_LIKELY(flow_node != NULL)) {
+ struct dp_netdev_flow *flow = NULL;
+
+ CMAP_NODE_FOR_EACH (flow, node, flow_node) {
+            /* Since we don't have per-port megaflow to check the port
+ * number, we need to verify that the input ports match. */
+ if (OVS_LIKELY(dpcls_rule_matches_key(&flow->cr, key) &&
+ flow->flow.in_port.odp_port == packet->md.in_port.odp_port)) {
+
+ return (void *) flow;
+ }
+ }
+ }
+
+ return NULL;
+}
+
static struct tx_port * pmd_send_port_cache_lookup(
const struct dp_netdev_pmd_thread *pmd, odp_port_t port_no);
-static inline int
+inline int
dp_netdev_hw_flow(const struct dp_netdev_pmd_thread *pmd,
odp_port_t port_no,
struct dp_packet *packet,
@@ -7022,12 +7100,13 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
}
}
-static void
+static int32_t
dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
struct dp_packet_batch *packets,
odp_port_t port_no)
{
dp_netdev_input__(pmd, packets, false, port_no);
+ return 0;
}
static void
@@ -8478,7 +8557,7 @@ dpcls_flow_key_gen_masks(const struct netdev_flow_key *tbl,
/* Returns true if 'target' satisfies 'key' in 'mask', that is, if each 1-bit
* in 'mask' the values in 'key' and 'target' are the same. */
-bool
+inline bool
dpcls_rule_matches_key(const struct dpcls_rule *rule,
const struct netdev_flow_key *target)
{
@@ -8504,7 +8583,7 @@ dpcls_rule_matches_key(const struct dpcls_rule *rule,
* priorities, instead returning any rule which matches the flow.
*
* Returns true if all miniflows found a corresponding rule. */
-static bool
+bool
dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[],
struct dpcls_rule **rules, const size_t cnt,
int *num_lookups_p)