@@ -450,6 +450,13 @@ dpdk_init__(const struct smap *ovs_other_config)
     /* Finally, register the dpdk classes */
     netdev_dpdk_register();
+
+    bool multi_seg_mbufs_enable = smap_get_bool(ovs_other_config,
+                                                "dpdk-multi-seg-mbufs", false);
+    if (multi_seg_mbufs_enable) {
+        VLOG_INFO("DPDK multi-segment mbufs enabled");
+        netdev_dpdk_multi_segment_mbufs_enable();
+    }
 }
void
@@ -58,6 +58,7 @@
VLOG_DEFINE_THIS_MODULE(netdev_dpdk);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+static bool dpdk_multi_segment_mbufs = false;
#define DPDK_PORT_WATCHDOG_INTERVAL 5
@@ -480,7 +481,7 @@ dpdk_mp_create(int socket_id, int mtu)
* when the number of ports and rxqs that utilize a particular mempool can
* change dynamically at runtime. For now, use this rough heurisitic.
*/
- if (mtu >= ETHER_MTU) {
+ if (mtu >= ETHER_MTU || dpdk_multi_segment_mbufs) {
mp_size = MAX_NB_MBUF;
} else {
mp_size = MIN_NB_MBUF;
@@ -558,17 +559,33 @@ dpdk_mp_put(struct dpdk_mp *dmp)
ovs_mutex_unlock(&dpdk_mp_mutex);
}
-/* Tries to allocate new mempool on requested_socket_id with
- * mbuf size corresponding to requested_mtu.
+/* Tries to configure a mempool for 'dev' on requested socket_id to accommodate
+ * packets of size 'requested_mtu'. The properties of the mempool's elements
+ * are dependent on the value of 'dpdk_multi_segment_mbufs':
+ * - if 'true', then the mempool contains standard-sized mbufs that are chained
+ * together to accommodate packets of size 'requested_mtu'. All ports on the
+ * same socket will share this mempool, irrespective of their MTU.
+ * - if 'false', then a mempool is allocated, the members of which are non-
+ * standard-sized mbufs. Each mbuf in the mempool is large enough to fully
+ *   accommodate packets of size 'requested_mtu'.
+ *
* On success new configuration will be applied.
* On error, device will be left unchanged. */
static int
netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
OVS_REQUIRES(dev->mutex)
{
- uint32_t buf_size = dpdk_buf_size(dev->requested_mtu);
+ uint32_t buf_size = 0;
struct dpdk_mp *mp;
+ /* Contiguous mbufs in use - permit oversized mbufs */
+ if (!dpdk_multi_segment_mbufs) {
+ buf_size = dpdk_buf_size(dev->requested_mtu);
+ } else {
+ /* multi-segment mbufs - use standard mbuf size */
+ buf_size = dpdk_buf_size(ETHER_MTU);
+ }
+
mp = dpdk_mp_get(dev->requested_socket_id, FRAME_LEN_TO_MTU(buf_size));
if (!mp) {
VLOG_ERR("Failed to create memory pool for netdev "
@@ -577,7 +594,13 @@ netdev_dpdk_mempool_configure(struct netdev_dpdk *dev)
rte_strerror(rte_errno));
return rte_errno;
} else {
- dpdk_mp_put(dev->dpdk_mp);
+ /* When single-segment mbufs are in use, a new mempool is allocated,
+ * so release the old one. In the case of multi-segment mbufs, the
+ * same mempool is used for all MTUs.
+ */
+ if (!dpdk_multi_segment_mbufs) {
+ dpdk_mp_put(dev->dpdk_mp);
+ }
dev->dpdk_mp = mp;
dev->mtu = dev->requested_mtu;
dev->socket_id = dev->requested_socket_id;
@@ -639,6 +662,7 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
int diag = 0;
int i;
struct rte_eth_conf conf = port_conf;
+ struct rte_eth_txconf txconf;
if (dev->mtu > ETHER_MTU) {
conf.rxmode.jumbo_frame = 1;
@@ -666,9 +690,25 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
             break;
         }
+    /* DPDK PMDs typically attempt to use simple or vectorized
+     * transmit functions, neither of which are compatible with
+     * multi-segment mbufs. Start from the PMD's default txconf
+     * and clear ETH_TXQ_FLAGS_NOMULTSEGS so that multi-segment
+     * transmit is permitted without discarding default thresholds.
+     */
+    if (dpdk_multi_segment_mbufs) {
+        struct rte_eth_dev_info dev_info;
+
+        rte_eth_dev_info_get(dev->port_id, &dev_info);
+        txconf = dev_info.default_txconf;
+        txconf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
+    }
+
for (i = 0; i < n_txq; i++) {
diag = rte_eth_tx_queue_setup(dev->port_id, i, dev->txq_size,
- dev->socket_id, NULL);
+ dev->socket_id,
+ dpdk_multi_segment_mbufs ? &txconf
+ : NULL);
if (diag) {
VLOG_INFO("Interface %s txq(%d) setup error: %s",
dev->up.name, i, rte_strerror(-diag));
@@ -3287,6 +3323,12 @@ unlock:
return err;
}
+void
+netdev_dpdk_multi_segment_mbufs_enable(void)
+{
+ dpdk_multi_segment_mbufs = true;
+}
+
#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, \
SET_CONFIG, SET_TX_MULTIQ, SEND, \
GET_CARRIER, GET_STATS, \
@@ -26,6 +26,7 @@ struct dp_packet;
#ifdef DPDK_NETDEV
void netdev_dpdk_register(void);
+void netdev_dpdk_multi_segment_mbufs_enable(void);
void free_dpdk_buf(struct dp_packet *);
#else
@@ -283,6 +283,25 @@
</p>
</column>
+ <column name="other_config" key="dpdk-multi-seg-mbufs"
+ type='{"type": "boolean"}'>
+ <p>
+ Specifies if DPDK uses multi-segment mbufs for handling jumbo frames.
+ </p>
+ <p>
+ If true, DPDK allocates a single mempool for all ports, irrespective
+ of the ports' requested MTU sizes. The elements of this mempool are
+        'standard'-sized mbufs (typically ~2 KB), which may be chained
+ together to accommodate jumbo frames. In this approach, each mbuf
+ typically stores a fragment of the overall jumbo frame.
+ </p>
+ <p>
+        If not specified, defaults to <code>false</code>, in which case the size
+ of each mbuf within a DPDK port's mempool will be grown to accommodate
+ jumbo frames within a single mbuf.
+ </p>
+ </column>
+
<column name="other_config" key="dpdk-extra"
type='{"type": "string"}'>
<p>