Message ID | 1442521785-32423-1-git-send-email-zoltan.kiss@linaro.org |
---|---|
State | Deferred |
Headers | show |
I tested the patch, but I wasn't able to reproduce your measurements. On my test setup I noticed no difference in throughput for different packet sizes/flow tables. Could you describe your setup in more details? I'd be happy to improve prefetching if it a simple change like the this. Thanks, Daniele On 17/09/2015 21:29, "Zoltan Kiss" <zoltan.kiss@linaro.org> wrote: >It's better to have it in the cache as soon as possible. On my test setup >it >meant a 0.7 Mpps increase. > >Signed-off-by: Zoltan Kiss <zoltan.kiss@linaro.org> > >diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >index 72e5653..3312cc0 100644 >--- a/lib/dpif-netdev.c >+++ b/lib/dpif-netdev.c >@@ -3229,11 +3229,6 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, >struct dp_packet **packets, > continue; > } > >- if (i != cnt - 1) { >- /* Prefetch next packet data */ >- OVS_PREFETCH(dp_packet_data(packets[i+1])); >- } >- > miniflow_extract(packets[i], &key.mf); > key.len = 0; /* Not computed yet. */ > key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf); >diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c >index e4e3d2c..c3c7ec0 100644 >--- a/lib/netdev-dpdk.c >+++ b/lib/netdev-dpdk.c >@@ -1015,7 +1015,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, >struct dp_packet **packets, > struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq_); > struct netdev *netdev = rx->up.netdev; > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); >- int nb_rx; >+ int nb_rx, i; > > /* There is only one tx queue for this core. Do not flush other > * queues. >@@ -1033,6 +1033,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, >struct dp_packet **packets, > return EAGAIN; > } > >+ for (i = 0; i < nb_rx; i++) >+ OVS_PREFETCH(dp_packet_data(packets[i])); >+ > *c = nb_rx; > > return 0; > >_______________________________________________ >dev mailing list >dev@openvswitch.org >https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma >n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Sm >B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=3RVqArIQwG9h7SC_5ZdxmrRZ3p0soQ >NTcUPJwi3-ZZA&s=Q9jC4I1Pmb4XizqudZUy0cMZBgHvmlSGcZerBzZW-Ig&e=
Hi, I've used an Intel 82599ES card with the ixgbe vector PMD. The DPDK drivers generally not very consistent about prefetching the header, e.g. the other ixgbe receive functions do it, but the vector one doesn't. And I used a single flow of 64 bytes UDP packets. Regards, Zoltan On 25/09/15 09:44, Daniele Di Proietto wrote: > I tested the patch, but I wasn't able to reproduce your measurements. > > On my test setup I noticed no difference in throughput for different packet > sizes/flow tables. > > Could you describe your setup in more details? > > I'd be happy to improve prefetching if it a simple change like the this. > > Thanks, > > Daniele > > On 17/09/2015 21:29, "Zoltan Kiss" <zoltan.kiss@linaro.org> wrote: > >> It's better to have it in the cache as soon as possible. On my test setup >> it >> meant a 0.7 Mpps increase. >> >> Signed-off-by: Zoltan Kiss <zoltan.kiss@linaro.org> >> >> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >> index 72e5653..3312cc0 100644 >> --- a/lib/dpif-netdev.c >> +++ b/lib/dpif-netdev.c >> @@ -3229,11 +3229,6 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, >> struct dp_packet **packets, >> continue; >> } >> >> - if (i != cnt - 1) { >> - /* Prefetch next packet data */ >> - OVS_PREFETCH(dp_packet_data(packets[i+1])); >> - } >> - >> miniflow_extract(packets[i], &key.mf); >> key.len = 0; /* Not computed yet. */ >> key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf); >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c >> index e4e3d2c..c3c7ec0 100644 >> --- a/lib/netdev-dpdk.c >> +++ b/lib/netdev-dpdk.c >> @@ -1015,7 +1015,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, >> struct dp_packet **packets, >> struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq_); >> struct netdev *netdev = rx->up.netdev; >> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); >> - int nb_rx; >> + int nb_rx, i; >> >> /* There is only one tx queue for this core. Do not flush other >> * queues. >> @@ -1033,6 +1033,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, >> struct dp_packet **packets, >> return EAGAIN; >> } >> >> + for (i = 0; i < nb_rx; i++) >> + OVS_PREFETCH(dp_packet_data(packets[i])); >> + >> *c = nb_rx; >> >> return 0; >> >> _______________________________________________ >> dev mailing list >> dev@openvswitch.org >> https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma >> n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Sm >> B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=3RVqArIQwG9h7SC_5ZdxmrRZ3p0soQ >> NTcUPJwi3-ZZA&s=Q9jC4I1Pmb4XizqudZUy0cMZBgHvmlSGcZerBzZW-Ig&e=
> > I tested the patch, but I wasn't able to reproduce your measurements. I was able to reproduce a slight performance improvement with this patch for single-flow uni-directional 64B traffic - up about 0.3Mpps for me. Thanks, Ciara > > On my test setup I noticed no difference in throughput for different packet > sizes/flow tables. > > Could you describe your setup in more details? > > I'd be happy to improve prefetching if it a simple change like the this. > > Thanks, > > Daniele > > On 17/09/2015 21:29, "Zoltan Kiss" <zoltan.kiss@linaro.org> wrote: > > >It's better to have it in the cache as soon as possible. On my test setup > >it > >meant a 0.7 Mpps increase. > > > >Signed-off-by: Zoltan Kiss <zoltan.kiss@linaro.org> > > > >diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > >index 72e5653..3312cc0 100644 > >--- a/lib/dpif-netdev.c > >+++ b/lib/dpif-netdev.c > >@@ -3229,11 +3229,6 @@ emc_processing(struct dp_netdev_pmd_thread > *pmd, > >struct dp_packet **packets, > > continue; > > } > > > >- if (i != cnt - 1) { > >- /* Prefetch next packet data */ > >- OVS_PREFETCH(dp_packet_data(packets[i+1])); > >- } > >- > > miniflow_extract(packets[i], &key.mf); > > key.len = 0; /* Not computed yet. */ > > key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf); > >diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > >index e4e3d2c..c3c7ec0 100644 > >--- a/lib/netdev-dpdk.c > >+++ b/lib/netdev-dpdk.c > >@@ -1015,7 +1015,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, > >struct dp_packet **packets, > > struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq_); > > struct netdev *netdev = rx->up.netdev; > > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > >- int nb_rx; > >+ int nb_rx, i; > > > > /* There is only one tx queue for this core. Do not flush other > > * queues. > >@@ -1033,6 +1033,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, > >struct dp_packet **packets, > > return EAGAIN; > > } > > > >+ for (i = 0; i < nb_rx; i++) > >+ OVS_PREFETCH(dp_packet_data(packets[i])); > >+ > > *c = nb_rx; > > > > return 0; > > > >_______________________________________________ > >dev mailing list > >dev@openvswitch.org > >https://urldefense.proofpoint.com/v2/url?u=http- > 3A__openvswitch.org_mailma > >n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw- > YihVMNtXt-uEs&r=Sm > >B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=3RVqArIQwG9h7SC > _5ZdxmrRZ3p0soQ > >NTcUPJwi3-ZZA&s=Q9jC4I1Pmb4XizqudZUy0cMZBgHvmlSGcZerBzZW- > Ig&e= > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev
On Thu, Sep 17, 2015 at 09:29:45PM +0100, Zoltan Kiss wrote: > It's better to have it in the cache as soon as possible. On my test setup it > meant a 0.7 Mpps increase. > > Signed-off-by: Zoltan Kiss <zoltan.kiss@linaro.org> > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 72e5653..3312cc0 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -3229,11 +3229,6 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets, > continue; > } > > - if (i != cnt - 1) { > - /* Prefetch next packet data */ > - OVS_PREFETCH(dp_packet_data(packets[i+1])); > - } > - > miniflow_extract(packets[i], &key.mf); > key.len = 0; /* Not computed yet. */ > key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf); > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index e4e3d2c..c3c7ec0 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -1015,7 +1015,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets, > struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq_); > struct netdev *netdev = rx->up.netdev; > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > - int nb_rx; > + int nb_rx, i; > > /* There is only one tx queue for this core. Do not flush other > * queues. > @@ -1033,6 +1033,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets, > return EAGAIN; > } > > + for (i = 0; i < nb_rx; i++) > + OVS_PREFETCH(dp_packet_data(packets[i])); > + Please use {} even for single statements to follow the coding style. Do you know if removing the prefetch completely helps? fbl > *c = nb_rx; > > return 0; > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 72e5653..3312cc0 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -3229,11 +3229,6 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet **packets, continue; } - if (i != cnt - 1) { - /* Prefetch next packet data */ - OVS_PREFETCH(dp_packet_data(packets[i+1])); - } - miniflow_extract(packets[i], &key.mf); key.len = 0; /* Not computed yet. */ key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index e4e3d2c..c3c7ec0 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -1015,7 +1015,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets, struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq_); struct netdev *netdev = rx->up.netdev; struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); - int nb_rx; + int nb_rx, i; /* There is only one tx queue for this core. Do not flush other * queues. @@ -1033,6 +1033,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets, return EAGAIN; } + for (i = 0; i < nb_rx; i++) + OVS_PREFETCH(dp_packet_data(packets[i])); + *c = nb_rx; return 0;
It's better to have it in the cache as soon as possible. On my test setup it meant a 0.7 Mpps increase. Signed-off-by: Zoltan Kiss <zoltan.kiss@linaro.org>