Message ID | 1449300749-50275-1-git-send-email-yankejian@huawei.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
From: yankejian <yankejian@huawei.com> Date: Sat, 5 Dec 2015 15:32:29 +0800 > +#if (PAGE_SIZE < 8192) > + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { > + truesize = hnae_buf_size(ring); > + } else { > + truesize = ALIGN(size, L1_CACHE_BYTES); > + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); > + } > + > +#else > + truesize = ALIGN(size, L1_CACHE_BYTES); > + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); > +#endif This is not indented properly, and it looks terrible. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, 2015-12-06 at 22:29 -0500, David Miller wrote: > From: yankejian <yankejian@huawei.com> > Date: Sat, 5 Dec 2015 15:32:29 +0800 > > > +#if (PAGE_SIZE < 8192) > > + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { > > + truesize = hnae_buf_size(ring); > > + } else { > > + truesize = ALIGN(size, L1_CACHE_BYTES); > > + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); > > + } > > + > > +#else > > + truesize = ALIGN(size, L1_CACHE_BYTES); > > + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); > > +#endif > > This is not indented properly, and it looks terrible. And it makes one curious as to why last_offset isn't set in the first block. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/12/7 11:29, David Miller wrote: > From: yankejian <yankejian@huawei.com> > Date: Sat, 5 Dec 2015 15:32:29 +0800 > >> +#if (PAGE_SIZE < 8192) >> + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { >> + truesize = hnae_buf_size(ring); >> + } else { >> + truesize = ALIGN(size, L1_CACHE_BYTES); >> + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); >> + } >> + >> +#else >> + truesize = ALIGN(size, L1_CACHE_BYTES); >> + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); >> +#endif > This is not indented properly, and it looks terrible. > > . > Hi David, Thanks for pointing it out. i will pay attention next time. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/12/5 15:32, yankejian wrote: > here is the patch raising the performance of XGE by: > 1)changes the way page management method for enet momery, and > 2)reduces the count of rmb, and > 3)adds Memory prefetching Any numbers on how much it boost performance? > Signed-off-by: yankejian <yankejian@huawei.com> > --- > drivers/net/ethernet/hisilicon/hns/hnae.h | 5 +- > drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 1 - > drivers/net/ethernet/hisilicon/hns/hns_enet.c | 79 +++++++++++++++-------- > 3 files changed, 55 insertions(+), 30 deletions(-) > > diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h > index d1f3316..6ca94dc 100644 > --- a/drivers/net/ethernet/hisilicon/hns/hnae.h > +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h > @@ -341,7 +341,8 @@ struct hnae_queue { > void __iomem *io_base; > phys_addr_t phy_base; > struct hnae_ae_dev *dev; /* the device who use this queue */ > - struct hnae_ring rx_ring, tx_ring; > + struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp; > + struct hnae_ring tx_ring ____cacheline_internodealigned_in_smp; > struct hnae_handle *handle; > }; > > @@ -597,11 +598,9 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i, > struct hnae_desc_cb *res_cb) > { > struct hnae_buf_ops *bops = ring->q->handle->bops; > - struct hnae_desc_cb tmp_cb = ring->desc_cb[i]; > > bops->unmap_buffer(ring, &ring->desc_cb[i]); > ring->desc_cb[i] = *res_cb; > - *res_cb = tmp_cb; > ring->desc[i].addr = (__le64)ring->desc_cb[i].dma; > ring->desc[i].rx.ipoff_bnum_pid_flag = 0; > } > diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c > index 77c6edb..522b264 100644 > --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c > +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c > @@ -341,7 +341,6 @@ void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask) > else > flag = RCB_INT_FLAG_RX; > > - 
hns_rcb_int_clr_hw(ring->q, flag); > hns_rcb_int_ctrl_hw(ring->q, flag, mask); > } > > diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c > index cad2663..e2be510 100644 > --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c > +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c > @@ -33,6 +33,7 @@ > > #define RCB_IRQ_NOT_INITED 0 > #define RCB_IRQ_INITED 1 > +#define HNS_BUFFER_SIZE_2048 2048 > > #define BD_MAX_SEND_SIZE 8191 > #define SKB_TMP_LEN(SKB) \ > @@ -491,13 +492,51 @@ static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag, > return max_size; > } > > -static void > -hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset) > +static void hns_nic_reuse_page(struct sk_buff *skb, int i, > + struct hnae_ring *ring, int pull_len, > + struct hnae_desc_cb *desc_cb) > { > + struct hnae_desc *desc; > + int truesize, size; > + int last_offset = 0; > + > + desc = &ring->desc[ring->next_to_clean]; > + size = le16_to_cpu(desc->rx.size); > + > +#if (PAGE_SIZE < 8192) > + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { > + truesize = hnae_buf_size(ring); > + } else { > + truesize = ALIGN(size, L1_CACHE_BYTES); > + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); > + } > + > +#else > + truesize = ALIGN(size, L1_CACHE_BYTES); > + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); > +#endif > + > + skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, > + size - pull_len, truesize - pull_len); > + > /* avoid re-using remote pages,flag default unreuse */ > if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) { > +#if (PAGE_SIZE < 8192) > + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { > + /* if we are only owner of page we can reuse it */ > + if (likely(page_count(desc_cb->priv) == 1)) { > + /* flip page offset to other buffer */ > + desc_cb->page_offset ^= truesize; > + > + desc_cb->reuse_flag = 1; > + /* bump ref count on page before it is 
given*/ > + get_page(desc_cb->priv); > + } > + return; > + } > +#endif > /* move offset up to the next cache line */ > - desc_cb->page_offset += tsize; > + desc_cb->page_offset += truesize; > > if (desc_cb->page_offset <= last_offset) { > desc_cb->reuse_flag = 1; > @@ -529,11 +568,10 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, > struct hnae_desc *desc; > struct hnae_desc_cb *desc_cb; > unsigned char *va; > - int bnum, length, size, i, truesize, last_offset; > + int bnum, length, i; > int pull_len; > u32 bnum_flag; > > - last_offset = hnae_page_size(ring) - hnae_buf_size(ring); > desc = &ring->desc[ring->next_to_clean]; > desc_cb = &ring->desc_cb[ring->next_to_clean]; > > @@ -555,17 +593,12 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, > return -ENOMEM; > } > > + prefetchw(skb->data); > length = le16_to_cpu(desc->rx.pkt_len); > bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag); > priv->ops.get_rxd_bnum(bnum_flag, &bnum); > *out_bnum = bnum; > > - /* we will be copying header into skb->data in > - * pskb_may_pull so it is in our interest to prefetch > - * it now to avoid a possible cache miss > - */ > - prefetchw(skb->data); > - > if (length <= HNS_RX_HEAD_SIZE) { > memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long))); > > @@ -588,13 +621,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, > memcpy(__skb_put(skb, pull_len), va, > ALIGN(pull_len, sizeof(long))); > > - size = le16_to_cpu(desc->rx.size); > - truesize = ALIGN(size, L1_CACHE_BYTES); > - skb_add_rx_frag(skb, 0, desc_cb->priv, > - desc_cb->page_offset + pull_len, > - size - pull_len, truesize - pull_len); > - > - hns_nic_reuse_page(desc_cb, truesize, last_offset); > + hns_nic_reuse_page(skb, 0, ring, pull_len, desc_cb); > ring_ptr_move_fw(ring, next_to_clean); > > if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/ > @@ -604,13 +631,8 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, > for (i 
= 1; i < bnum; i++) { > desc = &ring->desc[ring->next_to_clean]; > desc_cb = &ring->desc_cb[ring->next_to_clean]; > - size = le16_to_cpu(desc->rx.size); > - truesize = ALIGN(size, L1_CACHE_BYTES); > - skb_add_rx_frag(skb, i, desc_cb->priv, > - desc_cb->page_offset, > - size, truesize); > > - hns_nic_reuse_page(desc_cb, truesize, last_offset); > + hns_nic_reuse_page(skb, i, ring, 0, desc_cb); > ring_ptr_move_fw(ring, next_to_clean); > } > } > @@ -750,9 +772,10 @@ recv: > /* make all data has been write before submit */ > if (recv_pkts < budget) { > ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM); > - rmb(); /*complete read rx ring bd number*/ > + > if (ex_num > clean_count) { > num += ex_num - clean_count; > + rmb(); /*complete read rx ring bd number*/ > goto recv; > } > } > @@ -849,8 +872,11 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, > > bytes = 0; > pkts = 0; > - while (head != ring->next_to_clean) > + while (head != ring->next_to_clean) { > hns_nic_reclaim_one_desc(ring, &bytes, &pkts); > + /* issue prefetch for next Tx descriptor */ > + prefetch(&ring->desc_cb[ring->next_to_clean]); > + } > > NETIF_TX_UNLOCK(ndev); > > @@ -926,6 +952,7 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget) > ring_data->ring, 0); > > ring_data->fini_process(ring_data); > + return 0; > } > > return clean_complete; > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/12/7 11:32, Joe Perches wrote: > On Sun, 2015-12-06 at 22:29 -0500, David Miller wrote: >> > From: yankejian <yankejian@huawei.com> >> > Date: Sat, 5 Dec 2015 15:32:29 +0800 >> > >>> > > +#if (PAGE_SIZE < 8192) >>> > > + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { >>> > > + truesize = hnae_buf_size(ring); >>> > > + } else { >>> > > + truesize = ALIGN(size, L1_CACHE_BYTES); >>> > > + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); >>> > > + } >>> > > + >>> > > +#else >>> > > + truesize = ALIGN(size, L1_CACHE_BYTES); >>> > > + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); >>> > > +#endif >> > >> > This is not indented properly, and it looks terrible. > And it makes one curious as to why last_offset isn't set > in the first block. Hi Joe, if hnae_buf_size is equal to HNS_BUFFER_SIZE_2048, last_offset is useless in the routines of this function, so it is ignored in the first block. Thanks for your suggestion. Best regards, yankejian -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2015-12-07 at 16:58 +0800, Yankejian (Hackim Yim) wrote: > On 2015/12/7 11:32, Joe Perches wrote: > > On Sun, 2015-12-06 at 22:29 -0500, David Miller wrote: > > > > From: yankejian <yankejian@huawei.com> > > > > Date: Sat, 5 Dec 2015 15:32:29 +0800 > > > > > > > > > > +#if (PAGE_SIZE < 8192) > > > > > > + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { > > > > > > + truesize = hnae_buf_size(ring); > > > > > > + } else { > > > > > > + truesize = ALIGN(size, L1_CACHE_BYTES); > > > > > > + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); > > > > > > + } > > > > > > + > > > > > > +#else > > > > > > + truesize = ALIGN(size, L1_CACHE_BYTES); > > > > > > + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); > > > > > > +#endif > > > > > > > > This is not indented properly, and it looks terrible. > > And it makes one curious as to why last_offset isn't set > > in the first block. > > Hi Joe, Hello. > if hnae_buf_size que equal to HNS_BUFFER_SIZE, last_offset is useless in the routines of this function. > so it is ignored in the first block. thanks for your suggestion. More to the point, last_offset is initialized to 0. It'd be clearer not to initialize it at all and set it to 0 in the first block and not overwrite the initialization in each subsequent block. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/12/7 17:05, Joe Perches wrote: > On Mon, 2015-12-07 at 16:58 +0800, Yankejian (Hackim Yim) wrote: >> On 2015/12/7 11:32, Joe Perches wrote: >>> On Sun, 2015-12-06 at 22:29 -0500, David Miller wrote: >>>>> From: yankejian <yankejian@huawei.com> >>>>> Date: Sat, 5 Dec 2015 15:32:29 +0800 >>>>> >>>>>>> +#if (PAGE_SIZE < 8192) >>>>>>> + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { >>>>>>> + truesize = hnae_buf_size(ring); >>>>>>> + } else { >>>>>>> + truesize = ALIGN(size, L1_CACHE_BYTES); >>>>>>> + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); >>>>>>> + } >>>>>>> + >>>>>>> +#else >>>>>>> + truesize = ALIGN(size, L1_CACHE_BYTES); >>>>>>> + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); >>>>>>> +#endif >>>>> This is not indented properly, and it looks terrible. >>> And it makes one curious as to why last_offset isn't set >>> in the first block. >> Hi Joe, > Hello. > >> if hnae_buf_size que equal to HNS_BUFFER_SIZE, last_offset is useless in the routines of this function. >> so it is ignored in the first block. thanks for your suggestion. > More to the point, last_offset is initialized to 0. > > It'd be clearer not to initialize it at all and thanks, that is a good idea. > set it to 0 in the first block and not overwrite > the initialization in each subsequent block. because it is useless, i think we'd better ignored it. > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > > . > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/12/7 16:58, Du, Fan wrote: > > > On 2015/12/5 15:32, yankejian wrote: >> here is the patch raising the performance of XGE by: >> 1)changes the way page management method for enet momery, and >> 2)reduces the count of rmb, and >> 3)adds Memory prefetching > > Any numbers on how much it boost performance? > it is almost the same as 82599. >> Signed-off-by: yankejian <yankejian@huawei.com> >> --- >> drivers/net/ethernet/hisilicon/hns/hnae.h | 5 +- >> drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 1 - >> drivers/net/ethernet/hisilicon/hns/hns_enet.c | 79 +++++++++++++++-------- >> 3 files changed, 55 insertions(+), 30 deletions(-) >> >> diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h >> index d1f3316..6ca94dc 100644 >> --- a/drivers/net/ethernet/hisilicon/hns/hnae.h >> +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h >> @@ -341,7 +341,8 @@ struct hnae_queue { >> void __iomem *io_base; >> phys_addr_t phy_base; >> struct hnae_ae_dev *dev; /* the device who use this queue */ >> - struct hnae_ring rx_ring, tx_ring; >> + struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp; >> + struct hnae_ring tx_ring ____cacheline_internodealigned_in_smp; >> struct hnae_handle *handle; >> }; >> >> @@ -597,11 +598,9 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i, >> struct hnae_desc_cb *res_cb) >> { >> struct hnae_buf_ops *bops = ring->q->handle->bops; >> - struct hnae_desc_cb tmp_cb = ring->desc_cb[i]; >> >> bops->unmap_buffer(ring, &ring->desc_cb[i]); >> ring->desc_cb[i] = *res_cb; >> - *res_cb = tmp_cb; >> ring->desc[i].addr = (__le64)ring->desc_cb[i].dma; >> ring->desc[i].rx.ipoff_bnum_pid_flag = 0; >> } >> diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c >> index 77c6edb..522b264 100644 >> --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c >> +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c >> @@ -341,7 +341,6 
@@ void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask) >> else >> flag = RCB_INT_FLAG_RX; >> >> - hns_rcb_int_clr_hw(ring->q, flag); >> hns_rcb_int_ctrl_hw(ring->q, flag, mask); >> } >> >> diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c >> index cad2663..e2be510 100644 >> --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c >> +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c >> @@ -33,6 +33,7 @@ >> >> #define RCB_IRQ_NOT_INITED 0 >> #define RCB_IRQ_INITED 1 >> +#define HNS_BUFFER_SIZE_2048 2048 >> >> #define BD_MAX_SEND_SIZE 8191 >> #define SKB_TMP_LEN(SKB) \ >> @@ -491,13 +492,51 @@ static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag, >> return max_size; >> } >> >> -static void >> -hns_nic_reuse_page(struct hnae_desc_cb *desc_cb, int tsize, int last_offset) >> +static void hns_nic_reuse_page(struct sk_buff *skb, int i, >> + struct hnae_ring *ring, int pull_len, >> + struct hnae_desc_cb *desc_cb) >> { >> + struct hnae_desc *desc; >> + int truesize, size; >> + int last_offset = 0; >> + >> + desc = &ring->desc[ring->next_to_clean]; >> + size = le16_to_cpu(desc->rx.size); >> + >> +#if (PAGE_SIZE < 8192) >> + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { >> + truesize = hnae_buf_size(ring); >> + } else { >> + truesize = ALIGN(size, L1_CACHE_BYTES); >> + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); >> + } >> + >> +#else >> + truesize = ALIGN(size, L1_CACHE_BYTES); >> + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); >> +#endif >> + >> + skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, >> + size - pull_len, truesize - pull_len); >> + >> /* avoid re-using remote pages,flag default unreuse */ >> if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) { >> +#if (PAGE_SIZE < 8192) >> + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { >> + /* if we are only owner of page we can reuse it */ >> + if (likely(page_count(desc_cb->priv) == 
1)) { >> + /* flip page offset to other buffer */ >> + desc_cb->page_offset ^= truesize; >> + >> + desc_cb->reuse_flag = 1; >> + /* bump ref count on page before it is given*/ >> + get_page(desc_cb->priv); >> + } >> + return; >> + } >> +#endif >> /* move offset up to the next cache line */ >> - desc_cb->page_offset += tsize; >> + desc_cb->page_offset += truesize; >> >> if (desc_cb->page_offset <= last_offset) { >> desc_cb->reuse_flag = 1; >> @@ -529,11 +568,10 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, >> struct hnae_desc *desc; >> struct hnae_desc_cb *desc_cb; >> unsigned char *va; >> - int bnum, length, size, i, truesize, last_offset; >> + int bnum, length, i; >> int pull_len; >> u32 bnum_flag; >> >> - last_offset = hnae_page_size(ring) - hnae_buf_size(ring); >> desc = &ring->desc[ring->next_to_clean]; >> desc_cb = &ring->desc_cb[ring->next_to_clean]; >> >> @@ -555,17 +593,12 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, >> return -ENOMEM; >> } >> >> + prefetchw(skb->data); >> length = le16_to_cpu(desc->rx.pkt_len); >> bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag); >> priv->ops.get_rxd_bnum(bnum_flag, &bnum); >> *out_bnum = bnum; >> >> - /* we will be copying header into skb->data in >> - * pskb_may_pull so it is in our interest to prefetch >> - * it now to avoid a possible cache miss >> - */ >> - prefetchw(skb->data); >> - >> if (length <= HNS_RX_HEAD_SIZE) { >> memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long))); >> >> @@ -588,13 +621,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, >> memcpy(__skb_put(skb, pull_len), va, >> ALIGN(pull_len, sizeof(long))); >> >> - size = le16_to_cpu(desc->rx.size); >> - truesize = ALIGN(size, L1_CACHE_BYTES); >> - skb_add_rx_frag(skb, 0, desc_cb->priv, >> - desc_cb->page_offset + pull_len, >> - size - pull_len, truesize - pull_len); >> - >> - hns_nic_reuse_page(desc_cb, truesize, last_offset); >> + hns_nic_reuse_page(skb, 0, ring, 
pull_len, desc_cb); >> ring_ptr_move_fw(ring, next_to_clean); >> >> if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/ >> @@ -604,13 +631,8 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, >> for (i = 1; i < bnum; i++) { >> desc = &ring->desc[ring->next_to_clean]; >> desc_cb = &ring->desc_cb[ring->next_to_clean]; >> - size = le16_to_cpu(desc->rx.size); >> - truesize = ALIGN(size, L1_CACHE_BYTES); >> - skb_add_rx_frag(skb, i, desc_cb->priv, >> - desc_cb->page_offset, >> - size, truesize); >> >> - hns_nic_reuse_page(desc_cb, truesize, last_offset); >> + hns_nic_reuse_page(skb, i, ring, 0, desc_cb); >> ring_ptr_move_fw(ring, next_to_clean); >> } >> } >> @@ -750,9 +772,10 @@ recv: >> /* make all data has been write before submit */ >> if (recv_pkts < budget) { >> ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM); >> - rmb(); /*complete read rx ring bd number*/ >> + >> if (ex_num > clean_count) { >> num += ex_num - clean_count; >> + rmb(); /*complete read rx ring bd number*/ >> goto recv; >> } >> } >> @@ -849,8 +872,11 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, >> >> bytes = 0; >> pkts = 0; >> - while (head != ring->next_to_clean) >> + while (head != ring->next_to_clean) { >> hns_nic_reclaim_one_desc(ring, &bytes, &pkts); >> + /* issue prefetch for next Tx descriptor */ >> + prefetch(&ring->desc_cb[ring->next_to_clean]); >> + } >> >> NETIF_TX_UNLOCK(ndev); >> >> @@ -926,6 +952,7 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget) >> ring_data->ring, 0); >> >> ring_data->fini_process(ring_data); >> + return 0; >> } >> >> return clean_complete; >> > > . > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/12/8 14:22, Yankejian (Hackim Yim) wrote: > > On 2015/12/7 16:58, Du, Fan wrote: >> > >> > >> >On 2015/12/5 15:32, yankejian wrote: >>> >>here is the patch raising the performance of XGE by: >>> >>1)changes the way page management method for enet momery, and >>> >>2)reduces the count of rmb, and >>> >>3)adds Memory prefetching >> > >> >Any numbers on how much it boost performance? >> > > it is almost the same as 82599. I mean how much it improves performance *BEFORE* and *AFTER* this patch for Huawei XGE chip, because the commit log states it "raising the performance", but did not give numbers from the testing. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2015/12/8 14:30, Du, Fan wrote: > > > On 2015/12/8 14:22, Yankejian (Hackim Yim) wrote: >> >> On 2015/12/7 16:58, Du, Fan wrote: >>> > >>> > >>> >On 2015/12/5 15:32, yankejian wrote: >>>> >>here is the patch raising the performance of XGE by: >>>> >>1)changes the way page management method for enet momery, and >>>> >>2)reduces the count of rmb, and >>>> >>3)adds Memory prefetching >>> > >>> >Any numbers on how much it boost performance? >>> > >> it is almost the same as 82599. > > I mean how much it improves performance *BEFORE* and *AFTER* this patch > for Huawei XGE chip, because the commit log states it "raising the performance", > but did give numbers of the testing. > > . > Hi Du Fan, the bandwidth gain is not obvious, but the CPU usage decreases by up to 50% -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index d1f3316..6ca94dc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -341,7 +341,8 @@ struct hnae_queue { void __iomem *io_base; phys_addr_t phy_base; struct hnae_ae_dev *dev; /* the device who use this queue */ - struct hnae_ring rx_ring, tx_ring; + struct hnae_ring rx_ring ____cacheline_internodealigned_in_smp; + struct hnae_ring tx_ring ____cacheline_internodealigned_in_smp; struct hnae_handle *handle; }; @@ -597,11 +598,9 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i, struct hnae_desc_cb *res_cb) { struct hnae_buf_ops *bops = ring->q->handle->bops; - struct hnae_desc_cb tmp_cb = ring->desc_cb[i]; bops->unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; - *res_cb = tmp_cb; ring->desc[i].addr = (__le64)ring->desc_cb[i].dma; ring->desc[i].rx.ipoff_bnum_pid_flag = 0; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 77c6edb..522b264 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -341,7 +341,6 @@ void hns_ae_toggle_ring_irq(struct hnae_ring *ring, u32 mask) else flag = RCB_INT_FLAG_RX; - hns_rcb_int_clr_hw(ring->q, flag); hns_rcb_int_ctrl_hw(ring->q, flag, mask); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index cad2663..e2be510 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -33,6 +33,7 @@ #define RCB_IRQ_NOT_INITED 0 #define RCB_IRQ_INITED 1 +#define HNS_BUFFER_SIZE_2048 2048 #define BD_MAX_SEND_SIZE 8191 #define SKB_TMP_LEN(SKB) \ @@ -491,13 +492,51 @@ static unsigned int hns_nic_get_headlen(unsigned char *data, u32 flag, return max_size; } -static void -hns_nic_reuse_page(struct hnae_desc_cb 
*desc_cb, int tsize, int last_offset) +static void hns_nic_reuse_page(struct sk_buff *skb, int i, + struct hnae_ring *ring, int pull_len, + struct hnae_desc_cb *desc_cb) { + struct hnae_desc *desc; + int truesize, size; + int last_offset = 0; + + desc = &ring->desc[ring->next_to_clean]; + size = le16_to_cpu(desc->rx.size); + +#if (PAGE_SIZE < 8192) + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { + truesize = hnae_buf_size(ring); + } else { + truesize = ALIGN(size, L1_CACHE_BYTES); + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); + } + +#else + truesize = ALIGN(size, L1_CACHE_BYTES); + last_offset = hnae_page_size(ring) - hnae_buf_size(ring); +#endif + + skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, + size - pull_len, truesize - pull_len); + /* avoid re-using remote pages,flag default unreuse */ if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) { +#if (PAGE_SIZE < 8192) + if (hnae_buf_size(ring) == HNS_BUFFER_SIZE_2048) { + /* if we are only owner of page we can reuse it */ + if (likely(page_count(desc_cb->priv) == 1)) { + /* flip page offset to other buffer */ + desc_cb->page_offset ^= truesize; + + desc_cb->reuse_flag = 1; + /* bump ref count on page before it is given*/ + get_page(desc_cb->priv); + } + return; + } +#endif /* move offset up to the next cache line */ - desc_cb->page_offset += tsize; + desc_cb->page_offset += truesize; if (desc_cb->page_offset <= last_offset) { desc_cb->reuse_flag = 1; @@ -529,11 +568,10 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, struct hnae_desc *desc; struct hnae_desc_cb *desc_cb; unsigned char *va; - int bnum, length, size, i, truesize, last_offset; + int bnum, length, i; int pull_len; u32 bnum_flag; - last_offset = hnae_page_size(ring) - hnae_buf_size(ring); desc = &ring->desc[ring->next_to_clean]; desc_cb = &ring->desc_cb[ring->next_to_clean]; @@ -555,17 +593,12 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, return -ENOMEM; } + 
prefetchw(skb->data); length = le16_to_cpu(desc->rx.pkt_len); bnum_flag = le32_to_cpu(desc->rx.ipoff_bnum_pid_flag); priv->ops.get_rxd_bnum(bnum_flag, &bnum); *out_bnum = bnum; - /* we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); - if (length <= HNS_RX_HEAD_SIZE) { memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long))); @@ -588,13 +621,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - size = le16_to_cpu(desc->rx.size); - truesize = ALIGN(size, L1_CACHE_BYTES); - skb_add_rx_frag(skb, 0, desc_cb->priv, - desc_cb->page_offset + pull_len, - size - pull_len, truesize - pull_len); - - hns_nic_reuse_page(desc_cb, truesize, last_offset); + hns_nic_reuse_page(skb, 0, ring, pull_len, desc_cb); ring_ptr_move_fw(ring, next_to_clean); if (unlikely(bnum >= (int)MAX_SKB_FRAGS)) { /* check err*/ @@ -604,13 +631,8 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, for (i = 1; i < bnum; i++) { desc = &ring->desc[ring->next_to_clean]; desc_cb = &ring->desc_cb[ring->next_to_clean]; - size = le16_to_cpu(desc->rx.size); - truesize = ALIGN(size, L1_CACHE_BYTES); - skb_add_rx_frag(skb, i, desc_cb->priv, - desc_cb->page_offset, - size, truesize); - hns_nic_reuse_page(desc_cb, truesize, last_offset); + hns_nic_reuse_page(skb, i, ring, 0, desc_cb); ring_ptr_move_fw(ring, next_to_clean); } } @@ -750,9 +772,10 @@ recv: /* make all data has been write before submit */ if (recv_pkts < budget) { ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM); - rmb(); /*complete read rx ring bd number*/ + if (ex_num > clean_count) { num += ex_num - clean_count; + rmb(); /*complete read rx ring bd number*/ goto recv; } } @@ -849,8 +872,11 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data, bytes = 0; pkts = 0; - while (head != ring->next_to_clean) 
+ while (head != ring->next_to_clean) { hns_nic_reclaim_one_desc(ring, &bytes, &pkts); + /* issue prefetch for next Tx descriptor */ + prefetch(&ring->desc_cb[ring->next_to_clean]); + } NETIF_TX_UNLOCK(ndev); @@ -926,6 +952,7 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget) ring_data->ring, 0); ring_data->fini_process(ring_data); + return 0; } return clean_complete;
here is the patch raising the performance of XGE by: 1) changes the page management method for enet memory, and 2) reduces the count of rmb, and 3) adds memory prefetching Signed-off-by: yankejian <yankejian@huawei.com> --- drivers/net/ethernet/hisilicon/hns/hnae.h | 5 +- drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 1 - drivers/net/ethernet/hisilicon/hns/hns_enet.c | 79 +++++++++++++++-------- 3 files changed, 55 insertions(+), 30 deletions(-)