Message ID | 1352963114-628-1-git-send-email-annie.li@oracle.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
On 15/11/12 08:05, Annie Li wrote: > Tx/rx page pool are maintained. New grant is mapped and put into > pool, unmap only happens when releasing/removing device. > > Signed-off-by: Annie Li <annie.li@oracle.com> > --- > drivers/net/xen-netfront.c | 372 +++++++++++++++++++++++++++++++++++++------- > 1 files changed, 315 insertions(+), 57 deletions(-) > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c > index 0ebbb19..17b81c0 100644 > --- a/drivers/net/xen-netfront.c > +++ b/drivers/net/xen-netfront.c > @@ -79,6 +79,13 @@ struct netfront_stats { > struct u64_stats_sync syncp; > }; > > +struct gnt_list { > + grant_ref_t gref; > + struct page *gnt_pages; > + void *gnt_target; > + struct gnt_list *tail; > +}; This could also be shared with blkfront. > + > struct netfront_info { > struct list_head list; > struct net_device *netdev; > @@ -109,6 +116,10 @@ struct netfront_info { > grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; > unsigned tx_skb_freelist; > > + struct gnt_list *tx_grant[NET_TX_RING_SIZE]; > + struct gnt_list *tx_gnt_list; > + unsigned int tx_gnt_cnt; I don't understand this, why do you need both an array and a list? I'm not familiar with net code, so I don't know if this is some kind of special netfront thing? Anyway if you have to use a list I would recommend using one of the list constructions that's already in the kernel, it simplifies the code and makes it more easy to understand than creating your own list structure. > + > spinlock_t rx_lock ____cacheline_aligned_in_smp; > struct xen_netif_rx_front_ring rx; > int rx_ring_ref; > @@ -126,6 +137,10 @@ struct netfront_info { > grant_ref_t gref_rx_head; > grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; > > + struct gnt_list *rx_grant[NET_RX_RING_SIZE]; > + struct gnt_list *rx_gnt_list; > + unsigned int rx_gnt_cnt; Same comment above here. 
> + > unsigned long rx_pfn_array[NET_RX_RING_SIZE]; > struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; > struct mmu_update rx_mmu[NET_RX_RING_SIZE]; > @@ -134,6 +149,7 @@ struct netfront_info { > struct netfront_stats __percpu *stats; > > unsigned long rx_gso_checksum_fixup; > + u8 persistent_gnt:1; > }; > > struct netfront_rx_info { > @@ -194,6 +210,16 @@ static grant_ref_t xennet_get_rx_ref(struct netfront_info *np, > return ref; > } > > +static struct gnt_list *xennet_get_rx_grant(struct netfront_info *np, > + RING_IDX ri) > +{ > + int i = xennet_rxidx(ri); > + struct gnt_list *gntlist = np->rx_grant[i]; > + np->rx_grant[i] = NULL; Ok, I think I get why do you need both an array and a list, is that because netfront doesn't have some kind of shadow ring to keep track of issued requests? So each issued request has an associated gnt_list with the list of used grants? If so it would be good to add a comment about it. > + return gntlist; > +} > + > #ifdef CONFIG_SYSFS > static int xennet_sysfs_addif(struct net_device *netdev); > static void xennet_sysfs_delif(struct net_device *netdev); > @@ -231,6 +257,68 @@ static void xennet_maybe_wake_tx(struct net_device *dev) > netif_wake_queue(dev); > } > > +static grant_ref_t xennet_alloc_rx_ref(struct net_device *dev, > + unsigned long mfn, void *vaddr, > + unsigned int id, > + grant_ref_t ref) > +{ > + struct netfront_info *np = netdev_priv(dev); > + grant_ref_t gnt_ref; > + struct gnt_list *gnt_list_entry; > + > + if (np->persistent_gnt && np->rx_gnt_cnt) { > + gnt_list_entry = np->rx_gnt_list; > + np->rx_gnt_list = np->rx_gnt_list->tail; > + np->rx_gnt_cnt--; > + > + gnt_list_entry->gnt_target = vaddr; > + gnt_ref = gnt_list_entry->gref; > + np->rx_grant[id] = gnt_list_entry; > + } else { > + struct page *page; > + > + BUG_ON(!np->persistent_gnt && np->rx_gnt_cnt); > + if (!ref) > + gnt_ref = > + gnttab_claim_grant_reference(&np->gref_rx_head); > + else > + gnt_ref = ref; > + BUG_ON((signed short)gnt_ref < 0); > + > + 
if (np->persistent_gnt) { So you are only using persistent grants if the backend also supports them. Have you benchmarked the performance of a persistent frontend with a non-persistent backend. In the block case, usign a persistent frontend with a non-persistent backend let to an overall performance improvement, so blkfront uses persistent grants even if blkback doesn't support them. Take a look at the following graph: http://xenbits.xen.org/people/royger/persistent_grants/nonpers_read.png > + page = alloc_page(GFP_KERNEL); > + if (!page) { > + if (!ref) > + gnttab_release_grant_reference( > + &np->gref_rx_head, ref); > + return -ENOMEM; > + } > + mfn = pfn_to_mfn(page_to_pfn(page)); > + > + gnt_list_entry = kmalloc(sizeof(struct gnt_list), > + GFP_KERNEL); > + if (!gnt_list_entry) { > + __free_page(page); > + if (!ref) > + gnttab_release_grant_reference( > + &np->gref_rx_head, ref); > + return -ENOMEM; > + } > + gnt_list_entry->gref = gnt_ref; > + gnt_list_entry->gnt_pages = page; > + gnt_list_entry->gnt_target = vaddr; > + > + np->rx_grant[id] = gnt_list_entry; > + } > + > + gnttab_grant_foreign_access_ref(gnt_ref, np->xbdev->otherend_id, > + mfn, 0); > + } > + np->grant_rx_ref[id] = gnt_ref; > + > + return gnt_ref; > +} > + > static void xennet_alloc_rx_buffers(struct net_device *dev) > { > unsigned short id; > @@ -240,8 +328,6 @@ static void xennet_alloc_rx_buffers(struct net_device *dev) > int i, batch_target, notify; > RING_IDX req_prod = np->rx.req_prod_pvt; > grant_ref_t ref; > - unsigned long pfn; > - void *vaddr; > struct xen_netif_rx_request *req; > > if (unlikely(!netif_carrier_ok(dev))) > @@ -306,19 +392,16 @@ no_skb: > BUG_ON(np->rx_skbs[id]); > np->rx_skbs[id] = skb; > > - ref = gnttab_claim_grant_reference(&np->gref_rx_head); > - BUG_ON((signed short)ref < 0); > - np->grant_rx_ref[id] = ref; > + page = skb_frag_page(&skb_shinfo(skb)->frags[0]); > > - pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0])); > - vaddr = 
page_address(skb_frag_page(&skb_shinfo(skb)->frags[0])); > + ref = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)), > + page_address(page), id, 0); > + if ((signed short)ref < 0) { > + __skb_queue_tail(&np->rx_batch, skb); > + break; > + } > > req = RING_GET_REQUEST(&np->rx, req_prod + i); > - gnttab_grant_foreign_access_ref(ref, > - np->xbdev->otherend_id, > - pfn_to_mfn(pfn), > - 0); > - > req->id = id; > req->gref = ref; > } > @@ -375,17 +458,30 @@ static void xennet_tx_buf_gc(struct net_device *dev) > > id = txrsp->id; > skb = np->tx_skbs[id].skb; > - if (unlikely(gnttab_query_foreign_access( > - np->grant_tx_ref[id]) != 0)) { > - printk(KERN_ALERT "xennet_tx_buf_gc: warning " > - "-- grant still in use by backend " > - "domain.\n"); > - BUG(); > + > + if (np->persistent_gnt) { > + struct gnt_list *gnt_list_entry; > + > + gnt_list_entry = np->tx_grant[id]; > + BUG_ON(!gnt_list_entry); > + > + gnt_list_entry->tail = np->tx_gnt_list; > + np->tx_gnt_list = gnt_list_entry; > + np->tx_gnt_cnt++; > + } else { > + if (unlikely(gnttab_query_foreign_access( > + np->grant_tx_ref[id]) != 0)) { > + printk(KERN_ALERT "xennet_tx_buf_gc: warning " > + "-- grant still in use by backend " > + "domain.\n"); > + BUG(); > + } > + > + gnttab_end_foreign_access_ref( > + np->grant_tx_ref[id], GNTMAP_readonly); If I've read the code correctly, you are giving this frame both read/write permissions to the other end on xennet_alloc_tx_ref, but then you are only removing the read permissions? (see comment below on the xennet_alloc_tx_ref function). 
> + gnttab_release_grant_reference( > + &np->gref_tx_head, np->grant_tx_ref[id]); > } > - gnttab_end_foreign_access_ref( > - np->grant_tx_ref[id], GNTMAP_readonly); > - gnttab_release_grant_reference( > - &np->gref_tx_head, np->grant_tx_ref[id]); > np->grant_tx_ref[id] = GRANT_INVALID_REF; > add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id); > dev_kfree_skb_irq(skb); > @@ -409,6 +505,59 @@ static void xennet_tx_buf_gc(struct net_device *dev) > xennet_maybe_wake_tx(dev); > } > > +static grant_ref_t xennet_alloc_tx_ref(struct net_device *dev, > + unsigned long mfn, > + unsigned int id) > +{ > + struct netfront_info *np = netdev_priv(dev); > + grant_ref_t ref; > + struct page *granted_page; > + > + if (np->persistent_gnt && np->tx_gnt_cnt) { > + struct gnt_list *gnt_list_entry; > + > + gnt_list_entry = np->tx_gnt_list; > + np->tx_gnt_list = np->tx_gnt_list->tail; > + np->tx_gnt_cnt--; > + > + ref = gnt_list_entry->gref; > + np->tx_grant[id] = gnt_list_entry; > + } else { > + struct gnt_list *gnt_list_entry; > + > + BUG_ON(!np->persistent_gnt && np->tx_gnt_cnt); > + ref = gnttab_claim_grant_reference(&np->gref_tx_head); > + BUG_ON((signed short)ref < 0); > + > + if (np->persistent_gnt) { > + granted_page = alloc_page(GFP_KERNEL); > + if (!granted_page) { > + gnttab_release_grant_reference( > + &np->gref_tx_head, ref); > + return -ENOMEM; > + } > + > + mfn = pfn_to_mfn(page_to_pfn(granted_page)); > + gnt_list_entry = kmalloc(sizeof(struct gnt_list), > + GFP_KERNEL); > + if (!gnt_list_entry) { > + __free_page(granted_page); > + gnttab_release_grant_reference( > + &np->gref_tx_head, ref); > + return -ENOMEM; > + } > + > + gnt_list_entry->gref = ref; > + gnt_list_entry->gnt_pages = granted_page; > + np->tx_grant[id] = gnt_list_entry; > + } > + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, > + mfn, 0); If you are not always using persistent grants I guess you need to give read only permissions to this frame (GNTMAP_readonly). 
Also, for keeping things in logical order, isn't it best that this function comes before xennet_tx_buf_gc? > + } > + > + return ref; > +} > + > @@ -1132,8 +1357,10 @@ static void xennet_release_rx_bufs(struct netfront_info *np) > } > > skb = np->rx_skbs[id]; > - mfn = gnttab_end_foreign_transfer_ref(ref); > - gnttab_release_grant_reference(&np->gref_rx_head, ref); > + if (!np->persistent_gnt) { > + mfn = gnttab_end_foreign_transfer_ref(ref); > + gnttab_release_grant_reference(&np->gref_rx_head, ref); > + } > np->grant_rx_ref[id] = GRANT_INVALID_REF; > > if (0 == mfn) { > @@ -1607,6 +1834,13 @@ again: > goto abort_transaction; > } > > + err = xenbus_printf(xbt, dev->nodename, "feature-persistent-grants", > + "%u", info->persistent_gnt); As in netback, I think "feature-persistent" should be used. > + if (err) { > + message = "writing feature-persistent-grants"; > + xenbus_dev_fatal(dev, err, "%s", message); > + } > + > err = xenbus_transaction_end(xbt, 0); > if (err) { > if (err == -EAGAIN) > @@ -1634,6 +1868,7 @@ static int xennet_connect(struct net_device *dev) > grant_ref_t ref; > struct xen_netif_rx_request *req; > unsigned int feature_rx_copy; > + int ret, val; > > err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, > "feature-rx-copy", "%u", &feature_rx_copy); > @@ -1646,6 +1881,13 @@ static int xennet_connect(struct net_device *dev) > return -ENODEV; > } > > + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, > + "feature-persistent-grants", "%u", &val); > + if (err != 1) > + val = 0; > + > + np->persistent_gnt = !!val; > + > err = talk_to_netback(np->xbdev, np); > if (err) > return err; > @@ -1657,9 +1899,24 @@ static int xennet_connect(struct net_device *dev) > spin_lock_bh(&np->rx_lock); > spin_lock_irq(&np->tx_lock); > > + np->tx_gnt_cnt = 0; > + np->rx_gnt_cnt = 0; > + > /* Step 1: Discard all pending TX packet fragments. 
*/ > xennet_release_tx_bufs(np); > > + if (np->persistent_gnt) { > + struct gnt_list *gnt_list_entry; > + > + while (np->rx_gnt_list) { > + gnt_list_entry = np->rx_gnt_list; > + np->rx_gnt_list = np->rx_gnt_list->tail; > + gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); > + __free_page(gnt_list_entry->gnt_pages); > + kfree(gnt_list_entry); > + } > + } > + > /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ > for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { > skb_frag_t *frag; > @@ -1673,10 +1930,11 @@ static int xennet_connect(struct net_device *dev) > > frag = &skb_shinfo(skb)->frags[0]; > page = skb_frag_page(frag); > - gnttab_grant_foreign_access_ref( > - ref, np->xbdev->otherend_id, > - pfn_to_mfn(page_to_pfn(page)), > - 0); > + ret = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)), > + page_address(page), requeue_idx, ref); > + if ((signed short)ret < 0) > + break; > + > req->gref = ref; > req->id = requeue_idx; > > -- > 1.7.3.4 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2012-11-15 18:52, Roger Pau Monné wrote: > On 15/11/12 08:05, Annie Li wrote: >> Tx/rx page pool are maintained. New grant is mapped and put into >> pool, unmap only happens when releasing/removing device. >> >> Signed-off-by: Annie Li<annie.li@oracle.com> >> --- >> drivers/net/xen-netfront.c | 372 +++++++++++++++++++++++++++++++++++++------- >> 1 files changed, 315 insertions(+), 57 deletions(-) >> >> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c >> index 0ebbb19..17b81c0 100644 >> --- a/drivers/net/xen-netfront.c >> +++ b/drivers/net/xen-netfront.c >> @@ -79,6 +79,13 @@ struct netfront_stats { >> struct u64_stats_sync syncp; >> }; >> >> +struct gnt_list { >> + grant_ref_t gref; >> + struct page *gnt_pages; >> + void *gnt_target; >> + struct gnt_list *tail; >> +}; > This could also be shared with blkfront. Netfront does not have the shadow like blkfront, and it needs the gnt_target to save skb address of rx path. So we can share this too? blkfront would not use it actually. > >> + >> struct netfront_info { >> struct list_head list; >> struct net_device *netdev; >> @@ -109,6 +116,10 @@ struct netfront_info { >> grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; >> unsigned tx_skb_freelist; >> >> + struct gnt_list *tx_grant[NET_TX_RING_SIZE]; >> + struct gnt_list *tx_gnt_list; >> + unsigned int tx_gnt_cnt; > I don't understand this, why do you need both an array and a list? The array tx_grant is just like other tx_skbs, grant_tx_ref. It saves grant entries corresponding every request in the ring. This is what netfront different from blkfront, netfront does not have shadow ring, and it only uses a ring size array to track every request in the ring. The list is like a pool to save all available persistent grants. > I'm > not familiar with net code, so I don't know if this is some kind of > special netfront thing? Yes, this is different from blkfront. netfront uses ring size arrays to track every request in the ring. 
> > Anyway if you have to use a list I would recommend using one of the list > constructions that's already in the kernel, it simplifies the code and > makes it more easy to understand than creating your own list structure. Ok, thanks. > >> + >> spinlock_t rx_lock ____cacheline_aligned_in_smp; >> struct xen_netif_rx_front_ring rx; >> int rx_ring_ref; >> @@ -126,6 +137,10 @@ struct netfront_info { >> grant_ref_t gref_rx_head; >> grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; >> >> + struct gnt_list *rx_grant[NET_RX_RING_SIZE]; >> + struct gnt_list *rx_gnt_list; >> + unsigned int rx_gnt_cnt; > Same comment above here. Same as above. > >> + >> unsigned long rx_pfn_array[NET_RX_RING_SIZE]; >> struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; >> struct mmu_update rx_mmu[NET_RX_RING_SIZE]; >> @@ -134,6 +149,7 @@ struct netfront_info { >> struct netfront_stats __percpu *stats; >> >> unsigned long rx_gso_checksum_fixup; >> + u8 persistent_gnt:1; >> }; >> >> struct netfront_rx_info { >> @@ -194,6 +210,16 @@ static grant_ref_t xennet_get_rx_ref(struct netfront_info *np, >> return ref; >> } >> >> +static struct gnt_list *xennet_get_rx_grant(struct netfront_info *np, >> + RING_IDX ri) >> +{ >> + int i = xennet_rxidx(ri); >> + struct gnt_list *gntlist = np->rx_grant[i]; >> + np->rx_grant[i] = NULL; > Ok, I think I get why do you need both an array and a list, is that > because netfront doesn't have some kind of shadow ring to keep track of > issued requests? Yes. > > So each issued request has an associated gnt_list with the list of used > grants? gnt_list is kind of free grants. It is like a pool of free grants. If free grants exist in this list, free grant will be gotten from this list. If no, new grant will be allocated. In xennet_tx_buf_gc, free grants will be put into the list again if response status is OK. > If so it would be good to add a comment about it. 
> >> + return gntlist; >> +} >> + >> #ifdef CONFIG_SYSFS >> static int xennet_sysfs_addif(struct net_device *netdev); >> static void xennet_sysfs_delif(struct net_device *netdev); >> @@ -231,6 +257,68 @@ static void xennet_maybe_wake_tx(struct net_device *dev) >> netif_wake_queue(dev); >> } >> >> +static grant_ref_t xennet_alloc_rx_ref(struct net_device *dev, >> + unsigned long mfn, void *vaddr, >> + unsigned int id, >> + grant_ref_t ref) >> +{ >> + struct netfront_info *np = netdev_priv(dev); >> + grant_ref_t gnt_ref; >> + struct gnt_list *gnt_list_entry; >> + >> + if (np->persistent_gnt&& np->rx_gnt_cnt) { >> + gnt_list_entry = np->rx_gnt_list; >> + np->rx_gnt_list = np->rx_gnt_list->tail; >> + np->rx_gnt_cnt--; >> + >> + gnt_list_entry->gnt_target = vaddr; >> + gnt_ref = gnt_list_entry->gref; >> + np->rx_grant[id] = gnt_list_entry; >> + } else { >> + struct page *page; >> + >> + BUG_ON(!np->persistent_gnt&& np->rx_gnt_cnt); >> + if (!ref) >> + gnt_ref = >> + gnttab_claim_grant_reference(&np->gref_rx_head); >> + else >> + gnt_ref = ref; >> + BUG_ON((signed short)gnt_ref< 0); >> + >> + if (np->persistent_gnt) { > So you are only using persistent grants if the backend also supports > them. Current implementation is: If netback supports persistent grant, the frontend will work with persistent grant feature too. If netback does not support persistent grant, the frontend will work without persistent grant feature. > Have you benchmarked the performance of a persistent frontend with > a non-persistent backend. I remember I did some test before, not so sure. Will check it. > In the block case, usign a persistent frontend > with a non-persistent backend let to an overall performance improvement, > so blkfront uses persistent grants even if blkback doesn't support them. > Take a look at the following graph: > > http://xenbits.xen.org/people/royger/persistent_grants/nonpers_read.png Good idea, that makes sense. I will change netfront too, thanks. 
> >> + page = alloc_page(GFP_KERNEL); >> + if (!page) { >> + if (!ref) >> + gnttab_release_grant_reference( >> +&np->gref_rx_head, ref); >> + return -ENOMEM; >> + } >> + mfn = pfn_to_mfn(page_to_pfn(page)); >> + >> + gnt_list_entry = kmalloc(sizeof(struct gnt_list), >> + GFP_KERNEL); >> + if (!gnt_list_entry) { >> + __free_page(page); >> + if (!ref) >> + gnttab_release_grant_reference( >> +&np->gref_rx_head, ref); >> + return -ENOMEM; >> + } >> + gnt_list_entry->gref = gnt_ref; >> + gnt_list_entry->gnt_pages = page; >> + gnt_list_entry->gnt_target = vaddr; >> + >> + np->rx_grant[id] = gnt_list_entry; >> + } >> + >> + gnttab_grant_foreign_access_ref(gnt_ref, np->xbdev->otherend_id, >> + mfn, 0); >> + } >> + np->grant_rx_ref[id] = gnt_ref; >> + >> + return gnt_ref; >> +} >> + >> static void xennet_alloc_rx_buffers(struct net_device *dev) >> { >> unsigned short id; >> @@ -240,8 +328,6 @@ static void xennet_alloc_rx_buffers(struct net_device *dev) >> int i, batch_target, notify; >> RING_IDX req_prod = np->rx.req_prod_pvt; >> grant_ref_t ref; >> - unsigned long pfn; >> - void *vaddr; >> struct xen_netif_rx_request *req; >> >> if (unlikely(!netif_carrier_ok(dev))) >> @@ -306,19 +392,16 @@ no_skb: >> BUG_ON(np->rx_skbs[id]); >> np->rx_skbs[id] = skb; >> >> - ref = gnttab_claim_grant_reference(&np->gref_rx_head); >> - BUG_ON((signed short)ref< 0); >> - np->grant_rx_ref[id] = ref; >> + page = skb_frag_page(&skb_shinfo(skb)->frags[0]); >> >> - pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0])); >> - vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0])); >> + ref = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)), >> + page_address(page), id, 0); >> + if ((signed short)ref< 0) { >> + __skb_queue_tail(&np->rx_batch, skb); >> + break; >> + } >> >> req = RING_GET_REQUEST(&np->rx, req_prod + i); >> - gnttab_grant_foreign_access_ref(ref, >> - np->xbdev->otherend_id, >> - pfn_to_mfn(pfn), >> - 0); >> - >> req->id = id; >> req->gref = ref; >> } >> @@ 
-375,17 +458,30 @@ static void xennet_tx_buf_gc(struct net_device *dev) >> >> id = txrsp->id; >> skb = np->tx_skbs[id].skb; >> - if (unlikely(gnttab_query_foreign_access( >> - np->grant_tx_ref[id]) != 0)) { >> - printk(KERN_ALERT "xennet_tx_buf_gc: warning " >> - "-- grant still in use by backend " >> - "domain.\n"); >> - BUG(); >> + >> + if (np->persistent_gnt) { >> + struct gnt_list *gnt_list_entry; >> + >> + gnt_list_entry = np->tx_grant[id]; >> + BUG_ON(!gnt_list_entry); >> + >> + gnt_list_entry->tail = np->tx_gnt_list; >> + np->tx_gnt_list = gnt_list_entry; >> + np->tx_gnt_cnt++; >> + } else { >> + if (unlikely(gnttab_query_foreign_access( >> + np->grant_tx_ref[id]) != 0)) { >> + printk(KERN_ALERT "xennet_tx_buf_gc: warning " >> + "-- grant still in use by backend " >> + "domain.\n"); >> + BUG(); >> + } >> + >> + gnttab_end_foreign_access_ref( >> + np->grant_tx_ref[id], GNTMAP_readonly); > If I've read the code correctly, you are giving this frame both > read/write permissions to the other end on xennet_alloc_tx_ref, but then > you are only removing the read permissions? (see comment below on the > xennet_alloc_tx_ref function). Yes, this is a bug. For non persistent grant, it should remove the read permissions. For persistent grant, it should remove both. As mentioned above, it is better to enable persistent grant, I will change code and not consider non persistent grant. See comments below about why needing both permissions in xennet_alloc_tx_ref. 
> >> + gnttab_release_grant_reference( >> +&np->gref_tx_head, np->grant_tx_ref[id]); >> } >> - gnttab_end_foreign_access_ref( >> - np->grant_tx_ref[id], GNTMAP_readonly); >> - gnttab_release_grant_reference( >> -&np->gref_tx_head, np->grant_tx_ref[id]); >> np->grant_tx_ref[id] = GRANT_INVALID_REF; >> add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id); >> dev_kfree_skb_irq(skb); >> @@ -409,6 +505,59 @@ static void xennet_tx_buf_gc(struct net_device *dev) >> xennet_maybe_wake_tx(dev); >> } >> >> +static grant_ref_t xennet_alloc_tx_ref(struct net_device *dev, >> + unsigned long mfn, >> + unsigned int id) >> +{ >> + struct netfront_info *np = netdev_priv(dev); >> + grant_ref_t ref; >> + struct page *granted_page; >> + >> + if (np->persistent_gnt&& np->tx_gnt_cnt) { >> + struct gnt_list *gnt_list_entry; >> + >> + gnt_list_entry = np->tx_gnt_list; >> + np->tx_gnt_list = np->tx_gnt_list->tail; >> + np->tx_gnt_cnt--; >> + >> + ref = gnt_list_entry->gref; >> + np->tx_grant[id] = gnt_list_entry; >> + } else { >> + struct gnt_list *gnt_list_entry; >> + >> + BUG_ON(!np->persistent_gnt&& np->tx_gnt_cnt); >> + ref = gnttab_claim_grant_reference(&np->gref_tx_head); >> + BUG_ON((signed short)ref< 0); >> + >> + if (np->persistent_gnt) { >> + granted_page = alloc_page(GFP_KERNEL); >> + if (!granted_page) { >> + gnttab_release_grant_reference( >> +&np->gref_tx_head, ref); >> + return -ENOMEM; >> + } >> + >> + mfn = pfn_to_mfn(page_to_pfn(granted_page)); >> + gnt_list_entry = kmalloc(sizeof(struct gnt_list), >> + GFP_KERNEL); >> + if (!gnt_list_entry) { >> + __free_page(granted_page); >> + gnttab_release_grant_reference( >> +&np->gref_tx_head, ref); >> + return -ENOMEM; >> + } >> + >> + gnt_list_entry->gref = ref; >> + gnt_list_entry->gnt_pages = granted_page; >> + np->tx_grant[id] = gnt_list_entry; >> + } >> + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, >> + mfn, 0); > If you are not always using persistent grants I guess you need to give > read only 
permissions to this frame (GNTMAP_readonly). We can not use GNTMAP_readonly here because tx path packet data will be copied into these persistent grant pages. Mabbe it is better to use GNTMAP_readonly for nonpersistent and 0 for persistent grant. As mentioned above, it is better to enable persistent grant, I will change code and not consider non persistent grant. > Also, for keeping > things in logical order, isn't it best that this function comes before > xennet_tx_buf_gc? xennet_alloc_tx_ref is called by following function xennet_make_frags, so I assume xennet_alloc_tx_ref is better to be close to xennet_make_frags. Xennet_tx_buf_gc does not have any connection with xennet_alloc_tx_ref, did I miss something? > >> + } >> + >> + return ref; >> +} >> + >> @@ -1132,8 +1357,10 @@ static void xennet_release_rx_bufs(struct netfront_info *np) >> } >> >> skb = np->rx_skbs[id]; >> - mfn = gnttab_end_foreign_transfer_ref(ref); >> - gnttab_release_grant_reference(&np->gref_rx_head, ref); >> + if (!np->persistent_gnt) { >> + mfn = gnttab_end_foreign_transfer_ref(ref); >> + gnttab_release_grant_reference(&np->gref_rx_head, ref); >> + } >> np->grant_rx_ref[id] = GRANT_INVALID_REF; >> >> if (0 == mfn) { >> @@ -1607,6 +1834,13 @@ again: >> goto abort_transaction; >> } >> >> + err = xenbus_printf(xbt, dev->nodename, "feature-persistent-grants", >> + "%u", info->persistent_gnt); > As in netback, I think "feature-persistent" should be used. Same in blkback, I assume it is "feature-persistent-grants", right? I referred your RFC patch, did you change it later? Or I missed something? 
Thanks Annie > >> + if (err) { >> + message = "writing feature-persistent-grants"; >> + xenbus_dev_fatal(dev, err, "%s", message); >> + } >> + >> err = xenbus_transaction_end(xbt, 0); >> if (err) { >> if (err == -EAGAIN) >> @@ -1634,6 +1868,7 @@ static int xennet_connect(struct net_device *dev) >> grant_ref_t ref; >> struct xen_netif_rx_request *req; >> unsigned int feature_rx_copy; >> + int ret, val; >> >> err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, >> "feature-rx-copy", "%u",&feature_rx_copy); >> @@ -1646,6 +1881,13 @@ static int xennet_connect(struct net_device *dev) >> return -ENODEV; >> } >> >> + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, >> + "feature-persistent-grants", "%u",&val); >> + if (err != 1) >> + val = 0; >> + >> + np->persistent_gnt = !!val; >> + >> err = talk_to_netback(np->xbdev, np); >> if (err) >> return err; >> @@ -1657,9 +1899,24 @@ static int xennet_connect(struct net_device *dev) >> spin_lock_bh(&np->rx_lock); >> spin_lock_irq(&np->tx_lock); >> >> + np->tx_gnt_cnt = 0; >> + np->rx_gnt_cnt = 0; >> + >> /* Step 1: Discard all pending TX packet fragments. */ >> xennet_release_tx_bufs(np); >> >> + if (np->persistent_gnt) { >> + struct gnt_list *gnt_list_entry; >> + >> + while (np->rx_gnt_list) { >> + gnt_list_entry = np->rx_gnt_list; >> + np->rx_gnt_list = np->rx_gnt_list->tail; >> + gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); >> + __free_page(gnt_list_entry->gnt_pages); >> + kfree(gnt_list_entry); >> + } >> + } >> + >> /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. 
*/ >> for (requeue_idx = 0, i = 0; i< NET_RX_RING_SIZE; i++) { >> skb_frag_t *frag; >> @@ -1673,10 +1930,11 @@ static int xennet_connect(struct net_device *dev) >> >> frag =&skb_shinfo(skb)->frags[0]; >> page = skb_frag_page(frag); >> - gnttab_grant_foreign_access_ref( >> - ref, np->xbdev->otherend_id, >> - pfn_to_mfn(page_to_pfn(page)), >> - 0); >> + ret = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)), >> + page_address(page), requeue_idx, ref); >> + if ((signed short)ret< 0) >> + break; >> + >> req->gref = ref; >> req->id = requeue_idx; >> >> -- >> 1.7.3.4 >> >> >> _______________________________________________ >> Xen-devel mailing list >> Xen-devel@lists.xen.org >> http://lists.xen.org/xen-devel >> -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2012-11-16 13:22, ANNIE LI wrote: > > > On 2012-11-15 18:52, Roger Pau Monné wrote: >>> + err = xenbus_printf(xbt, dev->nodename, >>> "feature-persistent-grants", >>> + "%u", info->persistent_gnt); >> As in netback, I think "feature-persistent" should be used. > > Same in blkback, I assume it is "feature-persistent-grants", right? > I referred your RFC patch, did you change it later? Or I missed > something? > > My mistake. In your v2 patch, it is "feature-persistent". I will change the code to match blkback/blkfront. Thanks Annie -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 0ebbb19..17b81c0 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -79,6 +79,13 @@ struct netfront_stats { struct u64_stats_sync syncp; }; +struct gnt_list { + grant_ref_t gref; + struct page *gnt_pages; + void *gnt_target; + struct gnt_list *tail; +}; + struct netfront_info { struct list_head list; struct net_device *netdev; @@ -109,6 +116,10 @@ struct netfront_info { grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; unsigned tx_skb_freelist; + struct gnt_list *tx_grant[NET_TX_RING_SIZE]; + struct gnt_list *tx_gnt_list; + unsigned int tx_gnt_cnt; + spinlock_t rx_lock ____cacheline_aligned_in_smp; struct xen_netif_rx_front_ring rx; int rx_ring_ref; @@ -126,6 +137,10 @@ struct netfront_info { grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + struct gnt_list *rx_grant[NET_RX_RING_SIZE]; + struct gnt_list *rx_gnt_list; + unsigned int rx_gnt_cnt; + unsigned long rx_pfn_array[NET_RX_RING_SIZE]; struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; struct mmu_update rx_mmu[NET_RX_RING_SIZE]; @@ -134,6 +149,7 @@ struct netfront_info { struct netfront_stats __percpu *stats; unsigned long rx_gso_checksum_fixup; + u8 persistent_gnt:1; }; struct netfront_rx_info { @@ -194,6 +210,16 @@ static grant_ref_t xennet_get_rx_ref(struct netfront_info *np, return ref; } +static struct gnt_list *xennet_get_rx_grant(struct netfront_info *np, + RING_IDX ri) +{ + int i = xennet_rxidx(ri); + struct gnt_list *gntlist = np->rx_grant[i]; + np->rx_grant[i] = NULL; + + return gntlist; +} + #ifdef CONFIG_SYSFS static int xennet_sysfs_addif(struct net_device *netdev); static void xennet_sysfs_delif(struct net_device *netdev); @@ -231,6 +257,68 @@ static void xennet_maybe_wake_tx(struct net_device *dev) netif_wake_queue(dev); } +static grant_ref_t xennet_alloc_rx_ref(struct net_device *dev, + unsigned long mfn, void *vaddr, + unsigned int id, + grant_ref_t ref) +{ + struct netfront_info 
*np = netdev_priv(dev); + grant_ref_t gnt_ref; + struct gnt_list *gnt_list_entry; + + if (np->persistent_gnt && np->rx_gnt_cnt) { + gnt_list_entry = np->rx_gnt_list; + np->rx_gnt_list = np->rx_gnt_list->tail; + np->rx_gnt_cnt--; + + gnt_list_entry->gnt_target = vaddr; + gnt_ref = gnt_list_entry->gref; + np->rx_grant[id] = gnt_list_entry; + } else { + struct page *page; + + BUG_ON(!np->persistent_gnt && np->rx_gnt_cnt); + if (!ref) + gnt_ref = + gnttab_claim_grant_reference(&np->gref_rx_head); + else + gnt_ref = ref; + BUG_ON((signed short)gnt_ref < 0); + + if (np->persistent_gnt) { + page = alloc_page(GFP_KERNEL); + if (!page) { + if (!ref) + gnttab_release_grant_reference( + &np->gref_rx_head, ref); + return -ENOMEM; + } + mfn = pfn_to_mfn(page_to_pfn(page)); + + gnt_list_entry = kmalloc(sizeof(struct gnt_list), + GFP_KERNEL); + if (!gnt_list_entry) { + __free_page(page); + if (!ref) + gnttab_release_grant_reference( + &np->gref_rx_head, ref); + return -ENOMEM; + } + gnt_list_entry->gref = gnt_ref; + gnt_list_entry->gnt_pages = page; + gnt_list_entry->gnt_target = vaddr; + + np->rx_grant[id] = gnt_list_entry; + } + + gnttab_grant_foreign_access_ref(gnt_ref, np->xbdev->otherend_id, + mfn, 0); + } + np->grant_rx_ref[id] = gnt_ref; + + return gnt_ref; +} + static void xennet_alloc_rx_buffers(struct net_device *dev) { unsigned short id; @@ -240,8 +328,6 @@ static void xennet_alloc_rx_buffers(struct net_device *dev) int i, batch_target, notify; RING_IDX req_prod = np->rx.req_prod_pvt; grant_ref_t ref; - unsigned long pfn; - void *vaddr; struct xen_netif_rx_request *req; if (unlikely(!netif_carrier_ok(dev))) @@ -306,19 +392,16 @@ no_skb: BUG_ON(np->rx_skbs[id]); np->rx_skbs[id] = skb; - ref = gnttab_claim_grant_reference(&np->gref_rx_head); - BUG_ON((signed short)ref < 0); - np->grant_rx_ref[id] = ref; + page = skb_frag_page(&skb_shinfo(skb)->frags[0]); - pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0])); - vaddr = 
page_address(skb_frag_page(&skb_shinfo(skb)->frags[0])); + ref = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)), + page_address(page), id, 0); + if ((signed short)ref < 0) { + __skb_queue_tail(&np->rx_batch, skb); + break; + } req = RING_GET_REQUEST(&np->rx, req_prod + i); - gnttab_grant_foreign_access_ref(ref, - np->xbdev->otherend_id, - pfn_to_mfn(pfn), - 0); - req->id = id; req->gref = ref; } @@ -375,17 +458,30 @@ static void xennet_tx_buf_gc(struct net_device *dev) id = txrsp->id; skb = np->tx_skbs[id].skb; - if (unlikely(gnttab_query_foreign_access( - np->grant_tx_ref[id]) != 0)) { - printk(KERN_ALERT "xennet_tx_buf_gc: warning " - "-- grant still in use by backend " - "domain.\n"); - BUG(); + + if (np->persistent_gnt) { + struct gnt_list *gnt_list_entry; + + gnt_list_entry = np->tx_grant[id]; + BUG_ON(!gnt_list_entry); + + gnt_list_entry->tail = np->tx_gnt_list; + np->tx_gnt_list = gnt_list_entry; + np->tx_gnt_cnt++; + } else { + if (unlikely(gnttab_query_foreign_access( + np->grant_tx_ref[id]) != 0)) { + printk(KERN_ALERT "xennet_tx_buf_gc: warning " + "-- grant still in use by backend " + "domain.\n"); + BUG(); + } + + gnttab_end_foreign_access_ref( + np->grant_tx_ref[id], GNTMAP_readonly); + gnttab_release_grant_reference( + &np->gref_tx_head, np->grant_tx_ref[id]); } - gnttab_end_foreign_access_ref( - np->grant_tx_ref[id], GNTMAP_readonly); - gnttab_release_grant_reference( - &np->gref_tx_head, np->grant_tx_ref[id]); np->grant_tx_ref[id] = GRANT_INVALID_REF; add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id); dev_kfree_skb_irq(skb); @@ -409,6 +505,59 @@ static void xennet_tx_buf_gc(struct net_device *dev) xennet_maybe_wake_tx(dev); } +static grant_ref_t xennet_alloc_tx_ref(struct net_device *dev, + unsigned long mfn, + unsigned int id) +{ + struct netfront_info *np = netdev_priv(dev); + grant_ref_t ref; + struct page *granted_page; + + if (np->persistent_gnt && np->tx_gnt_cnt) { + struct gnt_list *gnt_list_entry; + + gnt_list_entry = 
np->tx_gnt_list; + np->tx_gnt_list = np->tx_gnt_list->tail; + np->tx_gnt_cnt--; + + ref = gnt_list_entry->gref; + np->tx_grant[id] = gnt_list_entry; + } else { + struct gnt_list *gnt_list_entry; + + BUG_ON(!np->persistent_gnt && np->tx_gnt_cnt); + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + + if (np->persistent_gnt) { + granted_page = alloc_page(GFP_KERNEL); + if (!granted_page) { + gnttab_release_grant_reference( + &np->gref_tx_head, ref); + return -ENOMEM; + } + + mfn = pfn_to_mfn(page_to_pfn(granted_page)); + gnt_list_entry = kmalloc(sizeof(struct gnt_list), + GFP_KERNEL); + if (!gnt_list_entry) { + __free_page(granted_page); + gnttab_release_grant_reference( + &np->gref_tx_head, ref); + return -ENOMEM; + } + + gnt_list_entry->gref = ref; + gnt_list_entry->gnt_pages = granted_page; + np->tx_grant[id] = gnt_list_entry; + } + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, 0); + } + + return ref; +} + static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, struct xen_netif_tx_request *tx) { @@ -421,6 +570,9 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, unsigned int len = skb_headlen(skb); unsigned int id; grant_ref_t ref; + struct gnt_list *gnt_list_entry; + void *out_addr; + void *in_addr; int i; /* While the header overlaps a page boundary (including being @@ -436,17 +588,19 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, np->tx_skbs[id].skb = skb_get(skb); tx = RING_GET_REQUEST(&np->tx, prod++); tx->id = id; - ref = gnttab_claim_grant_reference(&np->gref_tx_head); - BUG_ON((signed short)ref < 0); - mfn = virt_to_mfn(data); - gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, - mfn, GNTMAP_readonly); - + ref = xennet_alloc_tx_ref(dev, mfn, id); tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = len; tx->flags = 0; + if (np->persistent_gnt) { + gnt_list_entry = np->tx_grant[id]; + out_addr 
= page_address(gnt_list_entry->gnt_pages); + in_addr = (void *)((unsigned long)data + & ~(PAGE_SIZE-1)); + memcpy(out_addr, in_addr, PAGE_SIZE); + } } /* Grant backend access to each skb fragment page. */ @@ -459,17 +613,19 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, np->tx_skbs[id].skb = skb_get(skb); tx = RING_GET_REQUEST(&np->tx, prod++); tx->id = id; - ref = gnttab_claim_grant_reference(&np->gref_tx_head); - BUG_ON((signed short)ref < 0); - mfn = pfn_to_mfn(page_to_pfn(skb_frag_page(frag))); - gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, - mfn, GNTMAP_readonly); - + ref = xennet_alloc_tx_ref(dev, mfn, id); tx->gref = np->grant_tx_ref[id] = ref; tx->offset = frag->page_offset; tx->size = skb_frag_size(frag); tx->flags = 0; + if (np->persistent_gnt) { + gnt_list_entry = np->tx_grant[id]; + out_addr = page_address(gnt_list_entry->gnt_pages); + in_addr = (void *)((unsigned long)page_address( + skb_frag_page(frag)) & ~(PAGE_SIZE-1)); + memcpy(out_addr, in_addr, PAGE_SIZE); + } } np->tx.req_prod_pvt = prod; @@ -491,6 +647,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int offset = offset_in_page(data); unsigned int len = skb_headlen(skb); unsigned long flags; + struct gnt_list *gnt_list_entry; + void *out_addr; + void *in_addr; frags += DIV_ROUND_UP(offset + len, PAGE_SIZE); if (unlikely(frags > MAX_SKB_FRAGS + 1)) { @@ -517,16 +676,20 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) tx = RING_GET_REQUEST(&np->tx, i); tx->id = id; - ref = gnttab_claim_grant_reference(&np->gref_tx_head); - BUG_ON((signed short)ref < 0); mfn = virt_to_mfn(data); - gnttab_grant_foreign_access_ref( - ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); + ref = xennet_alloc_tx_ref(dev, mfn, id); tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = len; extra = NULL; + if (np->persistent_gnt) { + gnt_list_entry = np->tx_grant[id]; + out_addr = 
page_address(gnt_list_entry->gnt_pages); + in_addr = (void *)((unsigned long)data & ~(PAGE_SIZE-1)); + memcpy(out_addr, in_addr, PAGE_SIZE); + } + tx->flags = 0; if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ @@ -595,13 +758,17 @@ static int xennet_close(struct net_device *dev) } static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb, - grant_ref_t ref) + grant_ref_t ref, RING_IDX cons) { int new = xennet_rxidx(np->rx.req_prod_pvt); BUG_ON(np->rx_skbs[new]); np->rx_skbs[new] = skb; np->grant_rx_ref[new] = ref; + + if (np->persistent_gnt) + np->rx_grant[new] = xennet_get_rx_grant(np, cons); + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; np->rx.req_prod_pvt++; @@ -644,7 +811,7 @@ static int xennet_get_extras(struct netfront_info *np, skb = xennet_get_rx_skb(np, cons); ref = xennet_get_rx_ref(np, cons); - xennet_move_rx_slot(np, skb, ref); + xennet_move_rx_slot(np, skb, ref, cons); } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); np->rx.rsp_cons = cons; @@ -665,6 +832,12 @@ static int xennet_get_responses(struct netfront_info *np, int frags = 1; int err = 0; unsigned long ret; + struct gnt_list *gnt_list_entry; + + if (np->persistent_gnt) { + gnt_list_entry = xennet_get_rx_grant(np, cons); + BUG_ON(!gnt_list_entry); + } if (rx->flags & XEN_NETRXF_extra_info) { err = xennet_get_extras(np, extras, rp); @@ -677,7 +850,7 @@ static int xennet_get_responses(struct netfront_info *np, if (net_ratelimit()) dev_warn(dev, "rx->offset: %x, size: %u\n", rx->offset, rx->status); - xennet_move_rx_slot(np, skb, ref); + xennet_move_rx_slot(np, skb, ref, cons); err = -EINVAL; goto next; } @@ -695,11 +868,29 @@ static int xennet_get_responses(struct netfront_info *np, goto next; } - ret = gnttab_end_foreign_access_ref(ref, 0); - BUG_ON(!ret); - - gnttab_release_grant_reference(&np->gref_rx_head, ref); - + if (!np->persistent_gnt) { + ret = gnttab_end_foreign_access_ref(ref, 
0); + BUG_ON(!ret); + gnttab_release_grant_reference(&np->gref_rx_head, ref); + } else { + struct page *grant_page; + void *grant_target; + + grant_page = gnt_list_entry->gnt_pages; + grant_target = gnt_list_entry->gnt_target; + BUG_ON(grant_page == 0); + BUG_ON(grant_target == 0); + + if (rx->status > 0) + memcpy(grant_target+rx->offset, + page_address(grant_page)+rx->offset, + rx->status); /* status encodes size */ + + gnt_list_entry->gref = ref; + gnt_list_entry->tail = np->rx_gnt_list; + np->rx_gnt_list = gnt_list_entry; + np->rx_gnt_cnt++; + } __skb_queue_tail(list, skb); next: @@ -716,6 +907,10 @@ next: rx = RING_GET_RESPONSE(&np->rx, cons + frags); skb = xennet_get_rx_skb(np, cons + frags); ref = xennet_get_rx_ref(np, cons + frags); + if (np->persistent_gnt) { + gnt_list_entry = xennet_get_rx_grant(np, cons + frags); + BUG_ON(!gnt_list_entry); + } frags++; } @@ -1090,16 +1285,32 @@ static void xennet_release_tx_bufs(struct netfront_info *np) struct sk_buff *skb; int i; + if (np->persistent_gnt) { + struct gnt_list *gnt_list_entry; + + while (np->tx_gnt_list) { + gnt_list_entry = np->tx_gnt_list; + np->tx_gnt_list = np->tx_gnt_list->tail; + gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); + gnttab_release_grant_reference(&np->gref_tx_head, + gnt_list_entry->gref); + + __free_page(gnt_list_entry->gnt_pages); + kfree(gnt_list_entry); + } + } + for (i = 0; i < NET_TX_RING_SIZE; i++) { /* Skip over entries which are actually freelist references */ if (skb_entry_is_link(&np->tx_skbs[i])) continue; - skb = np->tx_skbs[i].skb; - gnttab_end_foreign_access_ref(np->grant_tx_ref[i], - GNTMAP_readonly); - gnttab_release_grant_reference(&np->gref_tx_head, - np->grant_tx_ref[i]); + if (!np->persistent_gnt) { + gnttab_end_foreign_access_ref(np->grant_tx_ref[i], + GNTMAP_readonly); + gnttab_release_grant_reference(&np->gref_tx_head, + np->grant_tx_ref[i]); + } np->grant_tx_ref[i] = GRANT_INVALID_REF; add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i); 
dev_kfree_skb_irq(skb); @@ -1124,6 +1335,20 @@ static void xennet_release_rx_bufs(struct netfront_info *np) spin_lock_bh(&np->rx_lock); + if (np->persistent_gnt) { + struct gnt_list *gnt_list_entry; + + while (np->rx_gnt_list) { + gnt_list_entry = np->rx_gnt_list; + np->rx_gnt_list = np->rx_gnt_list->tail; + gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); + gnttab_release_grant_reference(&np->gref_rx_head, + gnt_list_entry->gref); + __free_page(gnt_list_entry->gnt_pages); + kfree(gnt_list_entry); + } + } + for (id = 0; id < NET_RX_RING_SIZE; id++) { ref = np->grant_rx_ref[id]; if (ref == GRANT_INVALID_REF) { @@ -1132,8 +1357,10 @@ static void xennet_release_rx_bufs(struct netfront_info *np) } skb = np->rx_skbs[id]; - mfn = gnttab_end_foreign_transfer_ref(ref); - gnttab_release_grant_reference(&np->gref_rx_head, ref); + if (!np->persistent_gnt) { + mfn = gnttab_end_foreign_transfer_ref(ref); + gnttab_release_grant_reference(&np->gref_rx_head, ref); + } np->grant_rx_ref[id] = GRANT_INVALID_REF; if (0 == mfn) { @@ -1607,6 +1834,13 @@ again: goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, "feature-persistent-grants", + "%u", info->persistent_gnt); + if (err) { + message = "writing feature-persistent-grants"; + xenbus_dev_fatal(dev, err, "%s", message); + } + err = xenbus_transaction_end(xbt, 0); if (err) { if (err == -EAGAIN) @@ -1634,6 +1868,7 @@ static int xennet_connect(struct net_device *dev) grant_ref_t ref; struct xen_netif_rx_request *req; unsigned int feature_rx_copy; + int ret, val; err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-rx-copy", "%u", &feature_rx_copy); @@ -1646,6 +1881,13 @@ static int xennet_connect(struct net_device *dev) return -ENODEV; } + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-persistent-grants", "%u", &val); + if (err != 1) + val = 0; + + np->persistent_gnt = !!val; + err = talk_to_netback(np->xbdev, np); if (err) return err; @@ -1657,9 +1899,24 @@ static int xennet_connect(struct 
net_device *dev) spin_lock_bh(&np->rx_lock); spin_lock_irq(&np->tx_lock); + np->tx_gnt_cnt = 0; + np->rx_gnt_cnt = 0; + /* Step 1: Discard all pending TX packet fragments. */ xennet_release_tx_bufs(np); + if (np->persistent_gnt) { + struct gnt_list *gnt_list_entry; + + while (np->rx_gnt_list) { + gnt_list_entry = np->rx_gnt_list; + np->rx_gnt_list = np->rx_gnt_list->tail; + gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); + __free_page(gnt_list_entry->gnt_pages); + kfree(gnt_list_entry); + } + } + /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { skb_frag_t *frag; @@ -1673,10 +1930,11 @@ static int xennet_connect(struct net_device *dev) frag = &skb_shinfo(skb)->frags[0]; page = skb_frag_page(frag); - gnttab_grant_foreign_access_ref( - ref, np->xbdev->otherend_id, - pfn_to_mfn(page_to_pfn(page)), - 0); + ret = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)), + page_address(page), requeue_idx, ref); + if ((signed short)ret < 0) + break; + req->gref = ref; req->id = requeue_idx;
Tx/rx page pools are maintained. A new grant is mapped and put into the pool; unmapping only happens when the device is released/removed. Signed-off-by: Annie Li <annie.li@oracle.com> --- drivers/net/xen-netfront.c | 372 +++++++++++++++++++++++++++++++++++++------- 1 files changed, 315 insertions(+), 57 deletions(-)