diff mbox

[V2,07/11] virtio-pci: address space translation service (ATS) support

Message ID 1478165243-4767-8-git-send-email-jasowang@redhat.com
State New
Headers show

Commit Message

Jason Wang Nov. 3, 2016, 9:27 a.m. UTC
This patch enables Address Translation Service (ATS) support for virtio
pci devices. This is needed for a guest-visible Device IOTLB
implementation and will be required by the vhost device IOTLB API
implementation for the Intel IOMMU.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/pci/pcie.c                             | 16 ++++++++++++++++
 hw/virtio/virtio-pci.c                    |  7 +++++++
 hw/virtio/virtio-pci.h                    |  4 ++++
 include/hw/pci/pcie.h                     |  4 ++++
 include/standard-headers/linux/pci_regs.h |  1 +
 5 files changed, 32 insertions(+)

Comments

Michael S. Tsirkin Nov. 3, 2016, 7:49 p.m. UTC | #1
On Thu, Nov 03, 2016 at 05:27:19PM +0800, Jason Wang wrote:
> This patches enable the Address Translation Service support for virtio
> pci devices. This is needed for a guest visible Device IOTLB
> implementation and will be required by vhost device IOTLB API
> implementation for intel IOMMU.
> 
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>

I'd like to understand why you think this is strictly required.
Won't setting the CM bit in the IOMMU do the trick?

Also, could you remind me pls - can guests just disable ATS?

What happens then?


> ---
>  hw/pci/pcie.c                             | 16 ++++++++++++++++
>  hw/virtio/virtio-pci.c                    |  7 +++++++
>  hw/virtio/virtio-pci.h                    |  4 ++++
>  include/hw/pci/pcie.h                     |  4 ++++
>  include/standard-headers/linux/pci_regs.h |  1 +
>  5 files changed, 32 insertions(+)
> 
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 99cfb45..02195d9 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -717,3 +717,19 @@ void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num)
>                          PCI_EXT_CAP_DSN_SIZEOF);
>      pci_set_quad(dev->config + offset + pci_dsn_cap, ser_num);
>  }
> +
> +void pcie_ats_init(PCIDevice *dev, uint16_t offset)
> +{
> +    pcie_add_capability(dev, PCI_EXT_CAP_ID_ATS, 0x1,
> +                        offset, PCI_EXT_CAP_ATS_SIZEOF);
> +
> +    dev->exp.ats_cap = offset;
> +
> +    /* Invalidate Queue Depth 0, Page Aligned Request 0 */
> +    pci_set_word(dev->config + offset + PCI_ATS_CAP, 0);
> +    /* STU 0, Disabled by default */
> +    pci_set_word(dev->config + offset + PCI_ATS_CTRL, 0);
> +
> +    pci_set_word(dev->wmask + dev->exp.ats_cap + PCI_ATS_CTRL, 0x800f);
> +}
> +
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 6ceb43e..e357bdf 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1838,6 +1838,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
>           * PCI Power Management Interface Specification.
>           */
>          pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
> +
> +        if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
> +            pcie_ats_init(pci_dev, 256);
> +        }
> +
>      } else {
>          /*
>           * make future invocations of pci_is_express() return false
> @@ -1889,6 +1894,8 @@ static Property virtio_pci_properties[] = {
>                      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
>      DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
>                      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
> +    DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
> +                    VIRTIO_PCI_FLAG_ATS_BIT, false),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> index b4edea6..057d49d 100644
> --- a/hw/virtio/virtio-pci.h
> +++ b/hw/virtio/virtio-pci.h
> @@ -69,6 +69,7 @@ enum {
>      VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT,
>      VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT,
>      VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT,
> +    VIRTIO_PCI_FLAG_ATS_BIT,
>  };
>  
>  /* Need to activate work-arounds for buggy guests at vmstate load. */
> @@ -93,6 +94,9 @@ enum {
>  #define VIRTIO_PCI_FLAG_PAGE_PER_VQ \
>      (1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT)
>  
> +/* address space translation service */
> +#define VIRTIO_PCI_FLAG_ATS (1 << VIRTIO_PCI_FLAG_ATS_BIT)
> +
>  typedef struct {
>      MSIMessage msg;
>      int virq;
> diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> index 056d25e..b08451d 100644
> --- a/include/hw/pci/pcie.h
> +++ b/include/hw/pci/pcie.h
> @@ -74,6 +74,9 @@ struct PCIExpressDevice {
>      /* AER */
>      uint16_t aer_cap;
>      PCIEAERLog aer_log;
> +
> +    /* Offset of ATS capability in config space */
> +    uint16_t ats_cap;
>  };
>  
>  #define COMPAT_PROP_PCP "power_controller_present"
> @@ -120,6 +123,7 @@ void pcie_add_capability(PCIDevice *dev,
>  
>  void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
>  void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
> +void pcie_ats_init(PCIDevice *dev, uint16_t offset);
>  
>  extern const VMStateDescription vmstate_pcie_device;
>  
> diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
> index 4040951..ac426a0 100644
> --- a/include/standard-headers/linux/pci_regs.h
> +++ b/include/standard-headers/linux/pci_regs.h
> @@ -674,6 +674,7 @@
>  #define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_DPC
>  
>  #define PCI_EXT_CAP_DSN_SIZEOF	12
> +#define PCI_EXT_CAP_ATS_SIZEOF	8
>  #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
>  
>  /* Advanced Error Reporting */
> -- 
> 2.7.4
Jason Wang Nov. 4, 2016, 6:48 a.m. UTC | #2
On 2016年11月04日 03:49, Michael S. Tsirkin wrote:
> On Thu, Nov 03, 2016 at 05:27:19PM +0800, Jason Wang wrote:
>> >This patches enable the Address Translation Service support for virtio
>> >pci devices. This is needed for a guest visible Device IOTLB
>> >implementation and will be required by vhost device IOTLB API
>> >implementation for intel IOMMU.
>> >
>> >Cc: Michael S. Tsirkin<mst@redhat.com>
>> >Signed-off-by: Jason Wang<jasowang@redhat.com>
> I'd like to understand why do you think this is strictly required.
> Won't setting CM bit in the IOMMU do the trick.

ATS was chosen for performance, since there are many problems with CM:

- CM is slow (10%-20% slower on real hardware for things like netperf)
because each transition between a non-present and a present mapping needs
an explicit invalidation. It may slow down the whole VM.
- Without ATS/Device IOTLB, the IOMMU becomes a bottleneck because of
contention for IOTLB entries. (What we can do in this case is in fact
userspace IOTLB snooping; this could be done even without CM.)

It was natural to think of ATS when designing the interface between the IOMMU
and device/remote IOTLBs. Do you see any drawbacks to ATS here?

Thanks

>
> Also, could you remind me pls - can guests just disable ATS?
>
> What happens then?
>
>
Michael S. Tsirkin Nov. 10, 2016, 5:32 p.m. UTC | #3
On Fri, Nov 04, 2016 at 02:48:20PM +0800, Jason Wang wrote:
> 
> 
> On 2016年11月04日 03:49, Michael S. Tsirkin wrote:
> > On Thu, Nov 03, 2016 at 05:27:19PM +0800, Jason Wang wrote:
> > > >This patches enable the Address Translation Service support for virtio
> > > >pci devices. This is needed for a guest visible Device IOTLB
> > > >implementation and will be required by vhost device IOTLB API
> > > >implementation for intel IOMMU.
> > > >
> > > >Cc: Michael S. Tsirkin<mst@redhat.com>
> > > >Signed-off-by: Jason Wang<jasowang@redhat.com>
> > I'd like to understand why do you think this is strictly required.
> > Won't setting CM bit in the IOMMU do the trick.
> 
> ATS was chosen for performance. Since there're many problems for CM:
> 
> - CM was slow (10%-20% slower on real hardware for things like netperf)
> because of each transition between non-present and present mapping needs an
> explicit invalidation. It may slow down the whole VM.
> - Without ATS/Device IOTLB, IOMMU becomes a bottleneck because of contending
> of IOTLB entries. (What we can do in this case is in fact userspace IOTLB
> snooping, this could be done even without CM).
> It was natural to think of ATS when designing interface between IOMMU and
> device/remote IOTLBs. Do you see any drawbacks on ATS here?
> 
> Thanks

In fact at this point I'm confused. Any mapping needs to be programmed
in the IOMMU. We need to implement this correctly.
Once we do why do we need ATS?
I think what you need is map/unmap notifiers that Aviv is working on.
No?


> > 
> > Also, could you remind me pls - can guests just disable ATS?
> > 
> > What happens then?
> > 
> >
Jason Wang Nov. 11, 2016, 3:26 a.m. UTC | #4
On 2016年11月11日 01:32, Michael S. Tsirkin wrote:
> On Fri, Nov 04, 2016 at 02:48:20PM +0800, Jason Wang wrote:
>>
>> On 2016年11月04日 03:49, Michael S. Tsirkin wrote:
>>> On Thu, Nov 03, 2016 at 05:27:19PM +0800, Jason Wang wrote:
>>>>> This patches enable the Address Translation Service support for virtio
>>>>> pci devices. This is needed for a guest visible Device IOTLB
>>>>> implementation and will be required by vhost device IOTLB API
>>>>> implementation for intel IOMMU.
>>>>>
>>>>> Cc: Michael S. Tsirkin<mst@redhat.com>
>>>>> Signed-off-by: Jason Wang<jasowang@redhat.com>
>>> I'd like to understand why do you think this is strictly required.
>>> Won't setting CM bit in the IOMMU do the trick.
>> ATS was chosen for performance. Since there're many problems for CM:
>>
>> - CM was slow (10%-20% slower on real hardware for things like netperf)
>> because of each transition between non-present and present mapping needs an
>> explicit invalidation. It may slow down the whole VM.
>> - Without ATS/Device IOTLB, IOMMU becomes a bottleneck because of contending
>> of IOTLB entries. (What we can do in this case is in fact userspace IOTLB
>> snooping, this could be done even without CM).
>> It was natural to think of ATS when designing interface between IOMMU and
>> device/remote IOTLBs. Do you see any drawbacks on ATS here?
>>
>> Thanks
> In fact at this point I'm confused. Any mapping needs to be programmed
> in the IOMMU. We need to implement this correctly.
> Once we do why do we need ATS?
> I think what you need is map/unmap notifiers that Aviv is working on.
> No?

Let me clarify: the device IOTLB API can work without ATS or CM. So there are
three ways to do this:

1) Without ATS or CM support, the function could be implemented through:
1.1: asking qemu for help if there's an IOTLB miss in vhost
1.2: snooping the userspace IOTLB invalidation (present to non-present
mapping) and updating the device IOTLB

2) With CM enabled, the only thing we can add is snooping the
non-present to present mapping and updating the device IOTLB. This is not
a requirement since we can still get this by asking qemu for help (1.1).

3) With ATS enabled, the guest knows about the existence of the device IOTLB,
and device IOTLB entries need to be flushed explicitly by the guest. In this
case there's no need to snoop the ordinary IOTLB invalidation in 1.2. We
just need to snoop the device-IOTLB-specific invalidation requests from the
guest.

All three methods above work very well, but let's have a look at the
performance impact:

- Method 1 (without CM or ATS): the performance is not the best since the
guest does not know about the existence of the remote IOTLB, which means the
flush of a device IOTLB entry cannot be done on demand. One example is that
some IOMMU drivers (e.g. Intel) tend to optimize IOTLB invalidations
by issuing a global invalidation periodically. We need to flush the
device IOTLB too in this case. Thus we can notice some jitter (because
of IOTLB misses).

- Method 2 (with CM but without ATS) seems to be the worst case. It has
not only all the problems above but also a new one: each transition needs
to notify the device explicitly. Even if dpdk uses static mappings, all
other devices in the VM use dynamic ones, which slows down the whole
system. According to the test, CM is about 10%-20% slower on real hardware.

- Method 3 (ATS) can give the best performance: all the problems are
gone since the guest can flush device IOTLB entries on demand. It is
defined by the spec, was designed to solve issues just like the ones we
meet here, and is supported by modern IOMMUs.

And what's even better, implementing ATS turns out to take less than 100
lines of code. And it is much easier to enable on other IOMMUs (the AMD
IOMMU only needs 20 lines of code). All other ways (I started on, and have
code for, method 1 for the Intel IOMMU) need lots of work specific to each
kind of IOMMU.

Considering so many advantages from adding so few lines of code, I
don't see why we don't need ATS (for the IOMMUs that support it).

Thanks

>
>
>>> Also, could you remind me pls - can guests just disable ATS?
>>>
>>> What happens then?
>>>
>>>
Michael S. Tsirkin Nov. 11, 2016, 3:49 a.m. UTC | #5
On Fri, Nov 11, 2016 at 11:26:12AM +0800, Jason Wang wrote:
> 
> 
> On 2016年11月11日 01:32, Michael S. Tsirkin wrote:
> > On Fri, Nov 04, 2016 at 02:48:20PM +0800, Jason Wang wrote:
> > > 
> > > On 2016年11月04日 03:49, Michael S. Tsirkin wrote:
> > > > On Thu, Nov 03, 2016 at 05:27:19PM +0800, Jason Wang wrote:
> > > > > > This patches enable the Address Translation Service support for virtio
> > > > > > pci devices. This is needed for a guest visible Device IOTLB
> > > > > > implementation and will be required by vhost device IOTLB API
> > > > > > implementation for intel IOMMU.
> > > > > > 
> > > > > > Cc: Michael S. Tsirkin<mst@redhat.com>
> > > > > > Signed-off-by: Jason Wang<jasowang@redhat.com>
> > > > I'd like to understand why do you think this is strictly required.
> > > > Won't setting CM bit in the IOMMU do the trick.
> > > ATS was chosen for performance. Since there're many problems for CM:
> > > 
> > > - CM was slow (10%-20% slower on real hardware for things like netperf)
> > > because of each transition between non-present and present mapping needs an
> > > explicit invalidation. It may slow down the whole VM.
> > > - Without ATS/Device IOTLB, IOMMU becomes a bottleneck because of contending
> > > of IOTLB entries. (What we can do in this case is in fact userspace IOTLB
> > > snooping, this could be done even without CM).
> > > It was natural to think of ATS when designing interface between IOMMU and
> > > device/remote IOTLBs. Do you see any drawbacks on ATS here?
> > > 
> > > Thanks
> > In fact at this point I'm confused. Any mapping needs to be programmed
> > in the IOMMU. We need to implement this correctly.
> > Once we do why do we need ATS?
> > I think what you need is map/unmap notifiers that Aviv is working on.
> > No?
> 
> Let me clarify, device IOTLB API can work without ATS or CM. So there're
> three ways to do:
> 
> 1) without ATS or CM support, the function could be implemented through:
> 1.1: asking for qemu help if there's an IOTLB miss in vhost
> 1.2: snooping the userspace IOTLB invalidation (present to non-present
> mapping) and update device IOTLB
> 
> 2) with CM enabled, the only thing we can add is snooping the non-present to
> present mapping and update the device IOTLB. This is not a requirement since
> we still can get this through asking qemu's(1.2) help.
> 
> 3) with ATS enabled, guest knows the existence of device IOTLB, and device
> IOTLB entires needs to be flushed explicitly by guest. In this case there's
> no need to snoop the ordinary IOTLB invalidation in 1.2. We just need to
> snoop the device IOTLB specific invalidation request from guest.
> 
> All the above 3 methods work very well, but let's have a look at performance
> impact:
> 
> - Method 1 (without CM or ATS), the performance is not the best since guest
> does not know about the existence of remote IOTLB, this means the flush of
> device IOTLB entry could not be done on demand. One example is some IOMMU
> driver (e.g intel) tends to optimize the IOTLB invalidations by issuing a
> global invalidation periodically. We need to flush the device IOTLB too in
> this case. Thus we can notice some jitter (because of IOTLB miss).
> 
> - Method 2 (with CM but without ATS) seems to be the worst case. It has not
> only all problems above a but also a new one: each transition needs to
> notify the device explicitly. Even if dpdk use static mappings, all other
> devices in the VM use dynamic ones which slows down the whole the system.
> According to the test, CM is about 10%-20% slower in real hardware.
> 
> - Method 3 (ATS) can give the best performance, all the problems have gone
> since guest can flush the device IOTLB entry on demand. It was defined by
> spec and was designed to solve the issues just like what we meet here, and
> was supported by modern IOMMUs.
> 
> And what's even better, implementing ATS turns out less than 100 lines of
> codes. And it was much more easier to  be enabled on other IOMMU (AMD IOMMU
> only needs 20 lines of codes). All other ways (I started and have codes for
> method 1 for intel IOMMU) need lots of work specific to each kind of IOMMU.

method 1 is basically what Aviv implemented except you don't
need map notifiers, only unmap.

> 
> Consider so much advantages by just adding so small lines of codes. I don't
> see why we don't need ATS (for the IOOMUs that supports it).
> 
> Thanks

I am concerned that not all IOMMUs and guests support ATS.

> > 
> > 
> > > > Also, could you remind me pls - can guests just disable ATS?
> > > > 
> > > > What happens then?
> > > > 
> > > >
Jason Wang Nov. 11, 2016, 4:24 a.m. UTC | #6
On 2016年11月11日 11:49, Michael S. Tsirkin wrote:
> On Fri, Nov 11, 2016 at 11:26:12AM +0800, Jason Wang wrote:
>> >
>> >
>> >On 2016年11月11日 01:32, Michael S. Tsirkin wrote:
>>> > >On Fri, Nov 04, 2016 at 02:48:20PM +0800, Jason Wang wrote:
>>>> > > >
>>>> > > >On 2016年11月04日 03:49, Michael S. Tsirkin wrote:
>>>>> > > > >On Thu, Nov 03, 2016 at 05:27:19PM +0800, Jason Wang wrote:
>>>>>>> > > > > > >This patches enable the Address Translation Service support for virtio
>>>>>>> > > > > > >pci devices. This is needed for a guest visible Device IOTLB
>>>>>>> > > > > > >implementation and will be required by vhost device IOTLB API
>>>>>>> > > > > > >implementation for intel IOMMU.
>>>>>>> > > > > > >
>>>>>>> > > > > > >Cc: Michael S. Tsirkin<mst@redhat.com>
>>>>>>> > > > > > >Signed-off-by: Jason Wang<jasowang@redhat.com>
>>>>> > > > >I'd like to understand why do you think this is strictly required.
>>>>> > > > >Won't setting CM bit in the IOMMU do the trick.
>>>> > > >ATS was chosen for performance. Since there're many problems for CM:
>>>> > > >
>>>> > > >- CM was slow (10%-20% slower on real hardware for things like netperf)
>>>> > > >because of each transition between non-present and present mapping needs an
>>>> > > >explicit invalidation. It may slow down the whole VM.
>>>> > > >- Without ATS/Device IOTLB, IOMMU becomes a bottleneck because of contending
>>>> > > >of IOTLB entries. (What we can do in this case is in fact userspace IOTLB
>>>> > > >snooping, this could be done even without CM).
>>>> > > >It was natural to think of ATS when designing interface between IOMMU and
>>>> > > >device/remote IOTLBs. Do you see any drawbacks on ATS here?
>>>> > > >
>>>> > > >Thanks
>>> > >In fact at this point I'm confused. Any mapping needs to be programmed
>>> > >in the IOMMU. We need to implement this correctly.
>>> > >Once we do why do we need ATS?
>>> > >I think what you need is map/unmap notifiers that Aviv is working on.
>>> > >No?
>> >
>> >Let me clarify, device IOTLB API can work without ATS or CM. So there're
>> >three ways to do:
>> >
>> >1) without ATS or CM support, the function could be implemented through:
>> >1.1: asking for qemu help if there's an IOTLB miss in vhost
>> >1.2: snooping the userspace IOTLB invalidation (present to non-present
>> >mapping) and update device IOTLB
>> >
>> >2) with CM enabled, the only thing we can add is snooping the non-present to
>> >present mapping and update the device IOTLB. This is not a requirement since
>> >we still can get this through asking qemu's(1.2) help.
>> >
>> >3) with ATS enabled, guest knows the existence of device IOTLB, and device
>> >IOTLB entires needs to be flushed explicitly by guest. In this case there's
>> >no need to snoop the ordinary IOTLB invalidation in 1.2. We just need to
>> >snoop the device IOTLB specific invalidation request from guest.
>> >
>> >All the above 3 methods work very well, but let's have a look at performance
>> >impact:
>> >
>> >- Method 1 (without CM or ATS), the performance is not the best since guest
>> >does not know about the existence of remote IOTLB, this means the flush of
>> >device IOTLB entry could not be done on demand. One example is some IOMMU
>> >driver (e.g intel) tends to optimize the IOTLB invalidations by issuing a
>> >global invalidation periodically. We need to flush the device IOTLB too in
>> >this case. Thus we can notice some jitter (because of IOTLB miss).
>> >
>> >- Method 2 (with CM but without ATS) seems to be the worst case. It has not
>> >only all problems above a but also a new one: each transition needs to
>> >notify the device explicitly. Even if dpdk use static mappings, all other
>> >devices in the VM use dynamic ones which slows down the whole the system.
>> >According to the test, CM is about 10%-20% slower in real hardware.
>> >
>> >- Method 3 (ATS) can give the best performance, all the problems have gone
>> >since guest can flush the device IOTLB entry on demand. It was defined by
>> >spec and was designed to solve the issues just like what we meet here, and
>> >was supported by modern IOMMUs.
>> >
>> >And what's even better, implementing ATS turns out less than 100 lines of
>> >codes. And it was much more easier to  be enabled on other IOMMU (AMD IOMMU
>> >only needs 20 lines of codes). All other ways (I started and have codes for
>> >method 1 for intel IOMMU) need lots of work specific to each kind of IOMMU.
> method 1 is basically what Aviv implemented except you don't
> need map notifiers, only unmap.
>
>> >
>> >Consider so much advantages by just adding so small lines of codes. I don't
>> >see why we don't need ATS (for the IOOMUs that supports it).
>> >
>> >Thanks
> I am concerned that not all IOMMUs and guests support ATS.
>

For IOMMUs that do not support ATS, we can use method 1.

Legacy guests do not even support VIRTIO_F_IOMMU_PLATFORM, so this is
probably not an issue.

Thanks
diff mbox

Patch

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 99cfb45..02195d9 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -717,3 +717,19 @@  void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num)
                         PCI_EXT_CAP_DSN_SIZEOF);
     pci_set_quad(dev->config + offset + pci_dsn_cap, ser_num);
 }
+
+void pcie_ats_init(PCIDevice *dev, uint16_t offset)
+{
+    pcie_add_capability(dev, PCI_EXT_CAP_ID_ATS, 0x1,
+                        offset, PCI_EXT_CAP_ATS_SIZEOF);
+
+    dev->exp.ats_cap = offset;
+
+    /* Invalidate Queue Depth 0, Page Aligned Request 0 */
+    pci_set_word(dev->config + offset + PCI_ATS_CAP, 0);
+    /* STU 0, Disabled by default */
+    pci_set_word(dev->config + offset + PCI_ATS_CTRL, 0);
+
+    pci_set_word(dev->wmask + dev->exp.ats_cap + PCI_ATS_CTRL, 0x800f);
+}
+
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 6ceb43e..e357bdf 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1838,6 +1838,11 @@  static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
          * PCI Power Management Interface Specification.
          */
         pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
+
+        if (proxy->flags & VIRTIO_PCI_FLAG_ATS) {
+            pcie_ats_init(pci_dev, 256);
+        }
+
     } else {
         /*
          * make future invocations of pci_is_express() return false
@@ -1889,6 +1894,8 @@  static Property virtio_pci_properties[] = {
                     VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
     DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
                     VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
+    DEFINE_PROP_BIT("ats", VirtIOPCIProxy, flags,
+                    VIRTIO_PCI_FLAG_ATS_BIT, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index b4edea6..057d49d 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -69,6 +69,7 @@  enum {
     VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT,
     VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT,
     VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT,
+    VIRTIO_PCI_FLAG_ATS_BIT,
 };
 
 /* Need to activate work-arounds for buggy guests at vmstate load. */
@@ -93,6 +94,9 @@  enum {
 #define VIRTIO_PCI_FLAG_PAGE_PER_VQ \
     (1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT)
 
+/* address space translation service */
+#define VIRTIO_PCI_FLAG_ATS (1 << VIRTIO_PCI_FLAG_ATS_BIT)
+
 typedef struct {
     MSIMessage msg;
     int virq;
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 056d25e..b08451d 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -74,6 +74,9 @@  struct PCIExpressDevice {
     /* AER */
     uint16_t aer_cap;
     PCIEAERLog aer_log;
+
+    /* Offset of ATS capability in config space */
+    uint16_t ats_cap;
 };
 
 #define COMPAT_PROP_PCP "power_controller_present"
@@ -120,6 +123,7 @@  void pcie_add_capability(PCIDevice *dev,
 
 void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
 void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
+void pcie_ats_init(PCIDevice *dev, uint16_t offset);
 
 extern const VMStateDescription vmstate_pcie_device;
 
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index 4040951..ac426a0 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -674,6 +674,7 @@ 
 #define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_DPC
 
 #define PCI_EXT_CAP_DSN_SIZEOF	12
+#define PCI_EXT_CAP_ATS_SIZEOF	8
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
 
 /* Advanced Error Reporting */