Message ID | d09ebf953e219ad9250e0f1a29acec4628cd0613.1280958471.git.eduard.munteanu@linux360.ro |
---|---|
State | New |
Headers | show |
On Wed, Aug 4, 2010 at 10:32 PM, Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro> wrote: > This introduces emulation for the AMD IOMMU, described in "AMD I/O > Virtualization Technology (IOMMU) Specification". > > Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro> > --- > Makefile.target | 2 + > configure | 10 + > hw/amd_iommu.c | 671 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/pc.c | 4 + > hw/pc.h | 3 + > hw/pci_ids.h | 2 + > hw/pci_regs.h | 1 + > 7 files changed, 693 insertions(+), 0 deletions(-) > create mode 100644 hw/amd_iommu.c > > diff --git a/Makefile.target b/Makefile.target > index 70a9c1b..86226a0 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -219,6 +219,8 @@ obj-i386-y += pcspk.o i8254.o > obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o > obj-i386-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += device-assignment.o > > +obj-i386-$(CONFIG_AMD_IOMMU) += amd_iommu.o Make this unconditional. > + > # Hardware support > obj-ia64-y += ide.o pckbd.o vga.o $(SOUND_HW) dma.o $(AUDIODRV) > obj-ia64-y += fdc.o mc146818rtc.o serial.o i8259.o ipf.o > diff --git a/configure b/configure > index af50607..7448603 100755 > --- a/configure > +++ b/configure > @@ -317,6 +317,7 @@ io_thread="no" > mixemu="no" > kvm_cap_pit="" > kvm_cap_device_assignment="" > +amd_iommu="no" > kerneldir="" > aix="no" > blobs="yes" > @@ -629,6 +630,8 @@ for opt do > ;; > --enable-kvm-device-assignment) kvm_cap_device_assignment="yes" > ;; > + --enable-amd-iommu-emul) amd_iommu="yes" > + ;; > --enable-profiler) profiler="yes" > ;; > --enable-cocoa) > @@ -871,6 +874,8 @@ echo " --disable-kvm-pit disable KVM pit support" > echo " --enable-kvm-pit enable KVM pit support" > echo " --disable-kvm-device-assignment disable KVM device assignment support" > echo " --enable-kvm-device-assignment enable KVM device assignment support" > +echo " --disable-amd-iommu-emul disable AMD IOMMU emulation" > +echo " --enable-amd-iommu-emul enable AMD IOMMU emulation" > echo " 
--disable-nptl disable usermode NPTL support" > echo " --enable-nptl enable usermode NPTL support" > echo " --enable-system enable all system emulation targets" > @@ -2251,6 +2256,7 @@ echo "Install blobs $blobs" > echo "KVM support $kvm" > echo "KVM PIT support $kvm_cap_pit" > echo "KVM device assig. $kvm_cap_device_assignment" > +echo "AMD IOMMU emul. $amd_iommu" > echo "fdt support $fdt" > echo "preadv support $preadv" > echo "fdatasync $fdatasync" > @@ -2645,6 +2651,10 @@ case "$target_arch2" in > x86_64) > TARGET_BASE_ARCH=i386 > target_phys_bits=64 > + if test "$amd_iommu" = "yes"; then > + echo "CONFIG_AMD_IOMMU=y" >> $config_target_mak > + echo "CONFIG_PCI_IOMMU=y" >> $config_host_mak > + fi Drop all configure changes. > ;; > ia64) > target_phys_bits=64 > diff --git a/hw/amd_iommu.c b/hw/amd_iommu.c > new file mode 100644 > index 0000000..ff9903e > --- /dev/null > +++ b/hw/amd_iommu.c > @@ -0,0 +1,671 @@ > +/* > + * AMD IOMMU emulation > + * > + * Copyright (c) 2010 Eduard - Gabriel Munteanu > + * > + * Permission is hereby granted, free of charge, to any person obtaining a copy > + * of this software and associated documentation files (the "Software"), to deal > + * in the Software without restriction, including without limitation the rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include "pc.h" > +#include "hw.h" > +#include "pci.h" > +#include "qlist.h" > + > +/* Capability registers */ > +#define CAPAB_HEADER 0x00 > +#define CAPAB_REV_TYPE 0x02 > +#define CAPAB_FLAGS 0x03 > +#define CAPAB_BAR_LOW 0x04 > +#define CAPAB_BAR_HIGH 0x08 > +#define CAPAB_RANGE 0x0C > +#define CAPAB_MISC 0x10 > + > +#define CAPAB_SIZE 0x14 > + > +/* Capability header data */ > +#define CAPAB_FLAG_IOTLBSUP (1 << 0) > +#define CAPAB_FLAG_HTTUNNEL (1 << 1) > +#define CAPAB_FLAG_NPCACHE (1 << 2) > +#define CAPAB_INIT_REV (1 << 3) > +#define CAPAB_INIT_TYPE 3 > +#define CAPAB_INIT_REV_TYPE (CAPAB_REV | CAPAB_TYPE) > +#define CAPAB_INIT_FLAGS (CAPAB_FLAG_NPCACHE | CAPAB_FLAG_HTTUNNEL) > +#define CAPAB_INIT_MISC (64 << 15) | (48 << 8) > +#define CAPAB_BAR_MASK ~((1UL << 14) - 1) > + > +/* MMIO registers */ > +#define MMIO_DEVICE_TABLE 0x0000 > +#define MMIO_COMMAND_BASE 0x0008 > +#define MMIO_EVENT_BASE 0x0010 > +#define MMIO_CONTROL 0x0018 > +#define MMIO_EXCL_BASE 0x0020 > +#define MMIO_EXCL_LIMIT 0x0028 > +#define MMIO_COMMAND_HEAD 0x2000 > +#define MMIO_COMMAND_TAIL 0x2008 > +#define MMIO_EVENT_HEAD 0x2010 > +#define MMIO_EVENT_TAIL 0x2018 > +#define MMIO_STATUS 0x2020 > + > +#define MMIO_SIZE 0x4000 > + > +#define MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1) > +#define MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~MMIO_DEVTAB_SIZE_MASK) > +#define MMIO_DEVTAB_ENTRY_SIZE 32 > +#define MMIO_DEVTAB_SIZE_UNIT 4096 > + > +#define MMIO_CMDBUF_SIZE_BYTE (MMIO_COMMAND_BASE + 7) > +#define MMIO_CMDBUF_SIZE_MASK 0x0F > +#define MMIO_CMDBUF_BASE_MASK MMIO_DEVTAB_BASE_MASK > +#define MMIO_CMDBUF_DEFAULT_SIZE 8 > +#define MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F) > +#define 
MMIO_CMDBUF_TAIL_MASK MMIO_EVTLOG_HEAD_MASK > + > +#define MMIO_EVTLOG_SIZE_BYTE (MMIO_EVENT_BASE + 7) > +#define MMIO_EVTLOG_SIZE_MASK MMIO_CMDBUF_SIZE_MASK > +#define MMIO_EVTLOG_BASE_MASK MMIO_CMDBUF_BASE_MASK > +#define MMIO_EVTLOG_DEFAULT_SIZE MMIO_CMDBUF_DEFAULT_SIZE > +#define MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F) > +#define MMIO_EVTLOG_TAIL_MASK MMIO_EVTLOG_HEAD_MASK > + > +#define MMIO_EXCL_BASE_MASK MMIO_DEVTAB_BASE_MASK > +#define MMIO_EXCL_ENABLED_MASK (1ULL << 0) > +#define MMIO_EXCL_ALLOW_MASK (1ULL << 1) > +#define MMIO_EXCL_LIMIT_MASK MMIO_DEVTAB_BASE_MASK > +#define MMIO_EXCL_LIMIT_LOW 0xFFF > + > +#define MMIO_CONTROL_IOMMUEN (1ULL << 0) > +#define MMIO_CONTROL_HTTUNEN (1ULL << 1) > +#define MMIO_CONTROL_EVENTLOGEN (1ULL << 2) > +#define MMIO_CONTROL_EVENTINTEN (1ULL << 3) > +#define MMIO_CONTROL_COMWAITINTEN (1ULL << 4) > +#define MMIO_CONTROL_CMDBUFEN (1ULL << 12) > + > +#define MMIO_STATUS_EVTLOG_OF (1ULL << 0) > +#define MMIO_STATUS_EVTLOG_INTR (1ULL << 1) > +#define MMIO_STATUS_COMWAIT_INTR (1ULL << 2) > +#define MMIO_STATUS_EVTLOG_RUN (1ULL << 3) > +#define MMIO_STATUS_CMDBUF_RUN (1ULL << 4) > + > +#define CMDBUF_ID_BYTE 0x07 > +#define CMDBUF_ID_RSHIFT 4 > +#define CMDBUF_ENTRY_SIZE 0x10 > + > +#define CMD_COMPLETION_WAIT 0x01 > +#define CMD_INVAL_DEVTAB_ENTRY 0x02 > +#define CMD_INVAL_IOMMU_PAGES 0x03 > +#define CMD_INVAL_IOTLB_PAGES 0x04 > +#define CMD_INVAL_INTR_TABLE 0x05 > + > +#define DEVTAB_ENTRY_SIZE 32 > + > +/* Device table entry bits 0:63 */ > +#define DEV_VALID (1ULL << 0) > +#define DEV_TRANSLATION_VALID (1ULL << 1) > +#define DEV_MODE_MASK 0x7 > +#define DEV_MODE_RSHIFT 9 > +#define DEV_PT_ROOT_MASK 0xFFFFFFFFFF000 > +#define DEV_PT_ROOT_RSHIFT 12 > +#define DEV_PERM_SHIFT 61 > +#define DEV_PERM_READ (1ULL << 61) > +#define DEV_PERM_WRITE (1ULL << 62) > + > +/* Device table entry bits 64:127 */ > +#define DEV_DOMAIN_ID_MASK ((1ULL << 16) - 1) > +#define DEV_IOTLB_SUPPORT (1ULL << 17) > +#define DEV_SUPPRESS_PF 
(1ULL << 18) > +#define DEV_SUPPRESS_ALL_PF (1ULL << 19) > +#define DEV_IOCTL_MASK ~3 > +#define DEV_IOCTL_RSHIFT 20 > +#define DEV_IOCTL_DENY 0 > +#define DEV_IOCTL_PASSTHROUGH 1 > +#define DEV_IOCTL_TRANSLATE 2 > +#define DEV_CACHE (1ULL << 37) > +#define DEV_SNOOP_DISABLE (1ULL << 38) > +#define DEV_EXCL (1ULL << 39) > + > +struct amd_iommu_invalidator { > + int devfn; > + PCIInvalidateIOTLBFunc *func; > + void *opaque; > + QLIST_ENTRY(amd_iommu_invalidator) list; > +}; This should be AMDIOMMUInvalidator with typedef. > + > +struct amd_iommu_state { > + PCIDevice dev; > + > + int capab_offset; > + unsigned char *capab; > + > + int mmio_index; > + target_phys_addr_t mmio_addr; > + unsigned char *mmio_buf; > + int mmio_enabled; > + > + int enabled; > + int ats_enabled; > + > + target_phys_addr_t devtab; > + size_t devtab_len; > + > + target_phys_addr_t cmdbuf; > + int cmdbuf_enabled; > + size_t cmdbuf_len; > + size_t cmdbuf_head; > + size_t cmdbuf_tail; > + int completion_wait_intr; > + > + target_phys_addr_t evtlog; > + int evtlog_enabled; > + int evtlog_intr; > + size_t evtlog_len; > + size_t evtlog_head; > + size_t evtlog_tail; > + > + target_phys_addr_t excl_base; > + target_phys_addr_t excl_limit; > + int excl_enabled; > + int excl_allow; > + > + QLIST_HEAD(, amd_iommu_invalidator) invalidators; > +}; Likewise, AMDIOMMUState. 
> +static void amd_iommu_register_invalidator(PCIIOMMU *iommu, > + PCIDevice *dev, > + pci_addr_t addr, > + PCIInvalidateIOTLBFunc *cb, > + void *opaque) > +{ > + struct amd_iommu_invalidator *inval; > + struct amd_iommu_state *st = iommu->opaque; > + > + inval = qemu_malloc(sizeof(struct amd_iommu_invalidator)); > + inval->devfn = dev->devfn; > + inval->func = cb; > + inval->opaque = opaque; > + > + QLIST_INSERT_HEAD(&st->invalidators, inval, list); > +} > + > +static void amd_iommu_completion_wait(struct amd_iommu_state *st, > + uint8_t *cmd) > +{ > + uint64_t addr; > + > + if (cmd[0] & 1) { > + addr = le64_to_cpu(*(uint64_t *) cmd) & 0xFFFFFFFFFFFF8; > + cpu_physical_memory_write(addr, cmd + 8, 8); > + } > + > + if (cmd[0] & 2) > + st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_COMWAIT_INTR; > +} > + > +static void amd_iommu_inval_iotlb(struct amd_iommu_state *st, > + uint8_t *cmd) > +{ > + struct amd_iommu_invalidator *inval; > + int devfn = *(uint16_t *) cmd; > + > + QLIST_FOREACH(inval, &st->invalidators, list) { > + if (inval->devfn == devfn) { > + inval->func(inval->opaque); > + QLIST_REMOVE(inval, list); > + } > + } > +} > + > +static void amd_iommu_cmdbuf_run(struct amd_iommu_state *st) > +{ > + uint8_t cmd[16]; > + int type; > + > + if (!st->cmdbuf_enabled) > + return; > + > + /* Check if there's work to do. */ > + if (st->cmdbuf_head == st->cmdbuf_tail) > + return; > + > + cpu_physical_memory_read(st->cmdbuf + st->cmdbuf_head, cmd, 16); > + type = cmd[CMDBUF_ID_BYTE] >> CMDBUF_ID_RSHIFT; > + switch (type) { > + case CMD_COMPLETION_WAIT: > + amd_iommu_completion_wait(st, cmd); > + break; > + case CMD_INVAL_DEVTAB_ENTRY: > + break; > + case CMD_INVAL_IOMMU_PAGES: > + break; > + case CMD_INVAL_IOTLB_PAGES: > + amd_iommu_inval_iotlb(st, cmd); > + break; > + case CMD_INVAL_INTR_TABLE: > + break; > + default: > + break; > + } > + > + /* Increment and wrap head pointer. 
*/ > + st->cmdbuf_head += CMDBUF_ENTRY_SIZE; > + if (st->cmdbuf_head >= st->cmdbuf_len) > + st->cmdbuf_head = 0; > +} > + > +static uint32_t amd_iommu_mmio_buf_read(struct amd_iommu_state *st, > + size_t offset, > + size_t size) > +{ > + ssize_t i; > + uint32_t ret; > + > + if (!size) > + return 0; > + > + ret = st->mmio_buf[offset + size - 1]; > + for (i = size - 2; i >= 0; i--) { > + ret <<= 8; > + ret |= st->mmio_buf[offset + i]; > + } > + > + return ret; > +} > + > +static void amd_iommu_mmio_buf_write(struct amd_iommu_state *st, > + size_t offset, > + size_t size, > + uint32_t val) > +{ > + size_t i; > + > + for (i = 0; i < size; i++) { > + st->mmio_buf[offset + i] = val & 0xFF; > + val >>= 8; > + } > +} > + > +static void amd_iommu_update_mmio(struct amd_iommu_state *st, > + target_phys_addr_t addr) > +{ > + size_t reg = addr & ~0x07; > + uint64_t *base = (uint64_t *) &st->mmio_buf[reg]; > + uint64_t val = *base; > + > + switch (reg) { > + case MMIO_CONTROL: > + st->enabled = !!(val & MMIO_CONTROL_IOMMUEN); > + st->ats_enabled = !!(val & MMIO_CONTROL_HTTUNEN); > + st->evtlog_enabled = st->enabled && > + !!(val & MMIO_CONTROL_EVENTLOGEN); > + st->evtlog_intr = !!(val & MMIO_CONTROL_EVENTINTEN); > + st->completion_wait_intr = !!(val & MMIO_CONTROL_COMWAITINTEN); > + st->cmdbuf_enabled = st->enabled && > + !!(val & MMIO_CONTROL_CMDBUFEN); > + > + /* Update status flags depending on the control register. 
*/ > + if (st->cmdbuf_enabled) > + st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_CMDBUF_RUN; > + else > + st->mmio_buf[MMIO_STATUS] &= ~MMIO_STATUS_CMDBUF_RUN; > + if (st->evtlog_enabled) > + st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_EVTLOG_RUN; > + else > + st->mmio_buf[MMIO_STATUS] &= ~MMIO_STATUS_EVTLOG_RUN; > + > + amd_iommu_cmdbuf_run(st); > + break; > + case MMIO_DEVICE_TABLE: > + st->devtab = (target_phys_addr_t) (val & MMIO_DEVTAB_BASE_MASK); > + st->devtab_len = ((val & MMIO_DEVTAB_SIZE_MASK) + 1) * > + (MMIO_DEVTAB_SIZE_UNIT / MMIO_DEVTAB_ENTRY_SIZE); > + break; > + case MMIO_COMMAND_BASE: > + st->cmdbuf = (target_phys_addr_t) (val & MMIO_CMDBUF_BASE_MASK); > + st->cmdbuf_len = 1UL << (st->mmio_buf[MMIO_CMDBUF_SIZE_BYTE] & > + MMIO_CMDBUF_SIZE_MASK); > + amd_iommu_cmdbuf_run(st); > + break; > + case MMIO_COMMAND_HEAD: > + st->cmdbuf_head = val & MMIO_CMDBUF_HEAD_MASK; > + amd_iommu_cmdbuf_run(st); > + break; > + case MMIO_COMMAND_TAIL: > + st->cmdbuf_tail = val & MMIO_CMDBUF_TAIL_MASK; > + amd_iommu_cmdbuf_run(st); > + break; > + case MMIO_EVENT_BASE: > + st->evtlog = (target_phys_addr_t) (val & MMIO_EVTLOG_BASE_MASK); > + st->evtlog_len = 1UL << (st->mmio_buf[MMIO_EVTLOG_SIZE_BYTE] & > + MMIO_EVTLOG_SIZE_MASK); > + break; > + case MMIO_EVENT_HEAD: > + st->evtlog_head = val & MMIO_EVTLOG_HEAD_MASK; > + break; > + case MMIO_EVENT_TAIL: > + st->evtlog_tail = val & MMIO_EVTLOG_TAIL_MASK; > + break; > + case MMIO_EXCL_BASE: > + st->excl_base = (target_phys_addr_t) (val & MMIO_EXCL_BASE_MASK); > + st->excl_enabled = val & MMIO_EXCL_ENABLED_MASK; > + st->excl_allow = val & MMIO_EXCL_ALLOW_MASK; > + break; > + case MMIO_EXCL_LIMIT: > + st->excl_limit = (target_phys_addr_t) ((val & MMIO_EXCL_LIMIT_MASK) | > + MMIO_EXCL_LIMIT_LOW); > + break; > + default: > + break; > + } > +} > + > +static uint32_t amd_iommu_mmio_readb(void *opaque, target_phys_addr_t addr) > +{ > + struct amd_iommu_state *st = opaque; > + > + return amd_iommu_mmio_buf_read(st, addr, 1); > +} > + > 
+static uint32_t amd_iommu_mmio_readw(void *opaque, target_phys_addr_t addr) > +{ > + struct amd_iommu_state *st = opaque; > + > + return amd_iommu_mmio_buf_read(st, addr, 2); > +} > + > +static uint32_t amd_iommu_mmio_readl(void *opaque, target_phys_addr_t addr) > +{ > + struct amd_iommu_state *st = opaque; > + > + return amd_iommu_mmio_buf_read(st, addr, 4); > +} > + > +static void amd_iommu_mmio_writeb(void *opaque, > + target_phys_addr_t addr, > + uint32_t val) > +{ > + struct amd_iommu_state *st = opaque; > + > + amd_iommu_mmio_buf_write(st, addr, 1, val); > + amd_iommu_update_mmio(st, addr); > +} > + > +static void amd_iommu_mmio_writew(void *opaque, > + target_phys_addr_t addr, > + uint32_t val) > +{ > + struct amd_iommu_state *st = opaque; > + > + amd_iommu_mmio_buf_write(st, addr, 2, val); > + amd_iommu_update_mmio(st, addr); > +} > + > +static void amd_iommu_mmio_writel(void *opaque, > + target_phys_addr_t addr, > + uint32_t val) > +{ > + struct amd_iommu_state *st = opaque; > + > + amd_iommu_mmio_buf_write(st, addr, 4, val); > + amd_iommu_update_mmio(st, addr); > +} > + > +static CPUReadMemoryFunc * const amd_iommu_mmio_read[] = { > + amd_iommu_mmio_readb, > + amd_iommu_mmio_readw, > + amd_iommu_mmio_readl, > +}; > + > +static CPUWriteMemoryFunc * const amd_iommu_mmio_write[] = { > + amd_iommu_mmio_writeb, > + amd_iommu_mmio_writew, > + amd_iommu_mmio_writel, > +}; > + > +static void amd_iommu_init_mmio(struct amd_iommu_state *st) > +{ > + st->mmio_buf[MMIO_CMDBUF_SIZE_BYTE] = MMIO_CMDBUF_DEFAULT_SIZE; > + st->mmio_buf[MMIO_EVTLOG_SIZE_BYTE] = MMIO_EVTLOG_DEFAULT_SIZE; > +} > + > +static void amd_iommu_enable_mmio(struct amd_iommu_state *st) > +{ > + target_phys_addr_t addr; > + > + st->mmio_index = cpu_register_io_memory(amd_iommu_mmio_read, > + amd_iommu_mmio_write, st); > + if (st->mmio_index < 0) > + return; > + > + addr = le64_to_cpu(*(uint64_t *) &st->capab[CAPAB_BAR_LOW]) & CAPAB_BAR_MASK; > + cpu_register_physical_memory(addr, MMIO_SIZE, 
st->mmio_index); > + > + st->mmio_addr = addr; > + st->mmio_buf = qemu_mallocz(MMIO_SIZE); > + st->mmio_enabled = 1; > + amd_iommu_init_mmio(st); > +} > + > +static uint32_t amd_iommu_read_capab(PCIDevice *pci_dev, > + uint32_t addr, int len) > +{ > + return pci_default_cap_read_config(pci_dev, addr, len); > +} > + > +static void amd_iommu_write_capab(PCIDevice *dev, > + uint32_t addr, uint32_t val, int len) > +{ > + struct amd_iommu_state *st; > + unsigned char *capab; > + int reg; > + > + st = DO_UPCAST(struct amd_iommu_state, dev, dev); > + capab = st->capab; > + reg = (addr - 0x40) & ~0x3; /* Get the 32-bits register. */ > + > + switch (reg) { > + case CAPAB_HEADER: > + case CAPAB_MISC: > + /* Read-only. */ > + return; > + case CAPAB_BAR_LOW: > + case CAPAB_BAR_HIGH: > + case CAPAB_RANGE: > + if (st->mmio_enabled) > + return; > + pci_default_cap_write_config(dev, addr, val, len); > + break; > + default: > + return; > + } > + > + if (capab[CAPAB_BAR_LOW] & 0x1) > + amd_iommu_enable_mmio(st); > +} > + > +static int amd_iommu_init_capab(PCIDevice *dev) > +{ > + struct amd_iommu_state *st; > + unsigned char *capab; > + > + st = DO_UPCAST(struct amd_iommu_state, dev, dev); > + capab = st->dev.config + st->capab_offset; > + > + capab[CAPAB_REV_TYPE] = CAPAB_REV_TYPE; > + capab[CAPAB_FLAGS] = CAPAB_FLAGS; > + capab[CAPAB_BAR_LOW] = 0; > + capab[CAPAB_BAR_HIGH] = 0; > + capab[CAPAB_RANGE] = 0; > + *((uint32_t *) &capab[CAPAB_MISC]) = cpu_to_le32(CAPAB_INIT_MISC); > + > + st->capab = capab; > + st->dev.cap.length = CAPAB_SIZE; > + > + return 0; > +} > + > +static int amd_iommu_translate(PCIIOMMU *iommu, > + PCIDevice *dev, > + pci_addr_t addr, > + target_phys_addr_t *paddr, > + int *len, > + unsigned perms); Please move the implementation here to avoid this declaration. 
> +static int amd_iommu_pci_initfn(PCIDevice *dev) > +{ > + struct amd_iommu_state *st; > + PCIIOMMU *iommu; > + int err; > + > + st = DO_UPCAST(struct amd_iommu_state, dev, dev); > + > + pci_config_set_vendor_id(st->dev.config, PCI_VENDOR_ID_AMD); > + pci_config_set_device_id(st->dev.config, PCI_DEVICE_ID_AMD_IOMMU); > + pci_config_set_class(st->dev.config, PCI_CLASS_SYSTEM_IOMMU); > + > + st->capab_offset = pci_add_capability(&st->dev, > + PCI_CAP_ID_SEC, > + CAPAB_SIZE); > + err = pci_enable_capability_support(&st->dev, st->capab_offset, > + amd_iommu_read_capab, > + amd_iommu_write_capab, > + amd_iommu_init_capab); > + if (err) > + return err; > + > + iommu = qemu_mallocz(sizeof(PCIIOMMU)); > + iommu->opaque = st; > + iommu->translate = amd_iommu_translate; > + iommu->register_iotlb_invalidator = amd_iommu_register_invalidator; > + pci_register_iommu(dev, iommu); I'd avoid the structure and just pass the stuff to pci_register_iommu as function arguments. > + > + return 0; > +} > + > +static const VMStateDescription vmstate_amd_iommu = { > + .name = "amd-iommu", > + .version_id = 1, > + .minimum_version_id = 1, > + .minimum_version_id_old = 1, > + .fields = (VMStateField []) { > + VMSTATE_PCI_DEVICE(dev, struct amd_iommu_state), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +static PCIDeviceInfo amd_iommu_pci_info = { > + .qdev.name = "amd-iommu", > + .qdev.desc = "AMD IOMMU", > + .qdev.size = sizeof(struct amd_iommu_state), > + .qdev.vmsd = &vmstate_amd_iommu, > + .init = amd_iommu_pci_initfn, > +}; > + > +void amd_iommu_init(PCIBus *bus) > +{ > + pci_create_simple(bus, -1, "amd-iommu"); > +} Just open code this in pc.c. 
> + > +static void amd_iommu_register(void) > +{ > + pci_qdev_register(&amd_iommu_pci_info); > +} > + > +device_init(amd_iommu_register); > + > +static void amd_iommu_page_fault(struct amd_iommu_state *st, > + int devfn, > + unsigned domid, > + target_phys_addr_t addr, > + int present, > + int is_write) > +{ > + uint16_t entry[8]; > + uint64_t *entry_addr = (uint64_t *) &entry[4]; > + > + entry[0] = cpu_to_le16(devfn); > + entry[1] = 0; > + entry[2] = cpu_to_le16(domid); > + entry[3] = (2UL << 12) | (!!present << 4) | (!!is_write << 5); > + *entry_addr = cpu_to_le64(addr); > + > + cpu_physical_memory_write((target_phys_addr_t) st->evtlog + st->evtlog_tail, (uint8_t *) &entry, 128); > + st->evtlog_tail += 128; > +} > + > +static inline uint64_t amd_iommu_get_perms(uint64_t entry) > +{ > + return (entry & (DEV_PERM_READ | DEV_PERM_WRITE)) >> DEV_PERM_SHIFT; > +} > + > +static int amd_iommu_translate(PCIIOMMU *iommu, > + PCIDevice *dev, > + pci_addr_t addr, > + target_phys_addr_t *paddr, > + int *len, > + unsigned perms) > +{ > + int devfn, present; > + target_phys_addr_t entry_addr, pte_addr; > + uint64_t entry[4], pte, page_offset, pte_perms; > + unsigned level, domid; > + struct amd_iommu_state *st = iommu->opaque; > + > + if (!st->enabled) > + goto no_translation; > + > + /* Get device table entry. */ > + devfn = dev->devfn; > + entry_addr = st->devtab + devfn * DEVTAB_ENTRY_SIZE; > + cpu_physical_memory_read(entry_addr, (uint8_t *) entry, 32); > + > + pte = entry[0]; > + if (!(pte & DEV_VALID) || !(pte & DEV_TRANSLATION_VALID)) { > + goto no_translation; > + } > + domid = entry[1] & DEV_DOMAIN_ID_MASK; > + level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK; > + while (level > 0) { > + /* > + * Check permissions: the bitwise > + * implication perms -> entry_perms must be true. 
> + */ > + pte_perms = amd_iommu_get_perms(pte); > + present = pte & 1; > + if (!present || perms != (perms & pte_perms)) { > + amd_iommu_page_fault(st, devfn, domid, addr, > + present, !!(perms & IOMMU_PERM_WRITE)); > + return -EPERM; > + } > + > + /* Go to the next lower level. */ > + pte_addr = pte & DEV_PT_ROOT_MASK; > + pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3; > + pte = ldq_phys(pte_addr); > + level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK; > + } > + page_offset = addr & 4095; > + *paddr = (pte & DEV_PT_ROOT_MASK) + page_offset; > + *len = 4096 - page_offset; > + > + return 0; > + > +no_translation: > + *paddr = addr; > + *len = INT_MAX; > + return 0; > +} > diff --git a/hw/pc.c b/hw/pc.c > index 186e322..4c929f9 100644 > --- a/hw/pc.c > +++ b/hw/pc.c > @@ -1066,6 +1066,10 @@ void pc_pci_device_init(PCIBus *pci_bus) > int max_bus; > int bus; > > +#ifdef CONFIG_AMD_IOMMU > + amd_iommu_init(pci_bus); > +#endif > + > max_bus = drive_get_max_bus(IF_SCSI); > for (bus = 0; bus <= max_bus; bus++) { > pci_create_simple(pci_bus, -1, "lsi53c895a"); > diff --git a/hw/pc.h b/hw/pc.h > index 3ef2f75..255ad93 100644 > --- a/hw/pc.h > +++ b/hw/pc.h > @@ -191,4 +191,7 @@ void extboot_init(BlockDriverState *bs); > > int e820_add_entry(uint64_t, uint64_t, uint32_t); > > +/* amd_iommu.c */ > +void amd_iommu_init(PCIBus *bus); > + > #endif > diff --git a/hw/pci_ids.h b/hw/pci_ids.h > index 39e9f1d..d790312 100644 > --- a/hw/pci_ids.h > +++ b/hw/pci_ids.h > @@ -26,6 +26,7 @@ > > #define PCI_CLASS_MEMORY_RAM 0x0500 > > +#define PCI_CLASS_SYSTEM_IOMMU 0x0806 > #define PCI_CLASS_SYSTEM_OTHER 0x0880 > > #define PCI_CLASS_SERIAL_USB 0x0c03 > @@ -56,6 +57,7 @@ > > #define PCI_VENDOR_ID_AMD 0x1022 > #define PCI_DEVICE_ID_AMD_LANCE 0x2000 > +#define PCI_DEVICE_ID_AMD_IOMMU 0x0000 /* FIXME */ > > #define PCI_VENDOR_ID_MOTOROLA 0x1057 > #define PCI_DEVICE_ID_MOTOROLA_MPC106 0x0002 > diff --git a/hw/pci_regs.h b/hw/pci_regs.h > index 1c675dc..6399b5d 100644 > --- 
a/hw/pci_regs.h > +++ b/hw/pci_regs.h > @@ -216,6 +216,7 @@ > #define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ > #define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */ > #define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ > +#define PCI_CAP_ID_SEC 0x0F /* Secure Device (AMD IOMMU) */ Indentation seems to be off. > #define PCI_CAP_ID_EXP 0x10 /* PCI Express */ > #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ > #define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */ > -- > 1.7.1 > > >
On Thu, Aug 05, 2010 at 09:31:58PM +0000, Blue Swirl wrote: > On Wed, Aug 4, 2010 at 10:32 PM, Eduard - Gabriel Munteanu > <eduard.munteanu@linux360.ro> wrote: [snip] > > diff --git a/Makefile.target b/Makefile.target > > index 70a9c1b..86226a0 100644 > > --- a/Makefile.target > > +++ b/Makefile.target > > @@ -219,6 +219,8 @@ obj-i386-y += pcspk.o i8254.o > >  obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o > >  obj-i386-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += device-assignment.o > > > > +obj-i386-$(CONFIG_AMD_IOMMU) += amd_iommu.o > > Make this unconditional. > [snip] > > Drop all configure changes. > Alright, so it's not going to be a compile-time configurable option. I'll make some cmdline option for it and make really sure I don't mess performance in hot paths. (I'm actually happy to know it's gonna go in that way.) [snip] > > +struct amd_iommu_invalidator { > > +    int                     devfn; > > +    PCIInvalidateIOTLBFunc  *func; > > +    void                    *opaque; > > +    QLIST_ENTRY(amd_iommu_invalidator) list; > > +}; > > This should be AMDIOMMUInvalidator with typedef. > > > + > > +struct amd_iommu_state { [snip] > > +}; > > Likewise, AMDIOMMUState. > [snip] > > +static int amd_iommu_translate(PCIIOMMU *iommu, > > +                               PCIDevice *dev, > > +                               pci_addr_t addr, > > +                               target_phys_addr_t *paddr, > > +                               int *len, > > +                               unsigned perms); > > Please move the implementation here to avoid this declaration. > [snip] > > +    iommu = qemu_mallocz(sizeof(PCIIOMMU)); > > +    iommu->opaque = st; > > +    iommu->translate = amd_iommu_translate; > > +    iommu->register_iotlb_invalidator = amd_iommu_register_invalidator; > > +
    pci_register_iommu(dev, iommu); > > I'd avoid the structure and just pass the stuff to pci_register_iommu > as function arguments. > [snip] > > +void amd_iommu_init(PCIBus *bus) > > +{ > > +    pci_create_simple(bus, -1, "amd-iommu"); > > +} > > Just open code this in pc.c. > Roger, I'll fix these. [snip] > >  #define PCI_VENDOR_ID_MOTOROLA           0x1057 > >  #define PCI_DEVICE_ID_MOTOROLA_MPC106    0x0002 > > diff --git a/hw/pci_regs.h b/hw/pci_regs.h > > index 1c675dc..6399b5d 100644 > > --- a/hw/pci_regs.h > > +++ b/hw/pci_regs.h > > @@ -216,6 +216,7 @@ > >  #define  PCI_CAP_ID_SHPC       0x0C    /* PCI Standard Hot-Plug Controller */ > >  #define  PCI_CAP_ID_SSVID      0x0D    /* Bridge subsystem vendor/device ID */ > >  #define  PCI_CAP_ID_AGP3       0x0E    /* AGP Target PCI-PCI bridge */ > > +#define  PCI_CAP_ID_SEC     0x0F    /* Secure Device (AMD IOMMU) */ > > Indentation seems to be off. > > >  #define  PCI_CAP_ID_EXP        0x10    /* PCI Express */ > >  #define  PCI_CAP_ID_MSIX       0x11    /* MSI-X */ > >  #define  PCI_CAP_ID_AF         0x13    /* PCI Advanced Features */ > > -- > > 1.7.1 The original has tabs instead of spaces, but my changes line up properly. Which way should I go, convert it all to spaces, add my line with tabs or leave it like this? Of course, any cleanup would go in a separate patch. Thanks, Eduard
diff --git a/Makefile.target b/Makefile.target index 70a9c1b..86226a0 100644 --- a/Makefile.target +++ b/Makefile.target @@ -219,6 +219,8 @@ obj-i386-y += pcspk.o i8254.o obj-i386-$(CONFIG_KVM_PIT) += i8254-kvm.o obj-i386-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += device-assignment.o +obj-i386-$(CONFIG_AMD_IOMMU) += amd_iommu.o + # Hardware support obj-ia64-y += ide.o pckbd.o vga.o $(SOUND_HW) dma.o $(AUDIODRV) obj-ia64-y += fdc.o mc146818rtc.o serial.o i8259.o ipf.o diff --git a/configure b/configure index af50607..7448603 100755 --- a/configure +++ b/configure @@ -317,6 +317,7 @@ io_thread="no" mixemu="no" kvm_cap_pit="" kvm_cap_device_assignment="" +amd_iommu="no" kerneldir="" aix="no" blobs="yes" @@ -629,6 +630,8 @@ for opt do ;; --enable-kvm-device-assignment) kvm_cap_device_assignment="yes" ;; + --enable-amd-iommu-emul) amd_iommu="yes" + ;; --enable-profiler) profiler="yes" ;; --enable-cocoa) @@ -871,6 +874,8 @@ echo " --disable-kvm-pit disable KVM pit support" echo " --enable-kvm-pit enable KVM pit support" echo " --disable-kvm-device-assignment disable KVM device assignment support" echo " --enable-kvm-device-assignment enable KVM device assignment support" +echo " --disable-amd-iommu-emul disable AMD IOMMU emulation" +echo " --enable-amd-iommu-emul enable AMD IOMMU emulation" echo " --disable-nptl disable usermode NPTL support" echo " --enable-nptl enable usermode NPTL support" echo " --enable-system enable all system emulation targets" @@ -2251,6 +2256,7 @@ echo "Install blobs $blobs" echo "KVM support $kvm" echo "KVM PIT support $kvm_cap_pit" echo "KVM device assig. $kvm_cap_device_assignment" +echo "AMD IOMMU emul. 
$amd_iommu" echo "fdt support $fdt" echo "preadv support $preadv" echo "fdatasync $fdatasync" @@ -2645,6 +2651,10 @@ case "$target_arch2" in x86_64) TARGET_BASE_ARCH=i386 target_phys_bits=64 + if test "$amd_iommu" = "yes"; then + echo "CONFIG_AMD_IOMMU=y" >> $config_target_mak + echo "CONFIG_PCI_IOMMU=y" >> $config_host_mak + fi ;; ia64) target_phys_bits=64 diff --git a/hw/amd_iommu.c b/hw/amd_iommu.c new file mode 100644 index 0000000..ff9903e --- /dev/null +++ b/hw/amd_iommu.c @@ -0,0 +1,671 @@ +/* + * AMD IOMMU emulation + * + * Copyright (c) 2010 Eduard - Gabriel Munteanu + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "pc.h" +#include "hw.h" +#include "pci.h" +#include "qlist.h" + +/* Capability registers */ +#define CAPAB_HEADER 0x00 +#define CAPAB_REV_TYPE 0x02 +#define CAPAB_FLAGS 0x03 +#define CAPAB_BAR_LOW 0x04 +#define CAPAB_BAR_HIGH 0x08 +#define CAPAB_RANGE 0x0C +#define CAPAB_MISC 0x10 + +#define CAPAB_SIZE 0x14 + +/* Capability header data */ +#define CAPAB_FLAG_IOTLBSUP (1 << 0) +#define CAPAB_FLAG_HTTUNNEL (1 << 1) +#define CAPAB_FLAG_NPCACHE (1 << 2) +#define CAPAB_INIT_REV (1 << 3) +#define CAPAB_INIT_TYPE 3 +#define CAPAB_INIT_REV_TYPE (CAPAB_REV | CAPAB_TYPE) +#define CAPAB_INIT_FLAGS (CAPAB_FLAG_NPCACHE | CAPAB_FLAG_HTTUNNEL) +#define CAPAB_INIT_MISC (64 << 15) | (48 << 8) +#define CAPAB_BAR_MASK ~((1UL << 14) - 1) + +/* MMIO registers */ +#define MMIO_DEVICE_TABLE 0x0000 +#define MMIO_COMMAND_BASE 0x0008 +#define MMIO_EVENT_BASE 0x0010 +#define MMIO_CONTROL 0x0018 +#define MMIO_EXCL_BASE 0x0020 +#define MMIO_EXCL_LIMIT 0x0028 +#define MMIO_COMMAND_HEAD 0x2000 +#define MMIO_COMMAND_TAIL 0x2008 +#define MMIO_EVENT_HEAD 0x2010 +#define MMIO_EVENT_TAIL 0x2018 +#define MMIO_STATUS 0x2020 + +#define MMIO_SIZE 0x4000 + +#define MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1) +#define MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~MMIO_DEVTAB_SIZE_MASK) +#define MMIO_DEVTAB_ENTRY_SIZE 32 +#define MMIO_DEVTAB_SIZE_UNIT 4096 + +#define MMIO_CMDBUF_SIZE_BYTE (MMIO_COMMAND_BASE + 7) +#define MMIO_CMDBUF_SIZE_MASK 0x0F +#define MMIO_CMDBUF_BASE_MASK MMIO_DEVTAB_BASE_MASK +#define MMIO_CMDBUF_DEFAULT_SIZE 8 +#define MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F) +#define MMIO_CMDBUF_TAIL_MASK MMIO_EVTLOG_HEAD_MASK + +#define MMIO_EVTLOG_SIZE_BYTE (MMIO_EVENT_BASE + 7) +#define MMIO_EVTLOG_SIZE_MASK MMIO_CMDBUF_SIZE_MASK +#define MMIO_EVTLOG_BASE_MASK MMIO_CMDBUF_BASE_MASK +#define MMIO_EVTLOG_DEFAULT_SIZE MMIO_CMDBUF_DEFAULT_SIZE +#define MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F) +#define MMIO_EVTLOG_TAIL_MASK MMIO_EVTLOG_HEAD_MASK + +#define 
MMIO_EXCL_BASE_MASK MMIO_DEVTAB_BASE_MASK +#define MMIO_EXCL_ENABLED_MASK (1ULL << 0) +#define MMIO_EXCL_ALLOW_MASK (1ULL << 1) +#define MMIO_EXCL_LIMIT_MASK MMIO_DEVTAB_BASE_MASK +#define MMIO_EXCL_LIMIT_LOW 0xFFF + +#define MMIO_CONTROL_IOMMUEN (1ULL << 0) +#define MMIO_CONTROL_HTTUNEN (1ULL << 1) +#define MMIO_CONTROL_EVENTLOGEN (1ULL << 2) +#define MMIO_CONTROL_EVENTINTEN (1ULL << 3) +#define MMIO_CONTROL_COMWAITINTEN (1ULL << 4) +#define MMIO_CONTROL_CMDBUFEN (1ULL << 12) + +#define MMIO_STATUS_EVTLOG_OF (1ULL << 0) +#define MMIO_STATUS_EVTLOG_INTR (1ULL << 1) +#define MMIO_STATUS_COMWAIT_INTR (1ULL << 2) +#define MMIO_STATUS_EVTLOG_RUN (1ULL << 3) +#define MMIO_STATUS_CMDBUF_RUN (1ULL << 4) + +#define CMDBUF_ID_BYTE 0x07 +#define CMDBUF_ID_RSHIFT 4 +#define CMDBUF_ENTRY_SIZE 0x10 + +#define CMD_COMPLETION_WAIT 0x01 +#define CMD_INVAL_DEVTAB_ENTRY 0x02 +#define CMD_INVAL_IOMMU_PAGES 0x03 +#define CMD_INVAL_IOTLB_PAGES 0x04 +#define CMD_INVAL_INTR_TABLE 0x05 + +#define DEVTAB_ENTRY_SIZE 32 + +/* Device table entry bits 0:63 */ +#define DEV_VALID (1ULL << 0) +#define DEV_TRANSLATION_VALID (1ULL << 1) +#define DEV_MODE_MASK 0x7 +#define DEV_MODE_RSHIFT 9 +#define DEV_PT_ROOT_MASK 0xFFFFFFFFFF000 +#define DEV_PT_ROOT_RSHIFT 12 +#define DEV_PERM_SHIFT 61 +#define DEV_PERM_READ (1ULL << 61) +#define DEV_PERM_WRITE (1ULL << 62) + +/* Device table entry bits 64:127 */ +#define DEV_DOMAIN_ID_MASK ((1ULL << 16) - 1) +#define DEV_IOTLB_SUPPORT (1ULL << 17) +#define DEV_SUPPRESS_PF (1ULL << 18) +#define DEV_SUPPRESS_ALL_PF (1ULL << 19) +#define DEV_IOCTL_MASK ~3 +#define DEV_IOCTL_RSHIFT 20 +#define DEV_IOCTL_DENY 0 +#define DEV_IOCTL_PASSTHROUGH 1 +#define DEV_IOCTL_TRANSLATE 2 +#define DEV_CACHE (1ULL << 37) +#define DEV_SNOOP_DISABLE (1ULL << 38) +#define DEV_EXCL (1ULL << 39) + +struct amd_iommu_invalidator { + int devfn; + PCIInvalidateIOTLBFunc *func; + void *opaque; + QLIST_ENTRY(amd_iommu_invalidator) list; +}; + +struct amd_iommu_state { + PCIDevice dev; + + int 
capab_offset; + unsigned char *capab; + + int mmio_index; + target_phys_addr_t mmio_addr; + unsigned char *mmio_buf; + int mmio_enabled; + + int enabled; + int ats_enabled; + + target_phys_addr_t devtab; + size_t devtab_len; + + target_phys_addr_t cmdbuf; + int cmdbuf_enabled; + size_t cmdbuf_len; + size_t cmdbuf_head; + size_t cmdbuf_tail; + int completion_wait_intr; + + target_phys_addr_t evtlog; + int evtlog_enabled; + int evtlog_intr; + size_t evtlog_len; + size_t evtlog_head; + size_t evtlog_tail; + + target_phys_addr_t excl_base; + target_phys_addr_t excl_limit; + int excl_enabled; + int excl_allow; + + QLIST_HEAD(, amd_iommu_invalidator) invalidators; +}; + +static void amd_iommu_register_invalidator(PCIIOMMU *iommu, + PCIDevice *dev, + pci_addr_t addr, + PCIInvalidateIOTLBFunc *cb, + void *opaque) +{ + struct amd_iommu_invalidator *inval; + struct amd_iommu_state *st = iommu->opaque; + + inval = qemu_malloc(sizeof(struct amd_iommu_invalidator)); + inval->devfn = dev->devfn; + inval->func = cb; + inval->opaque = opaque; + + QLIST_INSERT_HEAD(&st->invalidators, inval, list); +} + +static void amd_iommu_completion_wait(struct amd_iommu_state *st, + uint8_t *cmd) +{ + uint64_t addr; + + if (cmd[0] & 1) { + addr = le64_to_cpu(*(uint64_t *) cmd) & 0xFFFFFFFFFFFF8; + cpu_physical_memory_write(addr, cmd + 8, 8); + } + + if (cmd[0] & 2) + st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_COMWAIT_INTR; +} + +static void amd_iommu_inval_iotlb(struct amd_iommu_state *st, + uint8_t *cmd) +{ + struct amd_iommu_invalidator *inval; + int devfn = *(uint16_t *) cmd; + + QLIST_FOREACH(inval, &st->invalidators, list) { + if (inval->devfn == devfn) { + inval->func(inval->opaque); + QLIST_REMOVE(inval, list); + } + } +} + +static void amd_iommu_cmdbuf_run(struct amd_iommu_state *st) +{ + uint8_t cmd[16]; + int type; + + if (!st->cmdbuf_enabled) + return; + + /* Check if there's work to do. 
*/ + if (st->cmdbuf_head == st->cmdbuf_tail) + return; + + cpu_physical_memory_read(st->cmdbuf + st->cmdbuf_head, cmd, 16); + type = cmd[CMDBUF_ID_BYTE] >> CMDBUF_ID_RSHIFT; + switch (type) { + case CMD_COMPLETION_WAIT: + amd_iommu_completion_wait(st, cmd); + break; + case CMD_INVAL_DEVTAB_ENTRY: + break; + case CMD_INVAL_IOMMU_PAGES: + break; + case CMD_INVAL_IOTLB_PAGES: + amd_iommu_inval_iotlb(st, cmd); + break; + case CMD_INVAL_INTR_TABLE: + break; + default: + break; + } + + /* Increment and wrap head pointer. */ + st->cmdbuf_head += CMDBUF_ENTRY_SIZE; + if (st->cmdbuf_head >= st->cmdbuf_len) + st->cmdbuf_head = 0; +} + +static uint32_t amd_iommu_mmio_buf_read(struct amd_iommu_state *st, + size_t offset, + size_t size) +{ + ssize_t i; + uint32_t ret; + + if (!size) + return 0; + + ret = st->mmio_buf[offset + size - 1]; + for (i = size - 2; i >= 0; i--) { + ret <<= 8; + ret |= st->mmio_buf[offset + i]; + } + + return ret; +} + +static void amd_iommu_mmio_buf_write(struct amd_iommu_state *st, + size_t offset, + size_t size, + uint32_t val) +{ + size_t i; + + for (i = 0; i < size; i++) { + st->mmio_buf[offset + i] = val & 0xFF; + val >>= 8; + } +} + +static void amd_iommu_update_mmio(struct amd_iommu_state *st, + target_phys_addr_t addr) +{ + size_t reg = addr & ~0x07; + uint64_t *base = (uint64_t *) &st->mmio_buf[reg]; + uint64_t val = *base; + + switch (reg) { + case MMIO_CONTROL: + st->enabled = !!(val & MMIO_CONTROL_IOMMUEN); + st->ats_enabled = !!(val & MMIO_CONTROL_HTTUNEN); + st->evtlog_enabled = st->enabled && + !!(val & MMIO_CONTROL_EVENTLOGEN); + st->evtlog_intr = !!(val & MMIO_CONTROL_EVENTINTEN); + st->completion_wait_intr = !!(val & MMIO_CONTROL_COMWAITINTEN); + st->cmdbuf_enabled = st->enabled && + !!(val & MMIO_CONTROL_CMDBUFEN); + + /* Update status flags depending on the control register. 
*/ + if (st->cmdbuf_enabled) + st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_CMDBUF_RUN; + else + st->mmio_buf[MMIO_STATUS] &= ~MMIO_STATUS_CMDBUF_RUN; + if (st->evtlog_enabled) + st->mmio_buf[MMIO_STATUS] |= MMIO_STATUS_EVTLOG_RUN; + else + st->mmio_buf[MMIO_STATUS] &= ~MMIO_STATUS_EVTLOG_RUN; + + amd_iommu_cmdbuf_run(st); + break; + case MMIO_DEVICE_TABLE: + st->devtab = (target_phys_addr_t) (val & MMIO_DEVTAB_BASE_MASK); + st->devtab_len = ((val & MMIO_DEVTAB_SIZE_MASK) + 1) * + (MMIO_DEVTAB_SIZE_UNIT / MMIO_DEVTAB_ENTRY_SIZE); + break; + case MMIO_COMMAND_BASE: + st->cmdbuf = (target_phys_addr_t) (val & MMIO_CMDBUF_BASE_MASK); + st->cmdbuf_len = 1UL << (st->mmio_buf[MMIO_CMDBUF_SIZE_BYTE] & + MMIO_CMDBUF_SIZE_MASK); + amd_iommu_cmdbuf_run(st); + break; + case MMIO_COMMAND_HEAD: + st->cmdbuf_head = val & MMIO_CMDBUF_HEAD_MASK; + amd_iommu_cmdbuf_run(st); + break; + case MMIO_COMMAND_TAIL: + st->cmdbuf_tail = val & MMIO_CMDBUF_TAIL_MASK; + amd_iommu_cmdbuf_run(st); + break; + case MMIO_EVENT_BASE: + st->evtlog = (target_phys_addr_t) (val & MMIO_EVTLOG_BASE_MASK); + st->evtlog_len = 1UL << (st->mmio_buf[MMIO_EVTLOG_SIZE_BYTE] & + MMIO_EVTLOG_SIZE_MASK); + break; + case MMIO_EVENT_HEAD: + st->evtlog_head = val & MMIO_EVTLOG_HEAD_MASK; + break; + case MMIO_EVENT_TAIL: + st->evtlog_tail = val & MMIO_EVTLOG_TAIL_MASK; + break; + case MMIO_EXCL_BASE: + st->excl_base = (target_phys_addr_t) (val & MMIO_EXCL_BASE_MASK); + st->excl_enabled = val & MMIO_EXCL_ENABLED_MASK; + st->excl_allow = val & MMIO_EXCL_ALLOW_MASK; + break; + case MMIO_EXCL_LIMIT: + st->excl_limit = (target_phys_addr_t) ((val & MMIO_EXCL_LIMIT_MASK) | + MMIO_EXCL_LIMIT_LOW); + break; + default: + break; + } +} + +static uint32_t amd_iommu_mmio_readb(void *opaque, target_phys_addr_t addr) +{ + struct amd_iommu_state *st = opaque; + + return amd_iommu_mmio_buf_read(st, addr, 1); +} + +static uint32_t amd_iommu_mmio_readw(void *opaque, target_phys_addr_t addr) +{ + struct amd_iommu_state *st = opaque; + + return 
amd_iommu_mmio_buf_read(st, addr, 2); +} + +static uint32_t amd_iommu_mmio_readl(void *opaque, target_phys_addr_t addr) +{ + struct amd_iommu_state *st = opaque; + + return amd_iommu_mmio_buf_read(st, addr, 4); +} + +static void amd_iommu_mmio_writeb(void *opaque, + target_phys_addr_t addr, + uint32_t val) +{ + struct amd_iommu_state *st = opaque; + + amd_iommu_mmio_buf_write(st, addr, 1, val); + amd_iommu_update_mmio(st, addr); +} + +static void amd_iommu_mmio_writew(void *opaque, + target_phys_addr_t addr, + uint32_t val) +{ + struct amd_iommu_state *st = opaque; + + amd_iommu_mmio_buf_write(st, addr, 2, val); + amd_iommu_update_mmio(st, addr); +} + +static void amd_iommu_mmio_writel(void *opaque, + target_phys_addr_t addr, + uint32_t val) +{ + struct amd_iommu_state *st = opaque; + + amd_iommu_mmio_buf_write(st, addr, 4, val); + amd_iommu_update_mmio(st, addr); +} + +static CPUReadMemoryFunc * const amd_iommu_mmio_read[] = { + amd_iommu_mmio_readb, + amd_iommu_mmio_readw, + amd_iommu_mmio_readl, +}; + +static CPUWriteMemoryFunc * const amd_iommu_mmio_write[] = { + amd_iommu_mmio_writeb, + amd_iommu_mmio_writew, + amd_iommu_mmio_writel, +}; + +static void amd_iommu_init_mmio(struct amd_iommu_state *st) +{ + st->mmio_buf[MMIO_CMDBUF_SIZE_BYTE] = MMIO_CMDBUF_DEFAULT_SIZE; + st->mmio_buf[MMIO_EVTLOG_SIZE_BYTE] = MMIO_EVTLOG_DEFAULT_SIZE; +} + +static void amd_iommu_enable_mmio(struct amd_iommu_state *st) +{ + target_phys_addr_t addr; + + st->mmio_index = cpu_register_io_memory(amd_iommu_mmio_read, + amd_iommu_mmio_write, st); + if (st->mmio_index < 0) + return; + + addr = le64_to_cpu(*(uint64_t *) &st->capab[CAPAB_BAR_LOW]) & CAPAB_BAR_MASK; + cpu_register_physical_memory(addr, MMIO_SIZE, st->mmio_index); + + st->mmio_addr = addr; + st->mmio_buf = qemu_mallocz(MMIO_SIZE); + st->mmio_enabled = 1; + amd_iommu_init_mmio(st); +} + +static uint32_t amd_iommu_read_capab(PCIDevice *pci_dev, + uint32_t addr, int len) +{ + return pci_default_cap_read_config(pci_dev, addr, 
len); +} + +static void amd_iommu_write_capab(PCIDevice *dev, + uint32_t addr, uint32_t val, int len) +{ + struct amd_iommu_state *st; + unsigned char *capab; + int reg; + + st = DO_UPCAST(struct amd_iommu_state, dev, dev); + capab = st->capab; + reg = (addr - 0x40) & ~0x3; /* Get the 32-bits register. */ + + switch (reg) { + case CAPAB_HEADER: + case CAPAB_MISC: + /* Read-only. */ + return; + case CAPAB_BAR_LOW: + case CAPAB_BAR_HIGH: + case CAPAB_RANGE: + if (st->mmio_enabled) + return; + pci_default_cap_write_config(dev, addr, val, len); + break; + default: + return; + } + + if (capab[CAPAB_BAR_LOW] & 0x1) + amd_iommu_enable_mmio(st); +} + +static int amd_iommu_init_capab(PCIDevice *dev) +{ + struct amd_iommu_state *st; + unsigned char *capab; + + st = DO_UPCAST(struct amd_iommu_state, dev, dev); + capab = st->dev.config + st->capab_offset; + + capab[CAPAB_REV_TYPE] = CAPAB_REV_TYPE; + capab[CAPAB_FLAGS] = CAPAB_FLAGS; + capab[CAPAB_BAR_LOW] = 0; + capab[CAPAB_BAR_HIGH] = 0; + capab[CAPAB_RANGE] = 0; + *((uint32_t *) &capab[CAPAB_MISC]) = cpu_to_le32(CAPAB_INIT_MISC); + + st->capab = capab; + st->dev.cap.length = CAPAB_SIZE; + + return 0; +} + +static int amd_iommu_translate(PCIIOMMU *iommu, + PCIDevice *dev, + pci_addr_t addr, + target_phys_addr_t *paddr, + int *len, + unsigned perms); + +static int amd_iommu_pci_initfn(PCIDevice *dev) +{ + struct amd_iommu_state *st; + PCIIOMMU *iommu; + int err; + + st = DO_UPCAST(struct amd_iommu_state, dev, dev); + + pci_config_set_vendor_id(st->dev.config, PCI_VENDOR_ID_AMD); + pci_config_set_device_id(st->dev.config, PCI_DEVICE_ID_AMD_IOMMU); + pci_config_set_class(st->dev.config, PCI_CLASS_SYSTEM_IOMMU); + + st->capab_offset = pci_add_capability(&st->dev, + PCI_CAP_ID_SEC, + CAPAB_SIZE); + err = pci_enable_capability_support(&st->dev, st->capab_offset, + amd_iommu_read_capab, + amd_iommu_write_capab, + amd_iommu_init_capab); + if (err) + return err; + + iommu = qemu_mallocz(sizeof(PCIIOMMU)); + iommu->opaque = st; + 
iommu->translate = amd_iommu_translate; + iommu->register_iotlb_invalidator = amd_iommu_register_invalidator; + pci_register_iommu(dev, iommu); + + return 0; +} + +static const VMStateDescription vmstate_amd_iommu = { + .name = "amd-iommu", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .fields = (VMStateField []) { + VMSTATE_PCI_DEVICE(dev, struct amd_iommu_state), + VMSTATE_END_OF_LIST() + } +}; + +static PCIDeviceInfo amd_iommu_pci_info = { + .qdev.name = "amd-iommu", + .qdev.desc = "AMD IOMMU", + .qdev.size = sizeof(struct amd_iommu_state), + .qdev.vmsd = &vmstate_amd_iommu, + .init = amd_iommu_pci_initfn, +}; + +void amd_iommu_init(PCIBus *bus) +{ + pci_create_simple(bus, -1, "amd-iommu"); +} + +static void amd_iommu_register(void) +{ + pci_qdev_register(&amd_iommu_pci_info); +} + +device_init(amd_iommu_register); + +static void amd_iommu_page_fault(struct amd_iommu_state *st, + int devfn, + unsigned domid, + target_phys_addr_t addr, + int present, + int is_write) +{ + uint16_t entry[8]; + uint64_t *entry_addr = (uint64_t *) &entry[4]; + + entry[0] = cpu_to_le16(devfn); + entry[1] = 0; + entry[2] = cpu_to_le16(domid); + entry[3] = (2UL << 12) | (!!present << 4) | (!!is_write << 5); + *entry_addr = cpu_to_le64(addr); + + cpu_physical_memory_write((target_phys_addr_t) st->evtlog + st->evtlog_tail, (uint8_t *) &entry, 128); + st->evtlog_tail += 128; +} + +static inline uint64_t amd_iommu_get_perms(uint64_t entry) +{ + return (entry & (DEV_PERM_READ | DEV_PERM_WRITE)) >> DEV_PERM_SHIFT; +} + +static int amd_iommu_translate(PCIIOMMU *iommu, + PCIDevice *dev, + pci_addr_t addr, + target_phys_addr_t *paddr, + int *len, + unsigned perms) +{ + int devfn, present; + target_phys_addr_t entry_addr, pte_addr; + uint64_t entry[4], pte, page_offset, pte_perms; + unsigned level, domid; + struct amd_iommu_state *st = iommu->opaque; + + if (!st->enabled) + goto no_translation; + + /* Get device table entry. 
*/ + devfn = dev->devfn; + entry_addr = st->devtab + devfn * DEVTAB_ENTRY_SIZE; + cpu_physical_memory_read(entry_addr, (uint8_t *) entry, 32); + + pte = entry[0]; + if (!(pte & DEV_VALID) || !(pte & DEV_TRANSLATION_VALID)) { + goto no_translation; + } + domid = entry[1] & DEV_DOMAIN_ID_MASK; + level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK; + while (level > 0) { + /* + * Check permissions: the bitwise + * implication perms -> entry_perms must be true. + */ + pte_perms = amd_iommu_get_perms(pte); + present = pte & 1; + if (!present || perms != (perms & pte_perms)) { + amd_iommu_page_fault(st, devfn, domid, addr, + present, !!(perms & IOMMU_PERM_WRITE)); + return -EPERM; + } + + /* Go to the next lower level. */ + pte_addr = pte & DEV_PT_ROOT_MASK; + pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3; + pte = ldq_phys(pte_addr); + level = (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK; + } + page_offset = addr & 4095; + *paddr = (pte & DEV_PT_ROOT_MASK) + page_offset; + *len = 4096 - page_offset; + + return 0; + +no_translation: + *paddr = addr; + *len = INT_MAX; + return 0; +} diff --git a/hw/pc.c b/hw/pc.c index 186e322..4c929f9 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -1066,6 +1066,10 @@ void pc_pci_device_init(PCIBus *pci_bus) int max_bus; int bus; +#ifdef CONFIG_AMD_IOMMU + amd_iommu_init(pci_bus); +#endif + max_bus = drive_get_max_bus(IF_SCSI); for (bus = 0; bus <= max_bus; bus++) { pci_create_simple(pci_bus, -1, "lsi53c895a"); diff --git a/hw/pc.h b/hw/pc.h index 3ef2f75..255ad93 100644 --- a/hw/pc.h +++ b/hw/pc.h @@ -191,4 +191,7 @@ void extboot_init(BlockDriverState *bs); int e820_add_entry(uint64_t, uint64_t, uint32_t); +/* amd_iommu.c */ +void amd_iommu_init(PCIBus *bus); + #endif diff --git a/hw/pci_ids.h b/hw/pci_ids.h index 39e9f1d..d790312 100644 --- a/hw/pci_ids.h +++ b/hw/pci_ids.h @@ -26,6 +26,7 @@ #define PCI_CLASS_MEMORY_RAM 0x0500 +#define PCI_CLASS_SYSTEM_IOMMU 0x0806 #define PCI_CLASS_SYSTEM_OTHER 0x0880 #define PCI_CLASS_SERIAL_USB 0x0c03 @@ -56,6 
+57,7 @@ #define PCI_VENDOR_ID_AMD 0x1022 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 +#define PCI_DEVICE_ID_AMD_IOMMU 0x0000 /* FIXME */ #define PCI_VENDOR_ID_MOTOROLA 0x1057 #define PCI_DEVICE_ID_MOTOROLA_MPC106 0x0002 diff --git a/hw/pci_regs.h b/hw/pci_regs.h index 1c675dc..6399b5d 100644 --- a/hw/pci_regs.h +++ b/hw/pci_regs.h @@ -216,6 +216,7 @@ #define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ #define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */ #define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */ +#define PCI_CAP_ID_SEC 0x0F /* Secure Device (AMD IOMMU) */ #define PCI_CAP_ID_EXP 0x10 /* PCI Express */ #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ #define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */
This introduces emulation for the AMD IOMMU, described in "AMD I/O Virtualization Technology (IOMMU) Specification". Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro> --- Makefile.target | 2 + configure | 10 + hw/amd_iommu.c | 671 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ hw/pc.c | 4 + hw/pc.h | 3 + hw/pci_ids.h | 2 + hw/pci_regs.h | 1 + 7 files changed, 693 insertions(+), 0 deletions(-) create mode 100644 hw/amd_iommu.c