Message ID | 1349962023-560-5-git-send-email-avi@redhat.com |
---|---|
State | New |
Headers | show |
Il 11/10/2012 15:27, Avi Kivity ha scritto: > -static int spapr_tce_translate(DMAContext *dma, > - dma_addr_t addr, > - target_phys_addr_t *paddr, > - target_phys_addr_t *len, > - DMADirection dir) > +IOMMUTLBEntry spapr_tce_translate(DMAContext *dma, target_phys_addr_t addr, > + bool is_write) > { > sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma); > - enum sPAPRTCEAccess access = (dir == DMA_DIRECTION_FROM_DEVICE) > - ? SPAPR_TCE_WO : SPAPR_TCE_RO; > + enum sPAPRTCEAccess access = is_write ? SPAPR_TCE_WO : SPAPR_TCE_RO; > uint64_t tce; > > #ifdef DEBUG_TCE > @@ -84,29 +80,27 @@ static int spapr_tce_translate(DMAContext *dma, > #ifdef DEBUG_TCE > fprintf(stderr, "spapr_tce_translate out of bounds\n"); > #endif > - return -EFAULT; > + return (IOMMUTLBEntry) { .valid = false }; > } > > tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT].tce; > > /* Check TCE */ > if (!(tce & access)) { > - return -EPERM; > + return (IOMMUTLBEntry) { .valid = false }; > } > > - /* How much til end of page ? */ > - *len = ((~addr) & SPAPR_TCE_PAGE_MASK) + 1; > - > - /* Translate */ > - *paddr = (tce & ~SPAPR_TCE_PAGE_MASK) | > - (addr & SPAPR_TCE_PAGE_MASK); > - > #ifdef DEBUG_TCE > fprintf(stderr, " -> *paddr=0x" TARGET_FMT_plx ", *len=0x" > - TARGET_FMT_plx "\n", *paddr, *len); > + TARGET_FMT_plx "\n", (tce & ~SPAPR_TCE_PAGE_MASK), SPAPR_TCE_PAGE_MASK + 1); > #endif > > - return 0; > + return (IOMMUTLBEntry) { > + .device_addr = addr & SPAPR_TCE_PAGE_MASK, > + .translated_addr = (tce & ~SPAPR_TCE_PAGE_MASK), > + .addr_mask = SPAPR_TCE_PAGE_MASK, > + .valid = true, > + }; > } > > DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size) > @@ -118,7 +112,7 @@ DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size) > } > > tcet = g_malloc0(sizeof(*tcet)); > - dma_context_init(&tcet->dma, &address_space_memory, spapr_tce_translate, NULL, NULL); > + dma_context_init(&tcet->dma, &address_space_memory, NULL, NULL, NULL); > > tcet->liobn = liobn; > tcet->window_size = window_size; > @@ -253,7 +247,8 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, > return 0; > } > > - if (iommu->translate == spapr_tce_translate) { > + /* FIXME: WHAT?? */ > + if (true) { > sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, iommu); > return spapr_dma_dt(fdt, node_off, propname, > tcet->liobn, 0, tcet->window_size); > diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c > index 661c05b..24f5b46 100644 > --- a/hw/spapr_pci.c > +++ b/hw/spapr_pci.c > @@ -506,14 +506,30 @@ static void spapr_msi_write(void *opaque, target_phys_addr_t addr, > /* > * PHB PCI device > */ > -static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque, > - int devfn) > +static MemoryRegion *spapr_pci_dma_iommu_new(PCIBus *bus, void *opaque, int devfn) > { > sPAPRPHBState *phb = opaque; > > - return phb->dma; > + return &phb->iommu; > } > > +static void spapr_pci_dma_iommu_destroy(MemoryRegion *iommu) > +{ > + /* iommu is shared among devices, do nothing */ > +} > + > +static IOMMUTLBEntry spapr_phb_translate(MemoryRegion *iommu, target_phys_addr_t addr, > + bool is_write) > +{ > + sPAPRPHBState *phb = container_of(iommu, sPAPRPHBState, iommu); > + > + return spapr_tce_translate(phb->dma, addr, is_write); > +} IIUC, sPAPRTCETable could drop the DMAContext field and just include the actual translation info. spapr_tce_new_dma_context becomes spapr_tce_new and returns an opaque sPAPRTCETable struct that is passed back to spapr_tce_translate. And DMAContext can disappear and will be replaced with just an AddressSpace *. I like it! Paolo
On 10/11/2012 03:53 PM, Paolo Bonzini wrote: > Il 11/10/2012 15:27, Avi Kivity ha scritto: >> -static int spapr_tce_translate(DMAContext *dma, >> - dma_addr_t addr, >> - target_phys_addr_t *paddr, >> - target_phys_addr_t *len, >> - DMADirection dir) >> +IOMMUTLBEntry spapr_tce_translate(DMAContext *dma, target_phys_addr_t addr, >> + bool is_write) >> { >> sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma); >> - enum sPAPRTCEAccess access = (dir == DMA_DIRECTION_FROM_DEVICE) >> - ? SPAPR_TCE_WO : SPAPR_TCE_RO; >> + enum sPAPRTCEAccess access = is_write ? SPAPR_TCE_WO : SPAPR_TCE_RO; >> uint64_t tce; >> >> #ifdef DEBUG_TCE >> @@ -84,29 +80,27 @@ static int spapr_tce_translate(DMAContext *dma, >> #ifdef DEBUG_TCE >> fprintf(stderr, "spapr_tce_translate out of bounds\n"); >> #endif >> - return -EFAULT; >> + return (IOMMUTLBEntry) { .valid = false }; >> } >> >> tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT].tce; >> >> /* Check TCE */ >> if (!(tce & access)) { >> - return -EPERM; >> + return (IOMMUTLBEntry) { .valid = false }; >> } >> >> - /* How much til end of page ? */ >> - *len = ((~addr) & SPAPR_TCE_PAGE_MASK) + 1; >> - >> - /* Translate */ >> - *paddr = (tce & ~SPAPR_TCE_PAGE_MASK) | >> - (addr & SPAPR_TCE_PAGE_MASK); >> - >> #ifdef DEBUG_TCE >> fprintf(stderr, " -> *paddr=0x" TARGET_FMT_plx ", *len=0x" >> - TARGET_FMT_plx "\n", *paddr, *len); >> + TARGET_FMT_plx "\n", (tce & ~SPAPR_TCE_PAGE_MASK), SPAPR_TCE_PAGE_MASK + 1); >> #endif >> >> - return 0; >> + return (IOMMUTLBEntry) { >> + .device_addr = addr & SPAPR_TCE_PAGE_MASK, >> + .translated_addr = (tce & ~SPAPR_TCE_PAGE_MASK), >> + .addr_mask = SPAPR_TCE_PAGE_MASK, >> + .valid = true, >> + }; >> } >> >> DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size) >> @@ -118,7 +112,7 @@ DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size) >> } >> >> tcet = g_malloc0(sizeof(*tcet)); >> - dma_context_init(&tcet->dma, &address_space_memory, spapr_tce_translate, NULL, NULL); >> + dma_context_init(&tcet->dma, &address_space_memory, NULL, NULL, NULL); >> >> tcet->liobn = liobn; >> tcet->window_size = window_size; >> @@ -253,7 +247,8 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, >> return 0; >> } >> >> - if (iommu->translate == spapr_tce_translate) { >> + /* FIXME: WHAT?? */ >> + if (true) { >> sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, iommu); >> return spapr_dma_dt(fdt, node_off, propname, >> tcet->liobn, 0, tcet->window_size); >> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c >> index 661c05b..24f5b46 100644 >> --- a/hw/spapr_pci.c >> +++ b/hw/spapr_pci.c >> @@ -506,14 +506,30 @@ static void spapr_msi_write(void *opaque, target_phys_addr_t addr, >> /* >> * PHB PCI device >> */ >> -static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque, >> - int devfn) >> +static MemoryRegion *spapr_pci_dma_iommu_new(PCIBus *bus, void *opaque, int devfn) >> { >> sPAPRPHBState *phb = opaque; >> >> - return phb->dma; >> + return &phb->iommu; >> } >> >> +static void spapr_pci_dma_iommu_destroy(MemoryRegion *iommu) >> +{ >> + /* iommu is shared among devices, do nothing */ >> +} >> + >> +static IOMMUTLBEntry spapr_phb_translate(MemoryRegion *iommu, target_phys_addr_t addr, >> + bool is_write) >> +{ >> + sPAPRPHBState *phb = container_of(iommu, sPAPRPHBState, iommu); >> + >> + return spapr_tce_translate(phb->dma, addr, is_write); >> +} > > IIUC, sPAPRTCETable could drop the DMAContext field and just include the > actual translation info. spapr_tce_new_dma_context becomes > spapr_tce_new and returns an opaque sPAPRTCETable struct that is passed > back to spapr_tce_translate. Maybe, I just typed in code until it compiled, then forgot to copy Alex. I have no idea how to test it, so I kept the changes minimal. > > And DMAContext can disappear and will be replaced with just an > AddressSpace *. Indeed that's the plan. address_space_map() and dma_memory_map() simply duplicate each other. The only thing remaining is dma_memory_set(). > I like it! Me too, except when I'm coding it.
On Thu, Oct 11, 2012 at 1:27 PM, Avi Kivity <avi@redhat.com> wrote: > Instead of requesting a DMAContext from the bus implementation, use a > MemoryRegion. This can be initialized using memory_region_init_iommu() > (or memory_region_init_alias() for simple, static translations). > > Add a destructor, since setups that have per-device translations will > need to return a different iommu region for every device. > > Signed-off-by: Avi Kivity <avi@redhat.com> > --- > hw/pci.c | 59 +++++++++++++++++++++++++++++++++--------------------- > hw/pci.h | 7 +++++-- > hw/pci_internals.h | 5 +++-- > hw/spapr.h | 2 ++ > hw/spapr_iommu.c | 35 ++++++++++++++------------------ > hw/spapr_pci.c | 26 ++++++++++++++++++++---- > hw/spapr_pci.h | 1 + > 7 files changed, 84 insertions(+), 51 deletions(-) > > diff --git a/hw/pci.c b/hw/pci.c > index 7adf61b..02e1989 100644 > --- a/hw/pci.c > +++ b/hw/pci.c > @@ -274,6 +274,21 @@ int pci_find_domain(const PCIBus *bus) > return -1; > } > > +static MemoryRegion *pci_default_iommu(PCIBus *bus, void *opaque, int devfn) > +{ > + MemoryRegion *mr = g_new(MemoryRegion, 1); > + > + /* FIXME: inherit memory region from bus creator */ > + memory_region_init_alias(mr, "iommu-nop", get_system_memory(), 0, INT64_MAX); > + return mr; > +} > + > +static void pci_default_iommu_dtor(MemoryRegion *mr) > +{ > + memory_region_destroy(mr); > + g_free(mr); > +} > + > void pci_bus_new_inplace(PCIBus *bus, DeviceState *parent, > const char *name, > MemoryRegion *address_space_mem, > @@ -285,6 +300,7 @@ void pci_bus_new_inplace(PCIBus *bus, DeviceState *parent, > bus->devfn_min = devfn_min; > bus->address_space_mem = address_space_mem; > bus->address_space_io = address_space_io; > + pci_setup_iommu(bus, pci_default_iommu, pci_default_iommu_dtor, NULL); > > /* host bridge */ > QLIST_INIT(&bus->child); > @@ -775,21 +791,16 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, > PCI_SLOT(devfn), PCI_FUNC(devfn), name, bus->devices[devfn]->name); > return NULL; > } > + > pci_dev->bus = bus; > - if (bus->dma_context_fn) { > - pci_dev->dma = bus->dma_context_fn(bus, bus->dma_context_opaque, devfn); > - } else { > - /* FIXME: Make dma_context_fn use MemoryRegions instead, so this path is > - * taken unconditionally */ > - /* FIXME: inherit memory region from bus creator */ > - memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master", > - get_system_memory(), 0, > - memory_region_size(get_system_memory())); > - memory_region_set_enabled(&pci_dev->bus_master_enable_region, false); > - address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region); > - pci_dev->dma = g_new(DMAContext, 1); > - dma_context_init(pci_dev->dma, &pci_dev->bus_master_as, NULL, NULL, NULL); > - } > + pci_dev->iommu = bus->iommu_fn(bus, bus->iommu_opaque, devfn); > + memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master", > + pci_dev->iommu, 0, memory_region_size(pci_dev->iommu)); > + memory_region_set_enabled(&pci_dev->bus_master_enable_region, false); > + address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region); > + pci_dev->dma = g_new(DMAContext, 1); > + dma_context_init(pci_dev->dma, &pci_dev->bus_master_as, NULL, NULL, NULL); > + > pci_dev->devfn = devfn; > pstrcpy(pci_dev->name, sizeof(pci_dev->name), name); > pci_dev->irq_state = 0; > @@ -843,12 +854,12 @@ static void do_pci_unregister_device(PCIDevice *pci_dev) > pci_dev->bus->devices[pci_dev->devfn] = NULL; > pci_config_free(pci_dev); > > - if (!pci_dev->bus->dma_context_fn) { > - address_space_destroy(&pci_dev->bus_master_as); > - memory_region_destroy(&pci_dev->bus_master_enable_region); > - g_free(pci_dev->dma); > - pci_dev->dma = NULL; > - } > + address_space_destroy(&pci_dev->bus_master_as); > + memory_region_del_subregion(&pci_dev->bus_master_enable_region, pci_dev->iommu); > + pci_dev->bus->iommu_dtor_fn(pci_dev->iommu); > + memory_region_destroy(&pci_dev->bus_master_enable_region); > + g_free(pci_dev->dma); > + pci_dev->dma = NULL; > } > > static void pci_unregister_io_regions(PCIDevice *pci_dev) > @@ -2092,10 +2103,12 @@ static void pci_device_class_init(ObjectClass *klass, void *data) > k->props = pci_props; > } > > -void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque) > +void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, PCIIOMMUDestructorFunc dtor, > + void *opaque) > { > - bus->dma_context_fn = fn; > - bus->dma_context_opaque = opaque; > + bus->iommu_fn = fn; > + bus->iommu_dtor_fn = dtor; > + bus->iommu_opaque = opaque; > } > > static TypeInfo pci_device_type_info = { > diff --git a/hw/pci.h b/hw/pci.h > index a65e490..370354a 100644 > --- a/hw/pci.h > +++ b/hw/pci.h > @@ -213,6 +213,7 @@ struct PCIDevice { > PCIIORegion io_regions[PCI_NUM_REGIONS]; > AddressSpace bus_master_as; > MemoryRegion bus_master_enable_region; > + MemoryRegion *iommu; > DMAContext *dma; > > /* do not access the following fields */ > @@ -351,9 +352,11 @@ int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp, > > void pci_device_deassert_intx(PCIDevice *dev); > > -typedef DMAContext *(*PCIDMAContextFunc)(PCIBus *, void *, int); > +typedef MemoryRegion *(*PCIIOMMUFunc)(PCIBus *, void *, int); > +typedef void (*PCIIOMMUDestructorFunc)(MemoryRegion *mr); > > -void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque); > +void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, PCIIOMMUDestructorFunc dtor, > + void *opaque); > > static inline void > pci_set_byte(uint8_t *config, uint8_t val) > diff --git a/hw/pci_internals.h b/hw/pci_internals.h > index c931b64..040508f 100644 > --- a/hw/pci_internals.h > +++ b/hw/pci_internals.h > @@ -17,8 +17,9 @@ > > struct PCIBus { > BusState qbus; > - PCIDMAContextFunc dma_context_fn; > - void *dma_context_opaque; > + PCIIOMMUFunc iommu_fn; > + PCIIOMMUDestructorFunc iommu_dtor_fn; > + void *iommu_opaque; Maybe the opaque could be avoided (in later patches) by clever use of container_of() by the functions to get the parent structure? > uint8_t devfn_min; > pci_set_irq_fn set_irq; > pci_map_irq_fn map_irq; > diff --git a/hw/spapr.h b/hw/spapr.h > index ac34a17..452efba1 100644 > --- a/hw/spapr.h > +++ b/hw/spapr.h > @@ -334,6 +334,8 @@ typedef struct sPAPRTCE { > #define SPAPR_PCI_BASE_LIOBN 0x80000000 > > void spapr_iommu_init(void); > +IOMMUTLBEntry spapr_tce_translate(DMAContext *dma, target_phys_addr_t addr, > + bool is_write); > DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size); > void spapr_tce_free(DMAContext *dma); > int spapr_dma_dt(void *fdt, int node_off, const char *propname, > diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c > index 54798a3..79e35d1 100644 > --- a/hw/spapr_iommu.c > +++ b/hw/spapr_iommu.c > @@ -63,15 +63,11 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) > return NULL; > } > > -static int spapr_tce_translate(DMAContext *dma, > - dma_addr_t addr, > - target_phys_addr_t *paddr, > - target_phys_addr_t *len, > - DMADirection dir) > +IOMMUTLBEntry spapr_tce_translate(DMAContext *dma, target_phys_addr_t addr, > + bool is_write) > { > sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma); > - enum sPAPRTCEAccess access = (dir == DMA_DIRECTION_FROM_DEVICE) > - ? SPAPR_TCE_WO : SPAPR_TCE_RO; > + enum sPAPRTCEAccess access = is_write ? SPAPR_TCE_WO : SPAPR_TCE_RO; > uint64_t tce; > > #ifdef DEBUG_TCE > @@ -84,29 +80,27 @@ static int spapr_tce_translate(DMAContext *dma, > #ifdef DEBUG_TCE > fprintf(stderr, "spapr_tce_translate out of bounds\n"); > #endif > - return -EFAULT; > + return (IOMMUTLBEntry) { .valid = false }; > } > > tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT].tce; > > /* Check TCE */ > if (!(tce & access)) { > - return -EPERM; > + return (IOMMUTLBEntry) { .valid = false }; > } > > - /* How much til end of page ? */ > - *len = ((~addr) & SPAPR_TCE_PAGE_MASK) + 1; > - > - /* Translate */ > - *paddr = (tce & ~SPAPR_TCE_PAGE_MASK) | > - (addr & SPAPR_TCE_PAGE_MASK); > - > #ifdef DEBUG_TCE > fprintf(stderr, " -> *paddr=0x" TARGET_FMT_plx ", *len=0x" > - TARGET_FMT_plx "\n", *paddr, *len); > + TARGET_FMT_plx "\n", (tce & ~SPAPR_TCE_PAGE_MASK), SPAPR_TCE_PAGE_MASK + 1); > #endif > > - return 0; > + return (IOMMUTLBEntry) { > + .device_addr = addr & SPAPR_TCE_PAGE_MASK, > + .translated_addr = (tce & ~SPAPR_TCE_PAGE_MASK), > + .addr_mask = SPAPR_TCE_PAGE_MASK, > + .valid = true, > + }; > } > > DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size) > @@ -118,7 +112,7 @@ DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size) > } > > tcet = g_malloc0(sizeof(*tcet)); > - dma_context_init(&tcet->dma, &address_space_memory, spapr_tce_translate, NULL, NULL); > + dma_context_init(&tcet->dma, &address_space_memory, NULL, NULL, NULL); > > tcet->liobn = liobn; > tcet->window_size = window_size; > @@ -253,7 +247,8 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, > return 0; > } > > - if (iommu->translate == spapr_tce_translate) { > + /* FIXME: WHAT?? */ > + if (true) { > sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, iommu); > return spapr_dma_dt(fdt, node_off, propname, > tcet->liobn, 0, tcet->window_size); > diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c > index 661c05b..24f5b46 100644 > --- a/hw/spapr_pci.c > +++ b/hw/spapr_pci.c > @@ -506,14 +506,30 @@ static void spapr_msi_write(void *opaque, target_phys_addr_t addr, > /* > * PHB PCI device > */ > -static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque, > - int devfn) > +static MemoryRegion *spapr_pci_dma_iommu_new(PCIBus *bus, void *opaque, int devfn) > { > sPAPRPHBState *phb = opaque; > > - return phb->dma; > + return &phb->iommu; > } > > +static void spapr_pci_dma_iommu_destroy(MemoryRegion *iommu) > +{ > + /* iommu is shared among devices, do nothing */ > +} > + > +static IOMMUTLBEntry spapr_phb_translate(MemoryRegion *iommu, target_phys_addr_t addr, > + bool is_write) > +{ > + sPAPRPHBState *phb = container_of(iommu, sPAPRPHBState, iommu); > + > + return spapr_tce_translate(phb->dma, addr, is_write); > +} > + > +static MemoryRegionIOMMUOps spapr_iommu_ops = { > + .translate = spapr_phb_translate, > +}; > + > static int spapr_phb_init(SysBusDevice *s) > { > sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); > @@ -576,7 +592,9 @@ static int spapr_phb_init(SysBusDevice *s) > sphb->dma_window_start = 0; > sphb->dma_window_size = 0x40000000; > sphb->dma = spapr_tce_new_dma_context(sphb->dma_liobn, sphb->dma_window_size); > - pci_setup_iommu(bus, spapr_pci_dma_context_fn, sphb); > + memory_region_init_iommu(&sphb->iommu, &spapr_iommu_ops, get_system_memory(), > + "iommu-spapr", INT64_MAX); > + pci_setup_iommu(bus, spapr_pci_dma_iommu_new, spapr_pci_dma_iommu_destroy, sphb); > > QLIST_INSERT_HEAD(&spapr->phbs, sphb, list); > > diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h > index 670dc62..171db45 100644 > --- a/hw/spapr_pci.h > +++ b/hw/spapr_pci.h > @@ -45,6 +45,7 @@ typedef struct sPAPRPHBState { > target_phys_addr_t mem_win_addr, mem_win_size, io_win_addr, io_win_size; > target_phys_addr_t msi_win_addr; > MemoryRegion memwindow, iowindow, msiwindow; > + MemoryRegion iommu; > > uint32_t dma_liobn; > uint64_t dma_window_start; > -- > 1.7.12 >
On 10/13/2012 11:13 AM, Blue Swirl wrote: >> struct PCIBus { >> BusState qbus; >> - PCIDMAContextFunc dma_context_fn; >> - void *dma_context_opaque; >> + PCIIOMMUFunc iommu_fn; >> + PCIIOMMUDestructorFunc iommu_dtor_fn; >> + void *iommu_opaque; > > Maybe the opaque could be avoided (in later patches) by clever use of > container_of() by the functions to get the parent structure? Indeed yes. This requires replacing pci_bus_new() and pci_register_bus() by a conventional pci_bus_init(). I think it's the right direction to move and the memory API always uses caller-managed allocation and objects, instead of API-managed allocation and pointers/handles.
diff --git a/hw/pci.c b/hw/pci.c index 7adf61b..02e1989 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -274,6 +274,21 @@ int pci_find_domain(const PCIBus *bus) return -1; } +static MemoryRegion *pci_default_iommu(PCIBus *bus, void *opaque, int devfn) +{ + MemoryRegion *mr = g_new(MemoryRegion, 1); + + /* FIXME: inherit memory region from bus creator */ + memory_region_init_alias(mr, "iommu-nop", get_system_memory(), 0, INT64_MAX); + return mr; +} + +static void pci_default_iommu_dtor(MemoryRegion *mr) +{ + memory_region_destroy(mr); + g_free(mr); +} + void pci_bus_new_inplace(PCIBus *bus, DeviceState *parent, const char *name, MemoryRegion *address_space_mem, @@ -285,6 +300,7 @@ void pci_bus_new_inplace(PCIBus *bus, DeviceState *parent, bus->devfn_min = devfn_min; bus->address_space_mem = address_space_mem; bus->address_space_io = address_space_io; + pci_setup_iommu(bus, pci_default_iommu, pci_default_iommu_dtor, NULL); /* host bridge */ QLIST_INIT(&bus->child); @@ -775,21 +791,16 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, PCI_SLOT(devfn), PCI_FUNC(devfn), name, bus->devices[devfn]->name); return NULL; } + pci_dev->bus = bus; - if (bus->dma_context_fn) { - pci_dev->dma = bus->dma_context_fn(bus, bus->dma_context_opaque, devfn); - } else { - /* FIXME: Make dma_context_fn use MemoryRegions instead, so this path is - * taken unconditionally */ - /* FIXME: inherit memory region from bus creator */ - memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master", - get_system_memory(), 0, - memory_region_size(get_system_memory())); - memory_region_set_enabled(&pci_dev->bus_master_enable_region, false); - address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region); - pci_dev->dma = g_new(DMAContext, 1); - dma_context_init(pci_dev->dma, &pci_dev->bus_master_as, NULL, NULL, NULL); - } + pci_dev->iommu = bus->iommu_fn(bus, bus->iommu_opaque, devfn); + memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master", + pci_dev->iommu, 0, memory_region_size(pci_dev->iommu)); + memory_region_set_enabled(&pci_dev->bus_master_enable_region, false); + address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region); + pci_dev->dma = g_new(DMAContext, 1); + dma_context_init(pci_dev->dma, &pci_dev->bus_master_as, NULL, NULL, NULL); + pci_dev->devfn = devfn; pstrcpy(pci_dev->name, sizeof(pci_dev->name), name); pci_dev->irq_state = 0; @@ -843,12 +854,12 @@ static void do_pci_unregister_device(PCIDevice *pci_dev) pci_dev->bus->devices[pci_dev->devfn] = NULL; pci_config_free(pci_dev); - if (!pci_dev->bus->dma_context_fn) { - address_space_destroy(&pci_dev->bus_master_as); - memory_region_destroy(&pci_dev->bus_master_enable_region); - g_free(pci_dev->dma); - pci_dev->dma = NULL; - } + address_space_destroy(&pci_dev->bus_master_as); + memory_region_del_subregion(&pci_dev->bus_master_enable_region, pci_dev->iommu); + pci_dev->bus->iommu_dtor_fn(pci_dev->iommu); + memory_region_destroy(&pci_dev->bus_master_enable_region); + g_free(pci_dev->dma); + pci_dev->dma = NULL; } static void pci_unregister_io_regions(PCIDevice *pci_dev) @@ -2092,10 +2103,12 @@ static void pci_device_class_init(ObjectClass *klass, void *data) k->props = pci_props; } -void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque) +void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, PCIIOMMUDestructorFunc dtor, + void *opaque) { - bus->dma_context_fn = fn; - bus->dma_context_opaque = opaque; + bus->iommu_fn = fn; + bus->iommu_dtor_fn = dtor; + bus->iommu_opaque = opaque; } static TypeInfo pci_device_type_info = { diff --git a/hw/pci.h b/hw/pci.h index a65e490..370354a 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -213,6 +213,7 @@ struct PCIDevice { PCIIORegion io_regions[PCI_NUM_REGIONS]; AddressSpace bus_master_as; MemoryRegion bus_master_enable_region; + MemoryRegion *iommu; DMAContext *dma; /* do not access the following fields */ @@ -351,9 +352,11 @@ int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp, void pci_device_deassert_intx(PCIDevice *dev); -typedef DMAContext *(*PCIDMAContextFunc)(PCIBus *, void *, int); +typedef MemoryRegion *(*PCIIOMMUFunc)(PCIBus *, void *, int); +typedef void (*PCIIOMMUDestructorFunc)(MemoryRegion *mr); -void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque); +void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, PCIIOMMUDestructorFunc dtor, + void *opaque); static inline void pci_set_byte(uint8_t *config, uint8_t val) diff --git a/hw/pci_internals.h b/hw/pci_internals.h index c931b64..040508f 100644 --- a/hw/pci_internals.h +++ b/hw/pci_internals.h @@ -17,8 +17,9 @@ struct PCIBus { BusState qbus; - PCIDMAContextFunc dma_context_fn; - void *dma_context_opaque; + PCIIOMMUFunc iommu_fn; + PCIIOMMUDestructorFunc iommu_dtor_fn; + void *iommu_opaque; uint8_t devfn_min; pci_set_irq_fn set_irq; pci_map_irq_fn map_irq; diff --git a/hw/spapr.h b/hw/spapr.h index ac34a17..452efba1 100644 --- a/hw/spapr.h +++ b/hw/spapr.h @@ -334,6 +334,8 @@ typedef struct sPAPRTCE { #define SPAPR_PCI_BASE_LIOBN 0x80000000 void spapr_iommu_init(void); +IOMMUTLBEntry spapr_tce_translate(DMAContext *dma, target_phys_addr_t addr, + bool is_write); DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size); void spapr_tce_free(DMAContext *dma); int spapr_dma_dt(void *fdt, int node_off, const char *propname, diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c index 54798a3..79e35d1 100644 --- a/hw/spapr_iommu.c +++ b/hw/spapr_iommu.c @@ -63,15 +63,11 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) return NULL; } -static int spapr_tce_translate(DMAContext *dma, - dma_addr_t addr, - target_phys_addr_t *paddr, - target_phys_addr_t *len, - DMADirection dir) +IOMMUTLBEntry spapr_tce_translate(DMAContext *dma, target_phys_addr_t addr, + bool is_write) { sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma); - enum sPAPRTCEAccess access = (dir == DMA_DIRECTION_FROM_DEVICE) - ? SPAPR_TCE_WO : SPAPR_TCE_RO; + enum sPAPRTCEAccess access = is_write ? SPAPR_TCE_WO : SPAPR_TCE_RO; uint64_t tce; #ifdef DEBUG_TCE @@ -84,29 +80,27 @@ static int spapr_tce_translate(DMAContext *dma, #ifdef DEBUG_TCE fprintf(stderr, "spapr_tce_translate out of bounds\n"); #endif - return -EFAULT; + return (IOMMUTLBEntry) { .valid = false }; } tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT].tce; /* Check TCE */ if (!(tce & access)) { - return -EPERM; + return (IOMMUTLBEntry) { .valid = false }; } - /* How much til end of page ? */ - *len = ((~addr) & SPAPR_TCE_PAGE_MASK) + 1; - - /* Translate */ - *paddr = (tce & ~SPAPR_TCE_PAGE_MASK) | - (addr & SPAPR_TCE_PAGE_MASK); - #ifdef DEBUG_TCE fprintf(stderr, " -> *paddr=0x" TARGET_FMT_plx ", *len=0x" - TARGET_FMT_plx "\n", *paddr, *len); + TARGET_FMT_plx "\n", (tce & ~SPAPR_TCE_PAGE_MASK), SPAPR_TCE_PAGE_MASK + 1); #endif - return 0; + return (IOMMUTLBEntry) { + .device_addr = addr & SPAPR_TCE_PAGE_MASK, + .translated_addr = (tce & ~SPAPR_TCE_PAGE_MASK), + .addr_mask = SPAPR_TCE_PAGE_MASK, + .valid = true, + }; } DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size) @@ -118,7 +112,7 @@ DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size) } tcet = g_malloc0(sizeof(*tcet)); - dma_context_init(&tcet->dma, &address_space_memory, spapr_tce_translate, NULL, NULL); + dma_context_init(&tcet->dma, &address_space_memory, NULL, NULL, NULL); tcet->liobn = liobn; tcet->window_size = window_size; @@ -253,7 +247,8 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, return 0; } - if (iommu->translate == spapr_tce_translate) { + /* FIXME: WHAT?? */ + if (true) { sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, iommu); return spapr_dma_dt(fdt, node_off, propname, tcet->liobn, 0, tcet->window_size); diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c index 661c05b..24f5b46 100644 --- a/hw/spapr_pci.c +++ b/hw/spapr_pci.c @@ -506,14 +506,30 @@ static void spapr_msi_write(void *opaque, target_phys_addr_t addr, /* * PHB PCI device */ -static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque, - int devfn) +static MemoryRegion *spapr_pci_dma_iommu_new(PCIBus *bus, void *opaque, int devfn) { sPAPRPHBState *phb = opaque; - return phb->dma; + return &phb->iommu; } +static void spapr_pci_dma_iommu_destroy(MemoryRegion *iommu) +{ + /* iommu is shared among devices, do nothing */ +} + +static IOMMUTLBEntry spapr_phb_translate(MemoryRegion *iommu, target_phys_addr_t addr, + bool is_write) +{ + sPAPRPHBState *phb = container_of(iommu, sPAPRPHBState, iommu); + + return spapr_tce_translate(phb->dma, addr, is_write); +} + +static MemoryRegionIOMMUOps spapr_iommu_ops = { + .translate = spapr_phb_translate, +}; + static int spapr_phb_init(SysBusDevice *s) { sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); @@ -576,7 +592,9 @@ static int spapr_phb_init(SysBusDevice *s) sphb->dma_window_start = 0; sphb->dma_window_size = 0x40000000; sphb->dma = spapr_tce_new_dma_context(sphb->dma_liobn, sphb->dma_window_size); - pci_setup_iommu(bus, spapr_pci_dma_context_fn, sphb); + memory_region_init_iommu(&sphb->iommu, &spapr_iommu_ops, get_system_memory(), + "iommu-spapr", INT64_MAX); + pci_setup_iommu(bus, spapr_pci_dma_iommu_new, spapr_pci_dma_iommu_destroy, sphb); QLIST_INSERT_HEAD(&spapr->phbs, sphb, list); diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h index 670dc62..171db45 100644 --- a/hw/spapr_pci.h +++ b/hw/spapr_pci.h @@ -45,6 +45,7 @@ typedef struct sPAPRPHBState { target_phys_addr_t mem_win_addr, mem_win_size, io_win_addr, io_win_size; target_phys_addr_t msi_win_addr; MemoryRegion memwindow, iowindow, msiwindow; + MemoryRegion iommu; uint32_t dma_liobn; uint64_t dma_window_start;
Instead of requesting a DMAContext from the bus implementation, use a MemoryRegion. This can be initialized using memory_region_init_iommu() (or memory_region_init_alias() for simple, static translations). Add a destructor, since setups that have per-device translations will need to return a different iommu region for every device. Signed-off-by: Avi Kivity <avi@redhat.com> --- hw/pci.c | 59 +++++++++++++++++++++++++++++++++--------------------- hw/pci.h | 7 +++++-- hw/pci_internals.h | 5 +++-- hw/spapr.h | 2 ++ hw/spapr_iommu.c | 35 ++++++++++++++------------------ hw/spapr_pci.c | 26 ++++++++++++++++++++---- hw/spapr_pci.h | 1 + 7 files changed, 84 insertions(+), 51 deletions(-)