Message ID | 1415629216-59652-3-git-send-email-blaschka@linux.vnet.ibm.com |
---|---|
State | New |
Headers | show |
On 10.11.14 15:20, Frank Blaschka wrote: > From: Frank Blaschka <frank.blaschka@de.ibm.com> > > This patch implements the s390 pci instructions in qemu. It allows > to access and drive pci devices attached to the s390 pci bus. > Because of platform constrains devices using IO BARs are not > supported. Also a device has to support MSI/MSI-X to run on s390. > > Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> > --- > target-s390x/Makefile.objs | 2 +- > target-s390x/kvm.c | 52 ++++ > target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ > target-s390x/pci_ic.h | 335 ++++++++++++++++++++ > 4 files changed, 1141 insertions(+), 1 deletion(-) > create mode 100644 target-s390x/pci_ic.c > create mode 100644 target-s390x/pci_ic.h > > diff --git a/target-s390x/Makefile.objs b/target-s390x/Makefile.objs > index 2c57494..cc71400 100644 > --- a/target-s390x/Makefile.objs > +++ b/target-s390x/Makefile.objs > @@ -2,4 +2,4 @@ obj-y += translate.o helper.o cpu.o interrupt.o > obj-y += int_helper.o fpu_helper.o cc_helper.o mem_helper.o misc_helper.o > obj-y += gdbstub.o > obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o > -obj-$(CONFIG_KVM) += kvm.o > +obj-$(CONFIG_KVM) += kvm.o pci_ic.o > diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c > index 5b10a25..d59e740 100644 > --- a/target-s390x/kvm.c > +++ b/target-s390x/kvm.c > @@ -40,6 +40,7 @@ > #include "exec/gdbstub.h" > #include "trace.h" > #include "qapi-event.h" > +#include "pci_ic.h" > > /* #define DEBUG_KVM */ > > @@ -56,6 +57,7 @@ > #define IPA0_B2 0xb200 > #define IPA0_B9 0xb900 > #define IPA0_EB 0xeb00 > +#define IPA0_E3 0xe300 > > #define PRIV_B2_SCLP_CALL 0x20 > #define PRIV_B2_CSCH 0x30 > @@ -76,8 +78,17 @@ > #define PRIV_B2_XSCH 0x76 > > #define PRIV_EB_SQBS 0x8a > +#define PRIV_EB_PCISTB 0xd0 > +#define PRIV_EB_SIC 0xd1 > > #define PRIV_B9_EQBS 0x9c > +#define PRIV_B9_CLP 0xa0 > +#define PRIV_B9_PCISTG 0xd0 > +#define PRIV_B9_PCILG 0xd2 > +#define PRIV_B9_RPCIT 0xd3 > + > +#define PRIV_E3_MPCIFC 0xd0 > +#define PRIV_E3_STPCIFC 0xd4 > > #define DIAG_IPL 0x308 > #define DIAG_KVM_HYPERCALL 0x500 > @@ -814,6 +825,18 @@ static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) > int r = 0; > > switch (ipa1) { > + case PRIV_B9_CLP: > + r = kvm_clp_service_call(cpu, run); > + break; > + case PRIV_B9_PCISTG: > + r = kvm_pcistg_service_call(cpu, run); > + break; > + case PRIV_B9_PCILG: > + r = kvm_pcilg_service_call(cpu, run); > + break; > + case PRIV_B9_RPCIT: > + r = kvm_rpcit_service_call(cpu, run); > + break; > case PRIV_B9_EQBS: > /* just inject exception */ > r = -1; > @@ -832,6 +855,12 @@ static int handle_eb(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) > int r = 0; > > switch (ipa1) { > + case PRIV_EB_PCISTB: > + r = kvm_pcistb_service_call(cpu, run); > + break; > + case PRIV_EB_SIC: > + r = kvm_sic_service_call(cpu, run); > + break; > case PRIV_EB_SQBS: > /* just inject exception */ > r = -1; > @@ -845,6 +874,26 @@ static int handle_eb(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) > return r; > } > > +static int handle_e3(S390CPU *cpu, struct kvm_run *run, uint8_t ipbl) > +{ > + int r = 0; > + > + switch (ipbl) { > + case PRIV_E3_MPCIFC: > + r = kvm_mpcifc_service_call(cpu, run); > + break; > + case PRIV_E3_STPCIFC: > + r = kvm_stpcifc_service_call(cpu, run); > + break; > + default: > + r = -1; > + DPRINTF("KVM: unhandled PRIV: 0xe3%x\n", ipbl); > + break; > + } > + > + return r; > +} > + > static int handle_hypercall(S390CPU *cpu, struct kvm_run *run) > { > CPUS390XState *env = &cpu->env; > @@ -1041,6 +1090,9 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run) > case IPA0_EB: > r = handle_eb(cpu, run, ipa1); > break; > + case IPA0_E3: > + r = handle_e3(cpu, run, run->s390_sieic.ipb & 0xff); > + break; > case IPA0_DIAG: > r = handle_diag(cpu, run, run->s390_sieic.ipb); > break; > diff --git a/target-s390x/pci_ic.c b/target-s390x/pci_ic.c > new file mode 100644 > index 0000000..6c05faf > --- /dev/null > +++ b/target-s390x/pci_ic.c > @@ -0,0 +1,753 @@ > +/* > + * s390 PCI intercepts > + * > + * Copyright 2014 IBM Corp. > + * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com> > + * Hong Bo Li <lihbbj@cn.ibm.com> > + * Yi Min Zhao <zyimin@cn.ibm.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or (at > + * your option) any later version. See the COPYING file in the top-level > + * directory. > + */ > + > +#include <sys/types.h> > +#include <sys/ioctl.h> > +#include <sys/mman.h> > + > +#include <linux/kvm.h> > +#include <asm/ptrace.h> > +#include <hw/pci/pci.h> > +#include <hw/pci/pci_host.h> > +#include <net/net.h> > + > +#include "qemu-common.h" > +#include "qemu/timer.h" > +#include "migration/qemu-file.h" > +#include "sysemu/sysemu.h" > +#include "sysemu/kvm.h" > +#include "cpu.h" > +#include "sysemu/device_tree.h" > +#include "monitor/monitor.h" > +#include "pci_ic.h" > + > +#include "hw/hw.h" > +#include "hw/pci/pci.h" > +#include "hw/pci/pci_bridge.h" > +#include "hw/pci/pci_bus.h" > +#include "hw/pci/pci_host.h" > +#include "hw/s390x/s390-pci-bus.h" > +#include "exec/exec-all.h" > +#include "exec/memory-internal.h" > + > +/* #define DEBUG_S390PCI_IC */ > +#ifdef DEBUG_S390PCI_IC > +#define DPRINTF(fmt, ...) \ > + do { fprintf(stderr, "s390pci_ic: " fmt, ## __VA_ARGS__); } while (0) > +#else > +#define DPRINTF(fmt, ...) \ > + do { } while (0) > +#endif > + > +static uint64_t resume_token; global variable? Why? > + > +static uint8_t barsize(uint64_t size) > +{ > + uint64_t mask = 1; > + int i; > + > + if (!size) { > + return 0; > + } > + > + for (i = 0; i < 64; i++) { > + if (size & mask) { > + break; > + } > + mask = (mask << 1); > + } > + > + return i; > +} Isn't there an existing helper for this in the PCI layer? In fact, please check whether it makes sense to move some of the code to hw/ rather than target-s390x. > + > +static void s390_set_status_code(CPUS390XState *env, > + uint8_t r, uint64_t status_code) > +{ > + env->regs[r] &= ~0xff000000; > + env->regs[r] |= (status_code & 0xff) << 24; > +} > + > +static int list_pci(ClpReqRspListPci *rrb, uint8_t *cc) > +{ > + S390PCIBusDevice *pbdev; > + uint32_t res_code, initial_l2, g_l2, finish; > + int rc, idx; > + > + rc = 0; > + if (be16_to_cpu(rrb->request.hdr.len) != 32) { > + res_code = CLP_RC_LEN; > + rc = -EINVAL; > + goto out; > + } > + > + if ((be32_to_cpu(rrb->request.fmt) & CLP_MASK_FMT) != 0) { > + res_code = CLP_RC_FMT; > + rc = -EINVAL; > + goto out; > + } > + > + if ((be32_to_cpu(rrb->request.fmt) & ~CLP_MASK_FMT) != 0 || > + rrb->request.reserved1 != 0 || > + rrb->request.reserved2 != 0) { > + res_code = CLP_RC_RESNOT0; > + rc = -EINVAL; > + goto out; > + } > + > + if (be64_to_cpu(rrb->request.resume_token) == 0) { > + resume_token = 0; > + } else if (be64_to_cpu(rrb->request.resume_token) != resume_token) { > + res_code = CLP_RC_LISTPCI_BADRT; > + rc = -EINVAL; > + goto out; > + } > + > + if (be16_to_cpu(rrb->response.hdr.len) < 48) { > + res_code = CLP_RC_8K; > + rc = -EINVAL; > + goto out; > + } > + > + initial_l2 = be16_to_cpu(rrb->response.hdr.len); > + if ((initial_l2 - LIST_PCI_HDR_LEN) % sizeof(ClpFhListEntry) > + != 0) { > + rc = -EINVAL; > + *cc = 3; > + goto out; > + } > + > + rrb->response.fmt = 0; > + rrb->response.reserved1 = rrb->response.reserved2 = 0; > + rrb->response.mdd = cpu_to_be32(FH_VIRT); > + rrb->response.max_fn = cpu_to_be16(PCI_MAX_FUNCTIONS); > + rrb->response.entry_size = sizeof(ClpFhListEntry); > + finish = 0; > + idx = resume_token; > + g_l2 = LIST_PCI_HDR_LEN; > + do { > + pbdev = s390_pci_find_dev_by_idx(idx); > + if (!pbdev) { > + finish = 1; > + break; > + } > + rrb->response.fh_list[idx - resume_token].device_id = > + pci_get_word(pbdev->pdev->config + PCI_DEVICE_ID); > + rrb->response.fh_list[idx - resume_token].vendor_id = > + pci_get_word(pbdev->pdev->config + PCI_VENDOR_ID); > + rrb->response.fh_list[idx - resume_token].config = > + cpu_to_be32(0x80000000); > + rrb->response.fh_list[idx - resume_token].fid = cpu_to_be32(pbdev->fid); > + rrb->response.fh_list[idx - resume_token].fh = cpu_to_be32(pbdev->fh); > + > + g_l2 += sizeof(ClpFhListEntry); > + DPRINTF("g_l2 %d vendor id 0x%x device id 0x%x fid 0x%x fh 0x%x\n", > + g_l2, > + rrb->response.fh_list[idx - resume_token].vendor_id, > + rrb->response.fh_list[idx - resume_token].device_id, > + rrb->response.fh_list[idx - resume_token].fid, > + rrb->response.fh_list[idx - resume_token].fh); > + idx++; > + } while (g_l2 < initial_l2); > + > + if (finish == 1) { > + resume_token = 0; > + } else { > + resume_token = idx; > + } > + rrb->response.resume_token = cpu_to_be64(resume_token); > + rrb->response.hdr.len = cpu_to_be16(g_l2); > + rrb->response.hdr.rsp = cpu_to_be16(CLP_RC_OK); > +out: > + if (rc) { > + DPRINTF("list pci failed rc 0x%x\n", rc); > + rrb->response.hdr.rsp = cpu_to_be16(res_code); > + } > + return rc; > +} > + > +int kvm_clp_service_call(S390CPU *cpu, struct kvm_run *run) Please separate kvm_ calls from the actual implementation. Do all the parameter extraction in the kvm_ function and then forward on to a generic function that doesn't need to know about kvm_run anymore. kvm specific c file: int kvm_clp_service_call(S390CPU *cpu, struct kvm_run *run) { uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; return clp_service_call(cpu, r2); } io / pci specific c file: int clp_service_call(S390CPU *cpu, uint8_t r2) { ... } > +{ > + ClpReqHdr *reqh; > + ClpRspHdr *resh; > + S390PCIBusDevice *pbdev; > + uint32_t req_len; > + uint32_t res_len; > + uint8_t *buffer; > + uint8_t cc = 0; > + CPUS390XState *env = &cpu->env; > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > + int i; > + > + buffer = g_malloc0(4096 * 2); Do you really need this? Couldn't you make the pointers be actual structs on the stack and just read/write from them directly? The compiler should be smart enough to throw away elements that aren't used anymore to conserve memory. > + cpu_synchronize_state(CPU(cpu)); > + > + if (env->psw.mask & PSW_MASK_PSTATE) { > + program_interrupt(env, PGM_PRIVILEGED, 4); > + return 0; > + } > + > + cpu_physical_memory_rw(env->regs[r2], buffer, sizeof(*reqh), 0); > + reqh = (ClpReqHdr *)buffer; > + req_len = be16_to_cpu(reqh->len); > + if (req_len < 16 || req_len > 8184 || (req_len % 8 != 0)) { > + program_interrupt(env, PGM_OPERAND, 4); > + return 0; > + } > + > + cpu_physical_memory_rw(env->regs[r2], buffer, req_len + sizeof(*resh), 0); > + resh = (ClpRspHdr *)(buffer + req_len); > + res_len = be16_to_cpu(resh->len); > + if (res_len < 8 || res_len > 8176 || (res_len % 8 != 0)) { > + program_interrupt(env, PGM_OPERAND, 4); > + return 0; > + } > + if ((req_len + res_len) > 8192) { > + program_interrupt(env, PGM_OPERAND, 4); > + return 0; > + } > + > + cpu_physical_memory_rw(env->regs[r2], buffer, req_len + res_len, 0); > + > + if (req_len != 32) { > + resh->rsp = cpu_to_be16(CLP_RC_LEN); > + goto out; > + } > + > + switch (reqh->cmd) { > + case CLP_LIST_PCI: { > + ClpReqRspListPci *rrb = (ClpReqRspListPci *)buffer; > + list_pci(rrb, &cc); > + break; > + } > + case CLP_SET_PCI_FN: { > + ClpReqSetPci *reqsetpci = (ClpReqSetPci *)reqh; > + ClpRspSetPci *ressetpci = (ClpRspSetPci *)resh; > + > + pbdev = s390_pci_find_dev_by_fh(be32_to_cpu(reqsetpci->fh)); > + if (!pbdev) { > + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_SETPCIFN_FH); > + goto out; > + } > + > + switch (reqsetpci->oc) { > + case CLP_SET_ENABLE_PCI_FN: > + pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET; > + ressetpci->fh = cpu_to_be32(pbdev->fh); > + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_OK); > + break; > + case CLP_SET_DISABLE_PCI_FN: > + pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET); > + ressetpci->fh = cpu_to_be32(pbdev->fh); > + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_OK); > + break; > + default: > + DPRINTF("unknown set pci command\n"); > + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_SETPCIFN_FHOP); > + break; > + } > + break; > + } > + case CLP_QUERY_PCI_FN: { > + ClpReqQueryPci *reqquery = (ClpReqQueryPci *)reqh; > + ClpRspQueryPci *resquery = (ClpRspQueryPci *)resh; > + > + pbdev = s390_pci_find_dev_by_fh(reqquery->fh); > + if (!pbdev) { > + DPRINTF("query pci no pci dev\n"); > + resquery->hdr.rsp = cpu_to_be16(CLP_RC_SETPCIFN_FH); > + goto out; > + } > + > + for (i = 0; i < PCI_BAR_COUNT; i++) { > + uint64_t data = pci_host_config_read_common(pbdev->pdev, > + 0x10 + (i * 4), pci_config_size(pbdev->pdev), 4); > + > + resquery->bar[i] = bswap32(data); > + resquery->bar_size[i] = barsize(pbdev->pdev->io_regions[i].size); > + DPRINTF("bar %d addr 0x%x size 0x%lx barsize 0x%x\n", i, > + resquery->bar[i], pbdev->pdev->io_regions[i].size, > + resquery->bar_size[i]); > + } > + > + resquery->sdma = ZPCI_SDMA_ADDR; > + resquery->edma = ZPCI_EDMA_ADDR; > + resquery->pchid = 0; > + resquery->ug = 1; > + resquery->uid = pbdev->fid; > + > + resquery->hdr.rsp = CLP_RC_OK; > + break; > + } > + case CLP_QUERY_PCI_FNGRP: { > + ClpRspQueryPciGrp *resgrp = (ClpRspQueryPciGrp *)resh; > + resgrp->fr = 1; > + resgrp->dasm = 0; > + resgrp->msia = ZPCI_MSI_ADDR; > + resgrp->mui = 0; > + resgrp->i = 128; > + resgrp->version = 0; > + > + resgrp->hdr.rsp = CLP_RC_OK; > + break; > + } > + default: > + DPRINTF("unknown clp command\n"); > + resh->rsp = cpu_to_be16(CLP_RC_CMD); > + break; > + } > + > +out: > + cpu_physical_memory_rw(env->regs[r2], buffer, req_len + res_len, 1); ... ah, to write back. Wouldn't it be cleaner to do this explicitly? > + g_free(buffer); > + setcc(cpu, cc); > + return 0; > +} > + > +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) > +{ > + CPUS390XState *env = &cpu->env; > + S390PCIBusDevice *pbdev; > + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > + PciLgStg *rp; > + uint64_t offset; > + uint64_t data; > + uint8_t len; > + > + cpu_synchronize_state(CPU(cpu)); > + > + if (env->psw.mask & PSW_MASK_PSTATE) { > + program_interrupt(env, PGM_PRIVILEGED, 4); > + return 0; > + } > + > + if (r2 & 0x1) { > + program_interrupt(env, PGM_SPECIFICATION, 4); > + return 0; > + } > + > + rp = (PciLgStg *)&env->regs[r2]; > + offset = env->regs[r2 + 1]; > + > + pbdev = s390_pci_find_dev_by_fh(rp->fh); > + if (!pbdev) { > + DPRINTF("pcilg no pci dev\n"); > + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > + return 0; > + } > + > + len = rp->len & 0xF; > + if (rp->pcias < 6) { > + if ((8 - (offset & 0x7)) < len) { > + program_interrupt(env, PGM_OPERAND, 4); > + return 0; > + } > + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; > + io_mem_read(mr, offset, &data, len); > + } else if (rp->pcias == 15) { > + if ((4 - (offset & 0x3)) < len) { > + program_interrupt(env, PGM_OPERAND, 4); > + return 0; > + } > + data = pci_host_config_read_common( > + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); > + > + switch (len) { > + case 1: > + break; > + case 2: > + data = cpu_to_le16(data); > + break; > + case 4: > + data = cpu_to_le32(data); > + break; > + case 8: > + data = cpu_to_le64(data); > + break; Why? Also, this is wrong. cpu_to_le64 convert between host endianness and LE. So if you're running this on an LE host, you won't swap the value and get a broken result. If you know that the value is always swapped, use bswapxx(). > + default: > + program_interrupt(env, PGM_OPERAND, 4); > + return 0; > + } > + } else { > + DPRINTF("invalid space\n"); > + setcc(cpu, ZPCI_PCI_LS_ERR); > + s390_set_status_code(env, r2, ZPCI_PCI_ST_INVAL_AS); > + return 0; > + } > + > + env->regs[r1] = data; > + setcc(cpu, ZPCI_PCI_LS_OK); > + return 0; > +} > + > +static void update_msix_table_msg_data(S390PCIBusDevice *pbdev, uint64_t offset, > + uint64_t *data, uint8_t len) > +{ > + uint32_t msg_data; > + > + if (offset % PCI_MSIX_ENTRY_SIZE != 8) { > + return; > + } > + > + if (len != 4) { > + DPRINTF("access msix table msg data but len is %d\n", len); > + return; > + } > + > + msg_data = (pbdev->fid << ZPCI_MSI_VEC_BITS) | le32_to_cpu(*data); > + *data = cpu_to_le32(msg_data); > + DPRINTF("update msix msg_data to 0x%x\n", msg_data); > +} > + > +static int trap_msix(S390PCIBusDevice *pbdev, uint64_t offset, uint8_t pcias) > +{ > + if (pbdev->msix.available && pbdev->msix.table_bar == pcias && > + offset >= pbdev->msix.table_offset && > + offset <= pbdev->msix.table_offset + > + (pbdev->msix.entries - 1) * PCI_MSIX_ENTRY_SIZE) { > + return 1; > + } else { > + return 0; > + } > +} > + > +int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run) > +{ > + CPUS390XState *env = &cpu->env; > + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > + PciLgStg *rp; > + uint64_t offset, data; > + S390PCIBusDevice *pbdev; > + uint8_t len; > + > + cpu_synchronize_state(CPU(cpu)); > + > + if (env->psw.mask & PSW_MASK_PSTATE) { > + program_interrupt(env, PGM_PRIVILEGED, 4); > + return 0; > + } > + > + if (r2 & 0x1) { > + program_interrupt(env, PGM_SPECIFICATION, 4); > + return 0; > + } > + > + rp = (PciLgStg *)&env->regs[r2]; > + offset = env->regs[r2 + 1]; > + > + pbdev = s390_pci_find_dev_by_fh(rp->fh); > + if (!pbdev) { > + DPRINTF("pcistg no pci dev\n"); > + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > + return 0; > + } > + > + data = env->regs[r1]; > + len = rp->len & 0xF; > + if (rp->pcias < 6) { > + if ((8 - (offset & 0x7)) < len) { > + program_interrupt(env, PGM_OPERAND, 4); > + return 0; > + } > + MemoryRegion *mr; > + if (trap_msix(pbdev, offset, rp->pcias)) { > + offset = offset - pbdev->msix.table_offset; > + mr = &pbdev->pdev->msix_table_mmio; > + update_msix_table_msg_data(pbdev, offset, &data, len); > + } else { > + mr = pbdev->pdev->io_regions[rp->pcias].memory; > + } > + > + io_mem_write(mr, offset, data, len); > + } else if (rp->pcias == 15) { > + if ((4 - (offset & 0x3)) < len) { > + program_interrupt(env, PGM_OPERAND, 4); > + return 0; > + } > + switch (len) { > + case 1: > + break; > + case 2: > + data = le16_to_cpu(data); > + break; > + case 4: > + data = le32_to_cpu(data); > + break; > + case 8: > + data = le64_to_cpu(data); > + break; > + default: > + program_interrupt(env, PGM_OPERAND, 4); > + return 0; > + } I guess you want a generic function similar to qemu_bswap_len() that supports 64bit? > + > + pci_host_config_write_common(pbdev->pdev, offset, > + pci_config_size(pbdev->pdev), > + data, len); > + } else { > + DPRINTF("pcistg invalid space\n"); > + setcc(cpu, ZPCI_PCI_LS_ERR); > + s390_set_status_code(env, r2, ZPCI_PCI_ST_INVAL_AS); > + return 0; > + } > + > + setcc(cpu, ZPCI_PCI_LS_OK); > + return 0; > +} > + > +int kvm_rpcit_service_call(S390CPU *cpu, struct kvm_run *run) > +{ > + CPUS390XState *env = &cpu->env; > + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > + uint32_t fh; > + uint64_t pte; > + S390PCIBusDevice *pbdev; > + ram_addr_t size; > + int flags; > + IOMMUTLBEntry entry; > + > + cpu_synchronize_state(CPU(cpu)); > + > + if (env->psw.mask & PSW_MASK_PSTATE) { > + program_interrupt(env, PGM_PRIVILEGED, 4); > + return 0; > + } > + > + if (r2 & 0x1) { > + program_interrupt(env, PGM_SPECIFICATION, 4); > + return 0; > + } > + > + fh = env->regs[r1] >> 32; > + size = env->regs[r2 + 1]; > + > + pbdev = s390_pci_find_dev_by_fh(fh); > + > + if (!pbdev) { > + DPRINTF("rpcit no pci dev\n"); > + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > + return 0; > + } > + > + pte = s390_guest_io_table_walk(s390_pci_get_table_origin(pbdev->g_iota), > + env->regs[r2]); > + flags = pte & ZPCI_PTE_FLAG_MASK; > + entry.target_as = &address_space_memory; > + entry.iova = env->regs[r2]; > + entry.translated_addr = pte & ZPCI_PTE_ADDR_MASK; > + entry.addr_mask = size - 1; > + > + if (flags & ZPCI_PTE_INVALID) { > + entry.perm = IOMMU_NONE; > + } else { > + entry.perm = IOMMU_RW; > + } Deja vu? This is the iommu translation function, no? Can't you somehow just call it? > + > + memory_region_notify_iommu(pci_device_iommu_address_space( > + pbdev->pdev)->root, entry); > + > + setcc(cpu, ZPCI_PCI_LS_OK); > + return 0; > +} > + > +int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run) > +{ > + qemu_log_mask(LOG_UNIMP, "SIC missing\n"); > + return 0; > +} > + > +int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run) > +{ > + CPUS390XState *env = &cpu->env; > + uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; > + uint8_t r3 = run->s390_sieic.ipa & 0x000f; > + PciStb *rp; > + uint64_t gaddr; > + uint64_t *uaddr, *pu; > + hwaddr len; > + S390PCIBusDevice *pbdev; > + MemoryRegion *mr; > + int i; > + > + cpu_synchronize_state(CPU(cpu)); > + > + if (env->psw.mask & PSW_MASK_PSTATE) { > + program_interrupt(env, PGM_PRIVILEGED, 6); > + return 0; > + } > + > + rp = (PciStb *)&env->regs[r1]; > + if (rp->pcias > 5) { > + DPRINTF("pcistb invalid space\n"); > + setcc(cpu, ZPCI_PCI_LS_ERR); > + s390_set_status_code(env, r1, ZPCI_PCI_ST_INVAL_AS); > + return 0; > + } > + > + switch (rp->len) { > + case 16: > + case 32: > + case 64: > + case 128: > + break; > + default: > + program_interrupt(env, PGM_SPECIFICATION, 6); > + return 0; > + } > + > + gaddr = get_base_disp_rsy(cpu, run); > + len = rp->len; > + > + pbdev = s390_pci_find_dev_by_fh(rp->fh); > + if (!pbdev) { > + DPRINTF("pcistb no pci dev fh 0x%x\n", rp->fh); > + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > + return 0; > + } > + > + uaddr = cpu_physical_memory_map(gaddr, &len, 0); > + mr = pbdev->pdev->io_regions[rp->pcias].memory; > + if (!memory_region_access_valid(mr, env->regs[r3], rp->len, true)) { > + cpu_physical_memory_unmap(uaddr, len, 0, len); > + program_interrupt(env, PGM_ADDRESSING, 6); > + return 0; > + } > + > + pu = uaddr; > + for (i = 0; i < rp->len / 8; i++) { > + io_mem_write(mr, env->regs[r3] + i * 8, *pu, 8); Please don't overoptimize and just use individual ldq_phys() operations here for each memory access. In general, try to avoid cpu_physical_memory_map(). > + pu++; > + } > + > + cpu_physical_memory_unmap(uaddr, len, 0, len); > + setcc(cpu, ZPCI_PCI_LS_OK); > + return 0; > +} > + > +static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib) > +{ > + int ret; > + S390FLICState *fs = s390_get_flic(); > + S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs); > + > + ret = css_register_io_adapter(S390_PCIPT_ADAPTER, > + FIB_DATA_ISC(fib.data), true, false, > + &pbdev->routes.adapter.adapter_id); > + assert(ret == 0); > + > + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, fib.aisb, true); > + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, fib.aibv, true); > + > + pbdev->routes.adapter.summary_addr = fib.aisb; > + pbdev->routes.adapter.summary_offset = FIB_DATA_AISBO(fib.data); > + pbdev->routes.adapter.ind_addr = fib.aibv; > + pbdev->routes.adapter.ind_offset = FIB_DATA_AIBVO(fib.data); > + > + DPRINTF("reg_irqs adapter id %d\n", pbdev->routes.adapter.adapter_id); > + return 0; > +} > + > +static int dereg_irqs(S390PCIBusDevice *pbdev) > +{ > + S390FLICState *fs = s390_get_flic(); > + S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs); > + > + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, > + pbdev->routes.adapter.ind_addr, false); > + > + pbdev->routes.adapter.summary_addr = 0; > + pbdev->routes.adapter.summary_offset = 0; > + pbdev->routes.adapter.ind_addr = 0; > + pbdev->routes.adapter.ind_offset = 0; > + > + DPRINTF("dereg_irqs adapter id %d\n", pbdev->routes.adapter.adapter_id); > + return 0; > +} > + > +int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run) > +{ > + CPUS390XState *env = &cpu->env; > + uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; > + uint8_t oc; > + uint32_t fh; > + uint64_t fiba; > + ZpciFib fib; > + S390PCIBusDevice *pbdev; > + > + cpu_synchronize_state(CPU(cpu)); > + > + if (env->psw.mask & PSW_MASK_PSTATE) { > + program_interrupt(env, PGM_PRIVILEGED, 6); > + return 0; > + } > + > + oc = env->regs[r1] & 0xff; > + fh = env->regs[r1] >> 32; > + fiba = get_base_disp_rxy(cpu, run); > + > + if (fiba & 0x7) { > + program_interrupt(env, PGM_SPECIFICATION, 6); > + return 0; > + } > + > + pbdev = s390_pci_find_dev_by_fh(fh); > + if (!pbdev) { > + DPRINTF("mpcifc no pci dev fh 0x%x\n", fh); > + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > + return 0; > + } > + > + cpu_physical_memory_rw(fiba, (uint8_t *)&fib, sizeof(fib), 0); I also find cpu_physical_memory_rw() pretty hard to read. Meanwhile, it's been deprecated by cpu_physical_memory_read() and cpu_physical_memory_write() which make the code more readable. > + > + switch (oc) { > + case ZPCI_MOD_FC_REG_INT: { > + pbdev->isc = FIB_DATA_ISC(fib.data); > + reg_irqs(env, pbdev, fib); > + break; > + } > + case ZPCI_MOD_FC_DEREG_INT: > + dereg_irqs(pbdev); > + break; > + case ZPCI_MOD_FC_REG_IOAT: > + if (fib.pba > fib.pal) { > + program_interrupt(&cpu->env, PGM_OPERAND, 6); > + return 0; > + } > + pbdev->g_iota = fib.iota; > + break; > + case ZPCI_MOD_FC_DEREG_IOAT: > + break; > + case ZPCI_MOD_FC_REREG_IOAT: > + break; > + case ZPCI_MOD_FC_RESET_ERROR: > + break; > + case ZPCI_MOD_FC_RESET_BLOCK: > + break; > + case ZPCI_MOD_FC_SET_MEASURE: > + break; > + default: > + program_interrupt(&cpu->env, PGM_OPERAND, 6); > + return 0; > + } > + > + setcc(cpu, ZPCI_PCI_LS_OK); > + return 0; > +} > + > +int kvm_stpcifc_service_call(S390CPU *cpu, struct kvm_run *run) > +{ > + qemu_log_mask(LOG_UNIMP, "STPCIFC missing\n"); > + return 0; > +} > diff --git a/target-s390x/pci_ic.h b/target-s390x/pci_ic.h > new file mode 100644 > index 0000000..0eb6c27 > --- /dev/null > +++ b/target-s390x/pci_ic.h > @@ -0,0 +1,335 @@ > +/* > + * s390 PCI intercept definitions > + * > + * Copyright 2014 IBM Corp. > + * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com> > + * Hong Bo Li <lihbbj@cn.ibm.com> > + * Yi Min Zhao <zyimin@cn.ibm.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or (at > + * your option) any later version. See the COPYING file in the top-level > + * directory. > + */ > + > +#ifndef PCI_IC_S390X_H > +#define PCI_IC_S390X_H > + > +#include <sysemu/dma.h> > + > +/* CLP common request & response block size */ > +#define CLP_BLK_SIZE 4096 > +#define PCI_BAR_COUNT 6 > +#define PCI_MAX_FUNCTIONS 4096 > + > +typedef struct ClpReqHdr { > + __uint16_t len; > + __uint16_t cmd; > +} QEMU_PACKED ClpReqHdr; > + > +typedef struct ClpRspHdr { > + __uint16_t len; > + __uint16_t rsp; > +} QEMU_PACKED ClpRspHdr; > + > +/* CLP Response Codes */ > +#define CLP_RC_OK 0x0010 /* Command request successfully */ > +#define CLP_RC_CMD 0x0020 /* Command code not recognized */ > +#define CLP_RC_PERM 0x0030 /* Command not authorized */ > +#define CLP_RC_FMT 0x0040 /* Invalid command request format */ > +#define CLP_RC_LEN 0x0050 /* Invalid command request length */ > +#define CLP_RC_8K 0x0060 /* Command requires 8K LPCB */ > +#define CLP_RC_RESNOT0 0x0070 /* Reserved field not zero */ > +#define CLP_RC_NODATA 0x0080 /* No data available */ > +#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ > + > +/* > + * Call Logical Processor - Command Codes > + */ > +#define CLP_LIST_PCI 0x0002 > +#define CLP_QUERY_PCI_FN 0x0003 > +#define CLP_QUERY_PCI_FNGRP 0x0004 > +#define CLP_SET_PCI_FN 0x0005 > + > +/* PCI function handle list entry */ > +typedef struct ClpFhListEntry { > + __uint16_t device_id; > + __uint16_t vendor_id; > +#define CLP_FHLIST_MASK_CONFIG 0x80000000 > + __uint32_t config; > + __uint32_t fid; > + __uint32_t fh; > +} QEMU_PACKED ClpFhListEntry; > + > +#define CLP_RC_SETPCIFN_FH 0x0101 /* Invalid PCI fn handle */ > +#define CLP_RC_SETPCIFN_FHOP 0x0102 /* Fn handle not valid for op */ > +#define CLP_RC_SETPCIFN_DMAAS 0x0103 /* Invalid DMA addr space */ > +#define CLP_RC_SETPCIFN_RES 0x0104 /* Insufficient resources */ > +#define CLP_RC_SETPCIFN_ALRDY 0x0105 /* Fn already in requested state */ > +#define CLP_RC_SETPCIFN_ERR 0x0106 /* Fn in permanent error state */ > +#define CLP_RC_SETPCIFN_RECPND 0x0107 /* Error recovery pending */ > +#define CLP_RC_SETPCIFN_BUSY 0x0108 /* Fn busy */ > +#define CLP_RC_LISTPCI_BADRT 0x010a /* Resume token not recognized */ > +#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */ > + > +/* request or response block header length */ > +#define LIST_PCI_HDR_LEN 32 > + > +/* Number of function handles fitting in response block */ > +#define CLP_FH_LIST_NR_ENTRIES \ > + ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \ > + / sizeof(ClpFhListEntry)) > + > +#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */ > +#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */ > + > +#define CLP_UTIL_STR_LEN 64 > + > +#define CLP_MASK_FMT 0xf0000000 > + > +/* List PCI functions request */ > +typedef struct ClpReqListPci { > + ClpReqHdr hdr; > + __uint32_t fmt; > + __uint64_t reserved1; > + __uint64_t resume_token; > + __uint64_t reserved2; > +} QEMU_PACKED ClpReqListPci; > + > +/* List PCI functions response */ > +typedef struct ClpRspListPci { > + ClpRspHdr hdr; > + __uint32_t fmt; > + __uint64_t reserved1; > + __uint64_t resume_token; > + __uint32_t mdd; > + __uint16_t max_fn; > + __uint8_t reserved2; > + __uint8_t entry_size; > + ClpFhListEntry fh_list[CLP_FH_LIST_NR_ENTRIES]; > +} QEMU_PACKED ClpRspListPci; > + > +/* Query PCI function request */ > +typedef struct ClpReqQueryPci { > + ClpReqHdr hdr; > + __uint32_t fmt; > + __uint64_t reserved1; > + __uint32_t fh; /* function handle */ > + __uint32_t reserved2; > + __uint64_t reserved3; > +} QEMU_PACKED ClpReqQueryPci; > + > +/* Query PCI function response */ > +typedef struct ClpRspQueryPci { > + ClpRspHdr hdr; > + __uint32_t fmt; > + __uint64_t reserved1; > + __uint16_t vfn; /* virtual fn number */ > +#define CLP_RSP_QPCI_MASK_UTIL 0x100 > +#define CLP_RSP_QPCI_MASK_PFGID 0xff > + __uint16_t ug; > + __uint32_t fid; /* pci function id */ > + __uint8_t bar_size[PCI_BAR_COUNT]; > + __uint16_t pchid; > + __uint32_t bar[PCI_BAR_COUNT]; > + __uint64_t reserved2; > + __uint64_t sdma; /* start dma as */ > + __uint64_t edma; /* end dma as */ > + __uint32_t reserved3[11]; > + __uint32_t uid; > + __uint8_t util_str[CLP_UTIL_STR_LEN]; /* utility string */ > +} QEMU_PACKED ClpRspQueryPci; > + > +/* Query PCI function group request */ > +typedef struct ClpReqQueryPciGrp { > + ClpReqHdr hdr; > + __uint32_t fmt; > + __uint64_t reserved1; > +#define CLP_REQ_QPCIG_MASK_PFGID 0xff > + __uint32_t g; > + __uint32_t reserved2; > + __uint64_t reserved3; > +} QEMU_PACKED ClpReqQueryPciGrp; > + > +/* Query PCI function group response */ > +typedef struct ClpRspQueryPciGrp { > + ClpRspHdr hdr; > + __uint32_t fmt; > + __uint64_t reserved1; > +#define CLP_RSP_QPCIG_MASK_NOI 0xfff > + __uint16_t i; > + __uint8_t version; > +#define CLP_RSP_QPCIG_MASK_FRAME 0x2 > +#define CLP_RSP_QPCIG_MASK_REFRESH 0x1 > + __uint8_t fr; > + __uint16_t reserved2; > + __uint16_t mui; > + __uint64_t reserved3; > + __uint64_t dasm; /* dma address space mask */ > + __uint64_t msia; /* MSI address */ > + __uint64_t reserved4; > + __uint64_t reserved5; > +} QEMU_PACKED ClpRspQueryPciGrp; > + > +/* Set PCI function request */ > +typedef struct ClpReqSetPci { > + ClpReqHdr hdr; > + __uint32_t fmt; > + __uint64_t reserved1; > + __uint32_t fh; /* function handle */ > + __uint16_t reserved2; > + __uint8_t oc; /* operation controls */ > + __uint8_t ndas; /* number of dma spaces */ > + __uint64_t reserved3; > +} QEMU_PACKED ClpReqSetPci; > + > +/* Set PCI function response */ > +typedef struct ClpRspSetPci { > + ClpRspHdr hdr; > + __uint32_t fmt; > + __uint64_t reserved1; > + __uint32_t fh; /* function handle */ > + __uint32_t reserved3; > + __uint64_t reserved4; > +} QEMU_PACKED ClpRspSetPci; > + > +typedef struct ClpReqRspListPci { > + ClpReqListPci request; > + ClpRspListPci response; > +} QEMU_PACKED ClpReqRspListPci; > + > +typedef struct ClpReqRspSetPci { > + ClpReqSetPci request; > + ClpRspSetPci response; > +} QEMU_PACKED ClpReqRspSetPci; > + > +typedef struct ClpReqRspQueryPci { > + ClpReqQueryPci request; > + ClpRspQueryPci response; > +} QEMU_PACKED ClpReqRspQueryPci; > + > +typedef struct ClpReqRspQueryPciGrp { > + ClpReqQueryPciGrp request; > + ClpRspQueryPciGrp response; > +} QEMU_PACKED ClpReqRspQueryPciGrp; > + > +typedef struct PciLgStg { > + uint32_t fh; > + uint8_t status; > + uint8_t pcias; > + uint8_t reserved; > + uint8_t len; > +} QEMU_PACKED PciLgStg; > + > +typedef struct PciStb { > + uint32_t fh; > + uint8_t status; > + uint8_t pcias; > + uint8_t reserved; > + uint8_t len; > +} QEMU_PACKED PciStb; > + > +/* Load/Store status codes */ > +#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4 > +#define ZPCI_PCI_ST_FUNC_IN_ERR 8 > +#define ZPCI_PCI_ST_BLOCKED 12 > +#define ZPCI_PCI_ST_INSUF_RES 16 > +#define ZPCI_PCI_ST_INVAL_AS 20 > +#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24 > +#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28 > +#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36 > +#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40 > +#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44 > + > +/* Load/Store return codes */ > +#define ZPCI_PCI_LS_OK 0 > +#define ZPCI_PCI_LS_ERR 1 > +#define ZPCI_PCI_LS_BUSY 2 > +#define ZPCI_PCI_LS_INVAL_HANDLE 3 > + > +/* Modify PCI Function Controls */ > +#define ZPCI_MOD_FC_REG_INT 2 > +#define ZPCI_MOD_FC_DEREG_INT 3 > +#define ZPCI_MOD_FC_REG_IOAT 4 > +#define ZPCI_MOD_FC_DEREG_IOAT 5 > +#define ZPCI_MOD_FC_REREG_IOAT 6 > +#define ZPCI_MOD_FC_RESET_ERROR 7 > +#define ZPCI_MOD_FC_RESET_BLOCK 9 > +#define ZPCI_MOD_FC_SET_MEASURE 10 > + > +/* FIB function controls */ > +#define ZPCI_FIB_FC_ENABLED 0x80 > +#define ZPCI_FIB_FC_ERROR 0x40 > +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 > +#define ZPCI_FIB_FC_DMAAS_REG 0x10 > + > +/* FIB function controls */ > +#define ZPCI_FIB_FC_ENABLED 0x80 > +#define ZPCI_FIB_FC_ERROR 0x40 > +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 > +#define ZPCI_FIB_FC_DMAAS_REG 0x10 > + > +/* Function Information Block */ > +typedef struct ZpciFib { > + __uint8_t fmt; /* format */ > + __uint8_t reserved1[7]; > + __uint8_t fc; /* function controls */ > + __uint8_t reserved2; > + __uint16_t reserved3; > + __uint32_t reserved4; > + __uint64_t pba; /* PCI base address */ > + __uint64_t pal; /* PCI address limit */ > + __uint64_t iota; /* I/O Translation Anchor */ > +#define FIB_DATA_ISC(x) (((x) >> 28) & 0x7) > +#define FIB_DATA_NOI(x) (((x) >> 16) & 0xfff) > +#define FIB_DATA_AIBVO(x) (((x) >> 8) & 0x3f) > +#define FIB_DATA_SUM(x) (((x) >> 7) & 0x1) > +#define FIB_DATA_AISBO(x) ((x) & 0x3f) > + __uint32_t data; > + __uint32_t reserved5; > + __uint64_t aibv; /* Adapter int bit vector address */ > + __uint64_t aisb; /* Adapter int summary bit address */ > + __uint64_t fmb_addr; /* Function measurement address and key */ > + __uint32_t reserved6; > + __uint32_t gd; > +} QEMU_PACKED ZpciFib; > + > +static inline uint64_t get_base_disp_rxy(S390CPU *cpu, struct kvm_run *run) > +{ > + CPUS390XState *env = &cpu->env; > + uint32_t x2 = (run->s390_sieic.ipa & 0x000f); > + uint32_t base2 = run->s390_sieic.ipb >> 28; > + uint32_t disp2 = ((run->s390_sieic.ipb & 0x0fff0000) >> 16) + > + ((run->s390_sieic.ipb & 0xff00) << 4); > + > + if (disp2 & 0x80000) { > + disp2 += 0xfff00000; > + } > + > + return (base2 ? env->regs[base2] : 0) + > + (x2 ? env->regs[x2] : 0) + (long)(int)disp2; > +} > + > +static inline uint64_t get_base_disp_rsy(S390CPU *cpu, struct kvm_run *run) > +{ > + CPUS390XState *env = &cpu->env; > + uint32_t base2 = run->s390_sieic.ipb >> 28; > + uint32_t disp2 = ((run->s390_sieic.ipb & 0x0fff0000) >> 16) + > + ((run->s390_sieic.ipb & 0xff00) << 4); > + > + if (disp2 & 0x80000) { > + disp2 += 0xfff00000; > + } > + > + return (base2 ? env->regs[base2] : 0) + (long)(int)disp2; > +} Same comment as in the previous patch here, please try to avoid putting code into a header file. > + > +int kvm_clp_service_call(S390CPU *cpu, struct kvm_run *run); > +int kvm_rpcit_service_call(S390CPU *cpu, struct kvm_run *run); > +int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run); > +int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run); > +int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run); > +int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run); > +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run); > +int kvm_stpcifc_service_call(S390CPU *cpu, struct kvm_run *run); Hrm. Maybe we could add some registration hook similar to spapr's hcall or rtas callback registration that would allow us to encapsulate this a bit better? Then you'd only have to spawn a PHB device which could register for these service calls. Alex
On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: > > > On 10.11.14 15:20, Frank Blaschka wrote: > > From: Frank Blaschka <frank.blaschka@de.ibm.com> > > > > This patch implements the s390 pci instructions in qemu. It allows > > to access and drive pci devices attached to the s390 pci bus. > > Because of platform constrains devices using IO BARs are not > > supported. Also a device has to support MSI/MSI-X to run on s390. > > > > Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> > > --- > > target-s390x/Makefile.objs | 2 +- > > target-s390x/kvm.c | 52 ++++ > > target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ > > target-s390x/pci_ic.h | 335 ++++++++++++++++++++ > > 4 files changed, 1141 insertions(+), 1 deletion(-) > > create mode 100644 target-s390x/pci_ic.c > > create mode 100644 target-s390x/pci_ic.h > > > > diff --git a/target-s390x/Makefile.objs b/target-s390x/Makefile.objs > > index 2c57494..cc71400 100644 > > --- a/target-s390x/Makefile.objs > > +++ b/target-s390x/Makefile.objs > > @@ -2,4 +2,4 @@ obj-y += translate.o helper.o cpu.o interrupt.o > > obj-y += int_helper.o fpu_helper.o cc_helper.o mem_helper.o misc_helper.o > > obj-y += gdbstub.o > > obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o > > -obj-$(CONFIG_KVM) += kvm.o > > +obj-$(CONFIG_KVM) += kvm.o pci_ic.o > > diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c > > index 5b10a25..d59e740 100644 > > --- a/target-s390x/kvm.c > > +++ b/target-s390x/kvm.c > > @@ -40,6 +40,7 @@ > > #include "exec/gdbstub.h" > > #include "trace.h" > > #include "qapi-event.h" > > +#include "pci_ic.h" > > > > /* #define DEBUG_KVM */ > > > > @@ -56,6 +57,7 @@ > > #define IPA0_B2 0xb200 > > #define IPA0_B9 0xb900 > > #define IPA0_EB 0xeb00 > > +#define IPA0_E3 0xe300 > > > > #define PRIV_B2_SCLP_CALL 0x20 > > #define PRIV_B2_CSCH 0x30 > > @@ -76,8 +78,17 @@ > > #define PRIV_B2_XSCH 0x76 > > > > #define PRIV_EB_SQBS 0x8a > > +#define PRIV_EB_PCISTB 0xd0 > > +#define PRIV_EB_SIC 0xd1 > > > > #define PRIV_B9_EQBS 0x9c > > +#define PRIV_B9_CLP 0xa0 > > +#define PRIV_B9_PCISTG 0xd0 > > +#define PRIV_B9_PCILG 0xd2 > > +#define PRIV_B9_RPCIT 0xd3 > > + > > +#define PRIV_E3_MPCIFC 0xd0 > > +#define PRIV_E3_STPCIFC 0xd4 > > > > #define DIAG_IPL 0x308 > > #define DIAG_KVM_HYPERCALL 0x500 > > @@ -814,6 +825,18 @@ static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) > > int r = 0; > > > > switch (ipa1) { > > + case PRIV_B9_CLP: > > + r = kvm_clp_service_call(cpu, run); > > + break; > > + case PRIV_B9_PCISTG: > > + r = kvm_pcistg_service_call(cpu, run); > > + break; > > + case PRIV_B9_PCILG: > > + r = kvm_pcilg_service_call(cpu, run); > > + break; > > + case PRIV_B9_RPCIT: > > + r = kvm_rpcit_service_call(cpu, run); > > + break; > > case PRIV_B9_EQBS: > > /* just inject exception */ > > r = -1; > > @@ -832,6 +855,12 @@ static int handle_eb(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) > > int r = 0; > > > > switch (ipa1) { > > + case PRIV_EB_PCISTB: > > + r = kvm_pcistb_service_call(cpu, run); > > + break; > > + case PRIV_EB_SIC: > > + r = kvm_sic_service_call(cpu, run); > > + break; > > case PRIV_EB_SQBS: > > /* just inject exception */ > > r = -1; > > @@ -845,6 +874,26 @@ static int handle_eb(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) > > return r; > > } > > > > +static int handle_e3(S390CPU *cpu, struct kvm_run *run, uint8_t ipbl) > > +{ > > + int r = 0; > > + > > + switch (ipbl) { > > + case PRIV_E3_MPCIFC: > > + r = kvm_mpcifc_service_call(cpu, run); > > + break; > > + case PRIV_E3_STPCIFC: > > + r = kvm_stpcifc_service_call(cpu, run); > > + break; > > + default: > > + r = -1; > > + DPRINTF("KVM: unhandled PRIV: 0xe3%x\n", ipbl); > > + break; > > + } > > + > > + return r; > > +} > > + > > static int handle_hypercall(S390CPU *cpu, struct kvm_run *run) > > { > > CPUS390XState *env = &cpu->env; > > @@ -1041,6 +1090,9 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run) > > case IPA0_EB: > > r = handle_eb(cpu, run, ipa1); > > break; > > + case IPA0_E3: > > + r = handle_e3(cpu, run, run->s390_sieic.ipb & 0xff); > > + break; > > case IPA0_DIAG: > > r = handle_diag(cpu, run, run->s390_sieic.ipb); > > break; > > diff --git a/target-s390x/pci_ic.c b/target-s390x/pci_ic.c > > new file mode 100644 > > index 0000000..6c05faf > > --- /dev/null > > +++ b/target-s390x/pci_ic.c > > @@ -0,0 +1,753 @@ > > +/* > > + * s390 PCI intercepts > > + * > > + * Copyright 2014 IBM Corp. > > + * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com> > > + * Hong Bo Li <lihbbj@cn.ibm.com> > > + * Yi Min Zhao <zyimin@cn.ibm.com> > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or (at > > + * your option) any later version. See the COPYING file in the top-level > > + * directory. > > + */ > > + > > +#include <sys/types.h> > > +#include <sys/ioctl.h> > > +#include <sys/mman.h> > > + > > +#include <linux/kvm.h> > > +#include <asm/ptrace.h> > > +#include <hw/pci/pci.h> > > +#include <hw/pci/pci_host.h> > > +#include <net/net.h> > > + > > +#include "qemu-common.h" > > +#include "qemu/timer.h" > > +#include "migration/qemu-file.h" > > +#include "sysemu/sysemu.h" > > +#include "sysemu/kvm.h" > > +#include "cpu.h" > > +#include "sysemu/device_tree.h" > > +#include "monitor/monitor.h" > > +#include "pci_ic.h" > > + > > +#include "hw/hw.h" > > +#include "hw/pci/pci.h" > > +#include "hw/pci/pci_bridge.h" > > +#include "hw/pci/pci_bus.h" > > +#include "hw/pci/pci_host.h" > > +#include "hw/s390x/s390-pci-bus.h" > > +#include "exec/exec-all.h" > > +#include "exec/memory-internal.h" > > + > > +/* #define DEBUG_S390PCI_IC */ > > +#ifdef DEBUG_S390PCI_IC > > +#define DPRINTF(fmt, ...) \ > > + do { fprintf(stderr, "s390pci_ic: " fmt, ## __VA_ARGS__); } while (0) > > +#else > > +#define DPRINTF(fmt, ...) \ > > + do { } while (0) > > +#endif > > + > > +static uint64_t resume_token; > > global variable? Why? > Hi Alex, thx for the review will try to address all issues from 1/3 and 2/3 patch. If I do not agree with a change I try to explain ... > > + > > +static uint8_t barsize(uint64_t size) > > +{ > > + uint64_t mask = 1; > > + int i; > > + > > + if (!size) { > > + return 0; > > + } > > + > > + for (i = 0; i < 64; i++) { > > + if (size & mask) { > > + break; > > + } > > + mask = (mask << 1); > > + } > > + > > + return i; > > +} > > Isn't there an existing helper for this in the PCI layer? > Did not find one, this function is used to fill a s390 specific len in an instruction intercept (architecture specific encoding of the len). > In fact, please check whether it makes sense to move some of the code to > hw/ rather than target-s390x. > > > + > > +static void s390_set_status_code(CPUS390XState *env, > > + uint8_t r, uint64_t status_code) > > +{ > > + env->regs[r] &= ~0xff000000; > > + env->regs[r] |= (status_code & 0xff) << 24; > > +} > > + > > +static int list_pci(ClpReqRspListPci *rrb, uint8_t *cc) > > +{ > > + S390PCIBusDevice *pbdev; > > + uint32_t res_code, initial_l2, g_l2, finish; > > + int rc, idx; > > + > > + rc = 0; > > + if (be16_to_cpu(rrb->request.hdr.len) != 32) { > > + res_code = CLP_RC_LEN; > > + rc = -EINVAL; > > + goto out; > > + } > > + > > + if ((be32_to_cpu(rrb->request.fmt) & CLP_MASK_FMT) != 0) { > > + res_code = CLP_RC_FMT; > > + rc = -EINVAL; > > + goto out; > > + } > > + > > + if ((be32_to_cpu(rrb->request.fmt) & ~CLP_MASK_FMT) != 0 || > > + rrb->request.reserved1 != 0 || > > + rrb->request.reserved2 != 0) { > > + res_code = CLP_RC_RESNOT0; > > + rc = -EINVAL; > > + goto out; > > + } > > + > > + if (be64_to_cpu(rrb->request.resume_token) == 0) { > > + resume_token = 0; > > + } else if (be64_to_cpu(rrb->request.resume_token) != resume_token) { > > + res_code = CLP_RC_LISTPCI_BADRT; > > + rc = -EINVAL; > > + goto out; > > + } > > + > > + if (be16_to_cpu(rrb->response.hdr.len) < 48) { > > + res_code = CLP_RC_8K; > > + rc = -EINVAL; > > + goto out; > > + } > > + > > + initial_l2 = be16_to_cpu(rrb->response.hdr.len); > > + if ((initial_l2 - LIST_PCI_HDR_LEN) % sizeof(ClpFhListEntry) > > + != 0) { > > + rc = -EINVAL; > > + *cc = 3; > > + goto out; > > + } > > + > > + rrb->response.fmt = 0; > > + rrb->response.reserved1 = rrb->response.reserved2 = 0; > > + rrb->response.mdd = cpu_to_be32(FH_VIRT); > > + rrb->response.max_fn = cpu_to_be16(PCI_MAX_FUNCTIONS); > > + rrb->response.entry_size = sizeof(ClpFhListEntry); > > + finish = 0; > > + idx = resume_token; > > + g_l2 = LIST_PCI_HDR_LEN; > > + do { > > + pbdev = s390_pci_find_dev_by_idx(idx); > > + if (!pbdev) { > > + finish = 1; > > + break; > > + } > > + rrb->response.fh_list[idx - resume_token].device_id = > > + pci_get_word(pbdev->pdev->config + PCI_DEVICE_ID); > > + rrb->response.fh_list[idx - resume_token].vendor_id = > > + pci_get_word(pbdev->pdev->config + PCI_VENDOR_ID); > > + rrb->response.fh_list[idx - resume_token].config = > > + cpu_to_be32(0x80000000); > > + rrb->response.fh_list[idx - resume_token].fid = cpu_to_be32(pbdev->fid); > > + rrb->response.fh_list[idx - resume_token].fh = cpu_to_be32(pbdev->fh); > > + > > + g_l2 += sizeof(ClpFhListEntry); > > + DPRINTF("g_l2 %d vendor id 0x%x device id 0x%x fid 0x%x fh 0x%x\n", > > + g_l2, > > + rrb->response.fh_list[idx - resume_token].vendor_id, > > + rrb->response.fh_list[idx - resume_token].device_id, > > + rrb->response.fh_list[idx - resume_token].fid, > > + rrb->response.fh_list[idx - resume_token].fh); > > + idx++; > > + } while (g_l2 < initial_l2); > > + > > + if (finish == 1) { > > + resume_token = 0; > > + } else { > > + resume_token = idx; > > + } > > + rrb->response.resume_token = cpu_to_be64(resume_token); > > + rrb->response.hdr.len = cpu_to_be16(g_l2); > > + rrb->response.hdr.rsp = cpu_to_be16(CLP_RC_OK); > > +out: > > + if (rc) { > > + DPRINTF("list pci failed rc 0x%x\n", rc); > > + rrb->response.hdr.rsp = cpu_to_be16(res_code); > > + } > > + return rc; > > +} > > + > > +int kvm_clp_service_call(S390CPU *cpu, struct kvm_run *run) > > Please separate kvm_ calls from the actual implementation. Do all the > parameter extraction in the kvm_ function and then forward on to a > generic function that doesn't need to know about kvm_run anymore. > > kvm specific c file: > > int kvm_clp_service_call(S390CPU *cpu, struct kvm_run *run) > { > uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > return clp_service_call(cpu, r2); > } > > io / pci specific c file: > > int clp_service_call(S390CPU *cpu, uint8_t r2) > { > ... > } > I had already in mind to separate clp and pci instruction implementation from kvm and move them to hw. Will do some major rework and move code arround. > > +{ > > + ClpReqHdr *reqh; > > + ClpRspHdr *resh; > > + S390PCIBusDevice *pbdev; > > + uint32_t req_len; > > + uint32_t res_len; > > + uint8_t *buffer; > > + uint8_t cc = 0; > > + CPUS390XState *env = &cpu->env; > > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > > + int i; > > + > > + buffer = g_malloc0(4096 * 2); > > Do you really need this? Couldn't you make the pointers be actual > structs on the stack and just read/write from them directly? > > The compiler should be smart enough to throw away elements that aren't > used anymore to conserve memory. > > > + cpu_synchronize_state(CPU(cpu)); > > + > > + if (env->psw.mask & PSW_MASK_PSTATE) { > > + program_interrupt(env, PGM_PRIVILEGED, 4); > > + return 0; > > + } > > + > > + cpu_physical_memory_rw(env->regs[r2], buffer, sizeof(*reqh), 0); > > + reqh = (ClpReqHdr *)buffer; > > + req_len = be16_to_cpu(reqh->len); > > + if (req_len < 16 || req_len > 8184 || (req_len % 8 != 0)) { > > + program_interrupt(env, PGM_OPERAND, 4); > > + return 0; > > + } > > + > > + cpu_physical_memory_rw(env->regs[r2], buffer, req_len + sizeof(*resh), 0); > > + resh = (ClpRspHdr *)(buffer + req_len); > > + res_len = be16_to_cpu(resh->len); > > + if (res_len < 8 || res_len > 8176 || (res_len % 8 != 0)) { > > + program_interrupt(env, PGM_OPERAND, 4); > > + return 0; > > + } > > + if ((req_len + res_len) > 8192) { > > + program_interrupt(env, PGM_OPERAND, 4); > > + return 0; > > + } > > + > > + cpu_physical_memory_rw(env->regs[r2], buffer, req_len + res_len, 0); > > + > > + if (req_len != 32) { > > + resh->rsp = cpu_to_be16(CLP_RC_LEN); > > + goto out; > > + } > > + > > + switch (reqh->cmd) { > > + case CLP_LIST_PCI: { > > + ClpReqRspListPci *rrb = (ClpReqRspListPci *)buffer; > > + list_pci(rrb, &cc); > > + break; > > + } > > + case CLP_SET_PCI_FN: { > > + ClpReqSetPci *reqsetpci = (ClpReqSetPci *)reqh; > > + ClpRspSetPci *ressetpci = (ClpRspSetPci *)resh; > > + > > + pbdev = s390_pci_find_dev_by_fh(be32_to_cpu(reqsetpci->fh)); > > + if (!pbdev) { > > + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_SETPCIFN_FH); > > + goto out; > > + } > > + > > + switch (reqsetpci->oc) { > > + case CLP_SET_ENABLE_PCI_FN: > > + pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET; > > + ressetpci->fh = cpu_to_be32(pbdev->fh); > > + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_OK); > > + break; > > + case CLP_SET_DISABLE_PCI_FN: > > + pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET); > > + ressetpci->fh = cpu_to_be32(pbdev->fh); > > + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_OK); > > + break; > > + default: > > + DPRINTF("unknown set pci command\n"); > > + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_SETPCIFN_FHOP); > > + break; > > + } > > + break; > > + } > > + case CLP_QUERY_PCI_FN: { > > + ClpReqQueryPci *reqquery = (ClpReqQueryPci *)reqh; > > + ClpRspQueryPci *resquery = (ClpRspQueryPci *)resh; > > + > > + pbdev = s390_pci_find_dev_by_fh(reqquery->fh); > > + if (!pbdev) { > > + DPRINTF("query pci no pci dev\n"); > > + resquery->hdr.rsp = cpu_to_be16(CLP_RC_SETPCIFN_FH); > > + goto out; > > + } > > + > > + for (i = 0; i < PCI_BAR_COUNT; i++) { > > + uint64_t data = pci_host_config_read_common(pbdev->pdev, > > + 0x10 + (i * 4), pci_config_size(pbdev->pdev), 4); > > + > > + resquery->bar[i] = bswap32(data); > > + resquery->bar_size[i] = barsize(pbdev->pdev->io_regions[i].size); > > + DPRINTF("bar %d addr 0x%x size 0x%lx barsize 0x%x\n", i, > > + resquery->bar[i], pbdev->pdev->io_regions[i].size, > > + resquery->bar_size[i]); > > + } > > + > > + resquery->sdma = ZPCI_SDMA_ADDR; > > + resquery->edma = ZPCI_EDMA_ADDR; > > + resquery->pchid = 0; > > + resquery->ug = 1; > > + resquery->uid = pbdev->fid; > > + > > + resquery->hdr.rsp = CLP_RC_OK; > > + break; > > + } > > + case CLP_QUERY_PCI_FNGRP: { > > + ClpRspQueryPciGrp *resgrp = (ClpRspQueryPciGrp *)resh; > > + resgrp->fr = 1; > > + resgrp->dasm = 0; > > + resgrp->msia = ZPCI_MSI_ADDR; > > + resgrp->mui = 0; > > + resgrp->i = 128; > > + resgrp->version = 0; > > + > > + resgrp->hdr.rsp = CLP_RC_OK; > > + break; > > + } > > + default: > > + DPRINTF("unknown clp command\n"); > > + resh->rsp = cpu_to_be16(CLP_RC_CMD); > > + break; > > + } > > + > > +out: > > + cpu_physical_memory_rw(env->regs[r2], buffer, req_len + res_len, 1); > > ... ah, to write back. Wouldn't it be cleaner to do this explicitly? > > > + g_free(buffer); > > + setcc(cpu, cc); > > + return 0; > > +} > > + > > +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + S390PCIBusDevice *pbdev; > > + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > > + PciLgStg *rp; > > + uint64_t offset; > > + uint64_t data; > > + uint8_t len; > > + > > + cpu_synchronize_state(CPU(cpu)); > > + > > + if (env->psw.mask & PSW_MASK_PSTATE) { > > + program_interrupt(env, PGM_PRIVILEGED, 4); > > + return 0; > > + } > > + > > + if (r2 & 0x1) { > > + program_interrupt(env, PGM_SPECIFICATION, 4); > > + return 0; > > + } > > + > > + rp = (PciLgStg *)&env->regs[r2]; > > + offset = env->regs[r2 + 1]; > > + > > + pbdev = s390_pci_find_dev_by_fh(rp->fh); > > + if (!pbdev) { > > + DPRINTF("pcilg no pci dev\n"); > > + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > > + return 0; > > + } > > + > > + len = rp->len & 0xF; > > + if (rp->pcias < 6) { > > + if ((8 - (offset & 0x7)) < len) { > > + program_interrupt(env, PGM_OPERAND, 4); > > + return 0; > > + } > > + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; > > + io_mem_read(mr, offset, &data, len); > > + } else if (rp->pcias == 15) { > > + if ((4 - (offset & 0x3)) < len) { > > + program_interrupt(env, PGM_OPERAND, 4); > > + return 0; > > + } > > + data = pci_host_config_read_common( > > + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); > > + > > + switch (len) { > > + case 1: > > + break; > > + case 2: > > + data = cpu_to_le16(data); > > + break; > > + case 4: > > + data = cpu_to_le32(data); > > + break; > > + case 8: > > + data = cpu_to_le64(data); > > + break; > > Why? Also, this is wrong. cpu_to_le64 convert between host endianness > and LE. So if you're running this on an LE host, you won't swap the > value and get a broken result. > > If you know that the value is always swapped, use bswapxx(). > Actually the code is right and required for a big endian host :-) pcilg/pcistg provide access to the PCI config space which is defined as PCI byte order (little endian). Since pci_host_config_read_common does already a le to cpu conversion we have to convert back to PCI byte order. Doing an unconditional swap would be a bug on a little endian host. > > + default: > > + program_interrupt(env, PGM_OPERAND, 4); > > + return 0; > > + } > > + } else { > > + DPRINTF("invalid space\n"); > > + setcc(cpu, ZPCI_PCI_LS_ERR); > > + s390_set_status_code(env, r2, ZPCI_PCI_ST_INVAL_AS); > > + return 0; > > + } > > + > > + env->regs[r1] = data; > > + setcc(cpu, ZPCI_PCI_LS_OK); > > + return 0; > > +} > > + > > +static void update_msix_table_msg_data(S390PCIBusDevice *pbdev, uint64_t offset, > > + uint64_t *data, uint8_t len) > > +{ > > + uint32_t msg_data; > > + > > + if (offset % PCI_MSIX_ENTRY_SIZE != 8) { > > + return; > > + } > > + > > + if (len != 4) { > > + DPRINTF("access msix table msg data but len is %d\n", len); > > + return; > > + } > > + > > + msg_data = (pbdev->fid << ZPCI_MSI_VEC_BITS) | le32_to_cpu(*data); > > + *data = cpu_to_le32(msg_data); > > + DPRINTF("update msix msg_data to 0x%x\n", msg_data); > > +} > > + > > +static int trap_msix(S390PCIBusDevice *pbdev, uint64_t offset, uint8_t pcias) > > +{ > > + if (pbdev->msix.available && pbdev->msix.table_bar == pcias && > > + offset >= pbdev->msix.table_offset && > > + offset <= pbdev->msix.table_offset + > > + (pbdev->msix.entries - 1) * PCI_MSIX_ENTRY_SIZE) { > > + return 1; > > + } else { > > + return 0; > > + } > > +} > > + > > +int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > > + PciLgStg *rp; > > + uint64_t offset, data; > > + S390PCIBusDevice *pbdev; > > + uint8_t len; > > + > > + cpu_synchronize_state(CPU(cpu)); > > + > > + if (env->psw.mask & PSW_MASK_PSTATE) { > > + program_interrupt(env, PGM_PRIVILEGED, 4); > > + return 0; > > + } > > + > > + if (r2 & 0x1) { > > + program_interrupt(env, PGM_SPECIFICATION, 4); > > + return 0; > > + } > > + > > + rp = (PciLgStg *)&env->regs[r2]; > > + offset = env->regs[r2 + 1]; > > + > > + pbdev = s390_pci_find_dev_by_fh(rp->fh); > > + if (!pbdev) { > > + DPRINTF("pcistg no pci dev\n"); > > + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > > + return 0; > > + } > > + > > + data = env->regs[r1]; > > + len = rp->len & 0xF; > > + if (rp->pcias < 6) { > > + if ((8 - (offset & 0x7)) < len) { > > + program_interrupt(env, PGM_OPERAND, 4); > > + return 0; > > + } > > + MemoryRegion *mr; > > + if (trap_msix(pbdev, offset, rp->pcias)) { > > + offset = offset - pbdev->msix.table_offset; > > + mr = &pbdev->pdev->msix_table_mmio; > > + update_msix_table_msg_data(pbdev, offset, &data, len); > > + } else { > > + mr = pbdev->pdev->io_regions[rp->pcias].memory; > > + } > > + > > + io_mem_write(mr, offset, data, len); > > + } else if (rp->pcias == 15) { > > + if ((4 - (offset & 0x3)) < len) { > > + program_interrupt(env, PGM_OPERAND, 4); > > + return 0; > > + } > > + switch (len) { > > + case 1: > > + break; > > + case 2: > > + data = le16_to_cpu(data); > > + break; > > + case 4: > > + data = le32_to_cpu(data); > > + break; > > + case 8: > > + data = le64_to_cpu(data); > > + break; > > + default: > > + program_interrupt(env, PGM_OPERAND, 4); > > + return 0; > > + } > > I guess you want a generic function similar to qemu_bswap_len() that > supports 64bit? > > > + > > + pci_host_config_write_common(pbdev->pdev, offset, > > + pci_config_size(pbdev->pdev), > > + data, len); > > + } else { > > + DPRINTF("pcistg invalid space\n"); > > + setcc(cpu, ZPCI_PCI_LS_ERR); > > + s390_set_status_code(env, r2, ZPCI_PCI_ST_INVAL_AS); > > + return 0; > > + } > > + > > + setcc(cpu, ZPCI_PCI_LS_OK); > > + return 0; > > +} > > + > > +int kvm_rpcit_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > > + uint32_t fh; > > + uint64_t pte; > > + S390PCIBusDevice *pbdev; > > + ram_addr_t size; > > + int flags; > > + IOMMUTLBEntry entry; > > + > > + cpu_synchronize_state(CPU(cpu)); > > + > > + if (env->psw.mask & PSW_MASK_PSTATE) { > > + program_interrupt(env, PGM_PRIVILEGED, 4); > > + return 0; > > + } > > + > > + if (r2 & 0x1) { > > + program_interrupt(env, PGM_SPECIFICATION, 4); > > + return 0; > > + } > > + > > + fh = env->regs[r1] >> 32; > > + size = env->regs[r2 + 1]; > > + > > + pbdev = s390_pci_find_dev_by_fh(fh); > > + > > + if (!pbdev) { > > + DPRINTF("rpcit no pci dev\n"); > > + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > > + return 0; > > + } > > + > > + pte = s390_guest_io_table_walk(s390_pci_get_table_origin(pbdev->g_iota), > > + env->regs[r2]); > > + flags = pte & ZPCI_PTE_FLAG_MASK; > > + entry.target_as = &address_space_memory; > > + entry.iova = env->regs[r2]; > > + entry.translated_addr = pte & ZPCI_PTE_ADDR_MASK; > > + entry.addr_mask = size - 1; > > + > > + if (flags & ZPCI_PTE_INVALID) { > > + entry.perm = IOMMU_NONE; > > + } else { > > + entry.perm = IOMMU_RW; > > + } > > Deja vu? This is the iommu translation function, no? Can't you somehow > just call it? > yes you are so right, can't belive I did't saw this before > > + > > + memory_region_notify_iommu(pci_device_iommu_address_space( > > + pbdev->pdev)->root, entry); > > + > > + setcc(cpu, ZPCI_PCI_LS_OK); > > + return 0; > > +} > > + > > +int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + qemu_log_mask(LOG_UNIMP, "SIC missing\n"); > > + return 0; > > +} > > + > > +int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; > > + uint8_t r3 = run->s390_sieic.ipa & 0x000f; > > + PciStb *rp; > > + uint64_t gaddr; > > + uint64_t *uaddr, *pu; > > + hwaddr len; > > + S390PCIBusDevice *pbdev; > > + MemoryRegion *mr; > > + int i; > > + > > + cpu_synchronize_state(CPU(cpu)); > > + > > + if (env->psw.mask & PSW_MASK_PSTATE) { > > + program_interrupt(env, PGM_PRIVILEGED, 6); > > + return 0; > > + } > > + > > + rp = (PciStb *)&env->regs[r1]; > > + if (rp->pcias > 5) { > > + DPRINTF("pcistb invalid space\n"); > > + setcc(cpu, ZPCI_PCI_LS_ERR); > > + s390_set_status_code(env, r1, ZPCI_PCI_ST_INVAL_AS); > > + return 0; > > + } > > + > > + switch (rp->len) { > > + case 16: > > + case 32: > > + case 64: > > + case 128: > > + break; > > + default: > > + program_interrupt(env, PGM_SPECIFICATION, 6); > > + return 0; > > + } > > + > > + gaddr = get_base_disp_rsy(cpu, run); > > + len = rp->len; > > + > > + pbdev = s390_pci_find_dev_by_fh(rp->fh); > > + if (!pbdev) { > > + DPRINTF("pcistb no pci dev fh 0x%x\n", rp->fh); > > + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > > + return 0; > > + } > > + > > + uaddr = cpu_physical_memory_map(gaddr, &len, 0); > > + mr = pbdev->pdev->io_regions[rp->pcias].memory; > > + if (!memory_region_access_valid(mr, env->regs[r3], rp->len, true)) { > > + cpu_physical_memory_unmap(uaddr, len, 0, len); > > + program_interrupt(env, PGM_ADDRESSING, 6); > > + return 0; > > + } > > + > > + pu = uaddr; > > + for (i = 0; i < rp->len / 8; i++) { > > + io_mem_write(mr, env->regs[r3] + i * 8, *pu, 8); > > Please don't overoptimize and just use individual ldq_phys() operations > here for each memory access. In general, try to avoid > cpu_physical_memory_map(). > > > + pu++; > > + } > > + > > + cpu_physical_memory_unmap(uaddr, len, 0, len); > > + setcc(cpu, ZPCI_PCI_LS_OK); > > + return 0; > > +} > > + > > +static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib) > > +{ > > + int ret; > > + S390FLICState *fs = s390_get_flic(); > > + S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs); > > + > > + ret = css_register_io_adapter(S390_PCIPT_ADAPTER, > > + FIB_DATA_ISC(fib.data), true, false, > > + &pbdev->routes.adapter.adapter_id); > > + assert(ret == 0); > > + > > + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, fib.aisb, true); > > + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, fib.aibv, true); > > + > > + pbdev->routes.adapter.summary_addr = fib.aisb; > > + pbdev->routes.adapter.summary_offset = FIB_DATA_AISBO(fib.data); > > + pbdev->routes.adapter.ind_addr = fib.aibv; > > + pbdev->routes.adapter.ind_offset = FIB_DATA_AIBVO(fib.data); > > + > > + DPRINTF("reg_irqs adapter id %d\n", pbdev->routes.adapter.adapter_id); > > + return 0; > > +} > > + > > +static int dereg_irqs(S390PCIBusDevice *pbdev) > > +{ > > + S390FLICState *fs = s390_get_flic(); > > + S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs); > > + > > + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, > > + pbdev->routes.adapter.ind_addr, false); > > + > > + pbdev->routes.adapter.summary_addr = 0; > > + pbdev->routes.adapter.summary_offset = 0; > > + pbdev->routes.adapter.ind_addr = 0; > > + pbdev->routes.adapter.ind_offset = 0; > > + > > + DPRINTF("dereg_irqs adapter id %d\n", pbdev->routes.adapter.adapter_id); > > + return 0; > > +} > > + > > +int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; > > + uint8_t oc; > > + uint32_t fh; > > + uint64_t fiba; > > + ZpciFib fib; > > + S390PCIBusDevice *pbdev; > > + > > + cpu_synchronize_state(CPU(cpu)); > > + > > + if (env->psw.mask & PSW_MASK_PSTATE) { > > + program_interrupt(env, PGM_PRIVILEGED, 6); > > + return 0; > > + } > > + > > + oc = env->regs[r1] & 0xff; > > + fh = env->regs[r1] >> 32; > > + fiba = get_base_disp_rxy(cpu, run); > > + > > + if (fiba & 0x7) { > > + program_interrupt(env, PGM_SPECIFICATION, 6); > > + return 0; > > + } > > + > > + pbdev = s390_pci_find_dev_by_fh(fh); > > + if (!pbdev) { > > + DPRINTF("mpcifc no pci dev fh 0x%x\n", fh); > > + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > > + return 0; > > + } > > + > > + cpu_physical_memory_rw(fiba, (uint8_t *)&fib, sizeof(fib), 0); > > I also find cpu_physical_memory_rw() pretty hard to read. Meanwhile, > it's been deprecated by cpu_physical_memory_read() and > cpu_physical_memory_write() which make the code more readable. > > > + > > + switch (oc) { > > + case ZPCI_MOD_FC_REG_INT: { > > + pbdev->isc = FIB_DATA_ISC(fib.data); > > + reg_irqs(env, pbdev, fib); > > + break; > > + } > > + case ZPCI_MOD_FC_DEREG_INT: > > + dereg_irqs(pbdev); > > + break; > > + case ZPCI_MOD_FC_REG_IOAT: > > + if (fib.pba > fib.pal) { > > + program_interrupt(&cpu->env, PGM_OPERAND, 6); > > + return 0; > > + } > > + pbdev->g_iota = fib.iota; > > + break; > > + case ZPCI_MOD_FC_DEREG_IOAT: > > + break; > > + case ZPCI_MOD_FC_REREG_IOAT: > > + break; > > + case ZPCI_MOD_FC_RESET_ERROR: > > + break; > > + case ZPCI_MOD_FC_RESET_BLOCK: > > + break; > > + case ZPCI_MOD_FC_SET_MEASURE: > > + break; > > + default: > > + program_interrupt(&cpu->env, PGM_OPERAND, 6); > > + return 0; > > + } > > + > > + setcc(cpu, ZPCI_PCI_LS_OK); > > + return 0; > > +} > > + > > +int kvm_stpcifc_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + qemu_log_mask(LOG_UNIMP, "STPCIFC missing\n"); > > + return 0; > > +} > > diff --git a/target-s390x/pci_ic.h b/target-s390x/pci_ic.h > > new file mode 100644 > > index 0000000..0eb6c27 > > --- /dev/null > > +++ b/target-s390x/pci_ic.h > > @@ -0,0 +1,335 @@ > > +/* > > + * s390 PCI intercept definitions > > + * > > + * Copyright 2014 IBM Corp. > > + * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com> > > + * Hong Bo Li <lihbbj@cn.ibm.com> > > + * Yi Min Zhao <zyimin@cn.ibm.com> > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or (at > > + * your option) any later version. See the COPYING file in the top-level > > + * directory. > > + */ > > + > > +#ifndef PCI_IC_S390X_H > > +#define PCI_IC_S390X_H > > + > > +#include <sysemu/dma.h> > > + > > +/* CLP common request & response block size */ > > +#define CLP_BLK_SIZE 4096 > > +#define PCI_BAR_COUNT 6 > > +#define PCI_MAX_FUNCTIONS 4096 > > + > > +typedef struct ClpReqHdr { > > + __uint16_t len; > > + __uint16_t cmd; > > +} QEMU_PACKED ClpReqHdr; > > + > > +typedef struct ClpRspHdr { > > + __uint16_t len; > > + __uint16_t rsp; > > +} QEMU_PACKED ClpRspHdr; > > + > > +/* CLP Response Codes */ > > +#define CLP_RC_OK 0x0010 /* Command request successfully */ > > +#define CLP_RC_CMD 0x0020 /* Command code not recognized */ > > +#define CLP_RC_PERM 0x0030 /* Command not authorized */ > > +#define CLP_RC_FMT 0x0040 /* Invalid command request format */ > > +#define CLP_RC_LEN 0x0050 /* Invalid command request length */ > > +#define CLP_RC_8K 0x0060 /* Command requires 8K LPCB */ > > +#define CLP_RC_RESNOT0 0x0070 /* Reserved field not zero */ > > +#define CLP_RC_NODATA 0x0080 /* No data available */ > > +#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ > > + > > +/* > > + * Call Logical Processor - Command Codes > > + */ > > +#define CLP_LIST_PCI 0x0002 > > +#define CLP_QUERY_PCI_FN 0x0003 > > +#define CLP_QUERY_PCI_FNGRP 0x0004 > > +#define CLP_SET_PCI_FN 0x0005 > > + > > +/* PCI function handle list entry */ > > +typedef struct ClpFhListEntry { > > + __uint16_t device_id; > > + __uint16_t vendor_id; > > +#define CLP_FHLIST_MASK_CONFIG 0x80000000 > > + __uint32_t config; > > + __uint32_t fid; > > + __uint32_t fh; > > +} QEMU_PACKED ClpFhListEntry; > > + > > +#define CLP_RC_SETPCIFN_FH 0x0101 /* Invalid PCI fn handle */ > > +#define CLP_RC_SETPCIFN_FHOP 0x0102 /* Fn handle not valid for op */ > > +#define CLP_RC_SETPCIFN_DMAAS 0x0103 /* Invalid DMA addr space */ > > +#define CLP_RC_SETPCIFN_RES 0x0104 /* Insufficient resources */ > > +#define CLP_RC_SETPCIFN_ALRDY 0x0105 /* Fn already in requested state */ > > +#define CLP_RC_SETPCIFN_ERR 0x0106 /* Fn in permanent error state */ > > +#define CLP_RC_SETPCIFN_RECPND 0x0107 /* Error recovery pending */ > > +#define CLP_RC_SETPCIFN_BUSY 0x0108 /* Fn busy */ > > +#define CLP_RC_LISTPCI_BADRT 0x010a /* Resume token not recognized */ > > +#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */ > > + > > +/* request or response block header length */ > > +#define LIST_PCI_HDR_LEN 32 > > + > > +/* Number of function handles fitting in response block */ > > +#define CLP_FH_LIST_NR_ENTRIES \ > > + ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \ > > + / sizeof(ClpFhListEntry)) > > + > > +#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */ > > +#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */ > > + > > +#define CLP_UTIL_STR_LEN 64 > > + > > +#define CLP_MASK_FMT 0xf0000000 > > + > > +/* List PCI functions request */ > > +typedef struct ClpReqListPci { > > + ClpReqHdr hdr; > > + __uint32_t fmt; > > + __uint64_t reserved1; > > + __uint64_t resume_token; > > + __uint64_t reserved2; > > +} QEMU_PACKED ClpReqListPci; > > + > > +/* List PCI functions response */ > > +typedef struct ClpRspListPci { > > + ClpRspHdr hdr; > > + __uint32_t fmt; > > + __uint64_t reserved1; > > + __uint64_t resume_token; > > + __uint32_t mdd; > > + __uint16_t max_fn; > > + __uint8_t reserved2; > > + __uint8_t entry_size; > > + ClpFhListEntry fh_list[CLP_FH_LIST_NR_ENTRIES]; > > +} QEMU_PACKED ClpRspListPci; > > + > > +/* Query PCI function request */ > > +typedef struct ClpReqQueryPci { > > + ClpReqHdr hdr; > > + __uint32_t fmt; > > + __uint64_t reserved1; > > + __uint32_t fh; /* function handle */ > > + __uint32_t reserved2; > > + __uint64_t reserved3; > > +} QEMU_PACKED ClpReqQueryPci; > > + > > +/* Query PCI function response */ > > +typedef struct ClpRspQueryPci { > > + ClpRspHdr hdr; > > + __uint32_t fmt; > > + __uint64_t reserved1; > > + __uint16_t vfn; /* virtual fn number */ > > +#define CLP_RSP_QPCI_MASK_UTIL 0x100 > > +#define CLP_RSP_QPCI_MASK_PFGID 0xff > > + __uint16_t ug; > > + __uint32_t fid; /* pci function id */ > > + __uint8_t bar_size[PCI_BAR_COUNT]; > > + __uint16_t pchid; > > + __uint32_t bar[PCI_BAR_COUNT]; > > + __uint64_t reserved2; > > + __uint64_t sdma; /* start dma as */ > > + __uint64_t edma; /* end dma as */ > > + __uint32_t reserved3[11]; > > + __uint32_t uid; > > + __uint8_t util_str[CLP_UTIL_STR_LEN]; /* utility string */ > > +} QEMU_PACKED ClpRspQueryPci; > > + > > +/* Query PCI function group request */ > > +typedef struct ClpReqQueryPciGrp { > > + ClpReqHdr hdr; > > + __uint32_t fmt; > > + __uint64_t reserved1; > > +#define CLP_REQ_QPCIG_MASK_PFGID 0xff > > + __uint32_t g; > > + __uint32_t reserved2; > > + __uint64_t reserved3; > > +} QEMU_PACKED ClpReqQueryPciGrp; > > + > > +/* Query PCI function group response */ > > +typedef struct ClpRspQueryPciGrp { > > + ClpRspHdr hdr; > > + __uint32_t fmt; > > + __uint64_t reserved1; > > +#define CLP_RSP_QPCIG_MASK_NOI 0xfff > > + __uint16_t i; > > + __uint8_t version; > > +#define CLP_RSP_QPCIG_MASK_FRAME 0x2 > > +#define CLP_RSP_QPCIG_MASK_REFRESH 0x1 > > + __uint8_t fr; > > + __uint16_t reserved2; > > + __uint16_t mui; > > + __uint64_t reserved3; > > + __uint64_t dasm; /* dma address space mask */ > > + __uint64_t msia; /* MSI address */ > > + __uint64_t reserved4; > > + __uint64_t reserved5; > > +} QEMU_PACKED ClpRspQueryPciGrp; > > + > > +/* Set PCI function request */ > > +typedef struct ClpReqSetPci { > > + ClpReqHdr hdr; > > + __uint32_t fmt; > > + __uint64_t reserved1; > > + __uint32_t fh; /* function handle */ > > + __uint16_t reserved2; > > + __uint8_t oc; /* operation controls */ > > + __uint8_t ndas; /* number of dma spaces */ > > + __uint64_t reserved3; > > +} QEMU_PACKED ClpReqSetPci; > > + > > +/* Set PCI function response */ > > +typedef struct ClpRspSetPci { > > + ClpRspHdr hdr; > > + __uint32_t fmt; > > + __uint64_t reserved1; > > + __uint32_t fh; /* function handle */ > > + __uint32_t reserved3; > > + __uint64_t reserved4; > > +} QEMU_PACKED ClpRspSetPci; > > + > > +typedef struct ClpReqRspListPci { > > + ClpReqListPci request; > > + ClpRspListPci response; > > +} QEMU_PACKED ClpReqRspListPci; > > + > > +typedef struct ClpReqRspSetPci { > > + ClpReqSetPci request; > > + ClpRspSetPci response; > > +} QEMU_PACKED ClpReqRspSetPci; > > + > > +typedef struct ClpReqRspQueryPci { > > + ClpReqQueryPci request; > > + ClpRspQueryPci response; > > +} QEMU_PACKED ClpReqRspQueryPci; > > + > > +typedef struct ClpReqRspQueryPciGrp { > > + ClpReqQueryPciGrp request; > > + ClpRspQueryPciGrp response; > > +} QEMU_PACKED ClpReqRspQueryPciGrp; > > + > > +typedef struct PciLgStg { > > + uint32_t fh; > > + uint8_t status; > > + uint8_t pcias; > > + uint8_t reserved; > > + uint8_t len; > > +} QEMU_PACKED PciLgStg; > > + > > +typedef struct PciStb { > > + uint32_t fh; > > + uint8_t status; > > + uint8_t pcias; > > + uint8_t reserved; > > + uint8_t len; > > +} QEMU_PACKED PciStb; > > + > > +/* Load/Store status codes */ > > +#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4 > > +#define ZPCI_PCI_ST_FUNC_IN_ERR 8 > > +#define ZPCI_PCI_ST_BLOCKED 12 > > +#define ZPCI_PCI_ST_INSUF_RES 16 > > +#define ZPCI_PCI_ST_INVAL_AS 20 > > +#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24 > > +#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28 > > +#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36 > > +#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40 > > +#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44 > > + > > +/* Load/Store return codes */ > > +#define ZPCI_PCI_LS_OK 0 > > +#define ZPCI_PCI_LS_ERR 1 > > +#define ZPCI_PCI_LS_BUSY 2 > > +#define ZPCI_PCI_LS_INVAL_HANDLE 3 > > + > > +/* Modify PCI Function Controls */ > > +#define ZPCI_MOD_FC_REG_INT 2 > > +#define ZPCI_MOD_FC_DEREG_INT 3 > > +#define ZPCI_MOD_FC_REG_IOAT 4 > > +#define ZPCI_MOD_FC_DEREG_IOAT 5 > > +#define ZPCI_MOD_FC_REREG_IOAT 6 > > +#define ZPCI_MOD_FC_RESET_ERROR 7 > > +#define ZPCI_MOD_FC_RESET_BLOCK 9 > > +#define ZPCI_MOD_FC_SET_MEASURE 10 > > + > > +/* FIB function controls */ > > +#define ZPCI_FIB_FC_ENABLED 0x80 > > +#define ZPCI_FIB_FC_ERROR 0x40 > > +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 > > +#define ZPCI_FIB_FC_DMAAS_REG 0x10 > > + > > +/* FIB function controls */ > > +#define ZPCI_FIB_FC_ENABLED 0x80 > > +#define ZPCI_FIB_FC_ERROR 0x40 > > +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 > > +#define ZPCI_FIB_FC_DMAAS_REG 0x10 > > + > > +/* Function Information Block */ > > +typedef struct ZpciFib { > > + __uint8_t fmt; /* format */ > > + __uint8_t reserved1[7]; > > + __uint8_t fc; /* function controls */ > > + __uint8_t reserved2; > > + __uint16_t reserved3; > > + __uint32_t reserved4; > > + __uint64_t pba; /* PCI base address */ > > + __uint64_t pal; /* PCI address limit */ > > + __uint64_t iota; /* I/O Translation Anchor */ > > +#define FIB_DATA_ISC(x) (((x) >> 28) & 0x7) > > +#define FIB_DATA_NOI(x) (((x) >> 16) & 0xfff) > > +#define FIB_DATA_AIBVO(x) (((x) >> 8) & 0x3f) > > +#define FIB_DATA_SUM(x) (((x) >> 7) & 0x1) > > +#define FIB_DATA_AISBO(x) ((x) & 0x3f) > > + __uint32_t data; > > + __uint32_t reserved5; > > + __uint64_t aibv; /* Adapter int bit vector address */ > > + __uint64_t aisb; /* Adapter int summary bit address */ > > + __uint64_t fmb_addr; /* Function measurement address and key */ > > + __uint32_t reserved6; > > + __uint32_t gd; > > +} QEMU_PACKED ZpciFib; > > + > > +static inline uint64_t get_base_disp_rxy(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + uint32_t x2 = (run->s390_sieic.ipa & 0x000f); > > + uint32_t base2 = run->s390_sieic.ipb >> 28; > > + uint32_t disp2 = ((run->s390_sieic.ipb & 0x0fff0000) >> 16) + > > + ((run->s390_sieic.ipb & 0xff00) << 4); > > + > > + if (disp2 & 0x80000) { > > + disp2 += 0xfff00000; > > + } > > + > > + return (base2 ? env->regs[base2] : 0) + > > + (x2 ? env->regs[x2] : 0) + (long)(int)disp2; > > +} > > + > > +static inline uint64_t get_base_disp_rsy(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + uint32_t base2 = run->s390_sieic.ipb >> 28; > > + uint32_t disp2 = ((run->s390_sieic.ipb & 0x0fff0000) >> 16) + > > + ((run->s390_sieic.ipb & 0xff00) << 4); > > + > > + if (disp2 & 0x80000) { > > + disp2 += 0xfff00000; > > + } > > + > > + return (base2 ? env->regs[base2] : 0) + (long)(int)disp2; > > +} > > Same comment as in the previous patch here, please try to avoid putting > code into a header file. > > > + > > +int kvm_clp_service_call(S390CPU *cpu, struct kvm_run *run); > > +int kvm_rpcit_service_call(S390CPU *cpu, struct kvm_run *run); > > +int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run); > > +int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run); > > +int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run); > > +int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run); > > +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run); > > +int kvm_stpcifc_service_call(S390CPU *cpu, struct kvm_run *run); > > Hrm. Maybe we could add some registration hook similar to spapr's hcall > or rtas callback registration that would allow us to encapsulate this a > bit better? > > Then you'd only have to spawn a PHB device which could register for > these service calls. > > > Alex > >
On 11.11.14 13:10, Frank Blaschka wrote: > On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: >> >> >> On 10.11.14 15:20, Frank Blaschka wrote: >>> From: Frank Blaschka <frank.blaschka@de.ibm.com> >>> >>> This patch implements the s390 pci instructions in qemu. It allows >>> to access and drive pci devices attached to the s390 pci bus. >>> Because of platform constrains devices using IO BARs are not >>> supported. Also a device has to support MSI/MSI-X to run on s390. >>> >>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> >>> --- >>> target-s390x/Makefile.objs | 2 +- >>> target-s390x/kvm.c | 52 ++++ >>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ >>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ >>> 4 files changed, 1141 insertions(+), 1 deletion(-) >>> create mode 100644 target-s390x/pci_ic.c >>> create mode 100644 target-s390x/pci_ic.h >>> [...] >>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) >>> +{ >>> + CPUS390XState *env = &cpu->env; >>> + S390PCIBusDevice *pbdev; >>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; >>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; >>> + PciLgStg *rp; >>> + uint64_t offset; >>> + uint64_t data; >>> + uint8_t len; >>> + >>> + cpu_synchronize_state(CPU(cpu)); >>> + >>> + if (env->psw.mask & PSW_MASK_PSTATE) { >>> + program_interrupt(env, PGM_PRIVILEGED, 4); >>> + return 0; >>> + } >>> + >>> + if (r2 & 0x1) { >>> + program_interrupt(env, PGM_SPECIFICATION, 4); >>> + return 0; >>> + } >>> + >>> + rp = (PciLgStg *)&env->regs[r2]; >>> + offset = env->regs[r2 + 1]; >>> + >>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); >>> + if (!pbdev) { >>> + DPRINTF("pcilg no pci dev\n"); >>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); >>> + return 0; >>> + } >>> + >>> + len = rp->len & 0xF; >>> + if (rp->pcias < 6) { >>> + if ((8 - (offset & 0x7)) < len) { >>> + program_interrupt(env, PGM_OPERAND, 4); >>> + return 0; >>> + } >>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; >>> + io_mem_read(mr, offset, &data, len); >>> + } else if (rp->pcias == 15) { >>> + if ((4 - (offset & 0x3)) < len) { >>> + program_interrupt(env, PGM_OPERAND, 4); >>> + return 0; >>> + } >>> + data = pci_host_config_read_common( >>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); >>> + >>> + switch (len) { >>> + case 1: >>> + break; >>> + case 2: >>> + data = cpu_to_le16(data); >>> + break; >>> + case 4: >>> + data = cpu_to_le32(data); >>> + break; >>> + case 8: >>> + data = cpu_to_le64(data); >>> + break; >> >> Why? Also, this is wrong. cpu_to_le64 convert between host endianness >> and LE. So if you're running this on an LE host, you won't swap the >> value and get a broken result. >> >> If you know that the value is always swapped, use bswapxx(). >> > > Actually the code is right and required for a big endian host :-) > pcilg/pcistg provide access to the PCI config space which is defined > as PCI byte order (little endian). Since pci_host_config_read_common does > already a le to cpu conversion we have to convert back to PCI byte order. > Doing an unconditional swap would be a bug on a little endian host. Why would it be a bug? The value you end up writing is contents of a register and thus doesn't have endianness. So if QEMU was an LE process, the value of data would be identical as on a BE QEMU before your swab. After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. Alex
On 11 November 2014 12:10, Frank Blaschka <blaschka@linux.vnet.ibm.com> wrote: > On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: >> > +static uint8_t barsize(uint64_t size) >> > +{ >> > + uint64_t mask = 1; >> > + int i; >> > + >> > + if (!size) { >> > + return 0; >> > + } >> > + >> > + for (i = 0; i < 64; i++) { >> > + if (size & mask) { >> > + break; >> > + } >> > + mask = (mask << 1); >> > + } >> > + >> > + return i; >> > +} >> >> Isn't there an existing helper for this in the PCI layer? >> > > Did not find one, this function is used to fill a s390 specific len > in an instruction intercept (architecture specific encoding of the len). If you do need to implement this here then you should probably be using ctz64(). I think what you have here is equivalent to return size ? ctz64(size) : 0; but you should check that. thanks -- PMM
On Tue, Nov 11, 2014 at 01:16:04PM +0100, Alexander Graf wrote: > > > On 11.11.14 13:10, Frank Blaschka wrote: > > On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: > >> > >> > >> On 10.11.14 15:20, Frank Blaschka wrote: > >>> From: Frank Blaschka <frank.blaschka@de.ibm.com> > >>> > >>> This patch implements the s390 pci instructions in qemu. It allows > >>> to access and drive pci devices attached to the s390 pci bus. > >>> Because of platform constrains devices using IO BARs are not > >>> supported. Also a device has to support MSI/MSI-X to run on s390. > >>> > >>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> > >>> --- > >>> target-s390x/Makefile.objs | 2 +- > >>> target-s390x/kvm.c | 52 ++++ > >>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ > >>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ > >>> 4 files changed, 1141 insertions(+), 1 deletion(-) > >>> create mode 100644 target-s390x/pci_ic.c > >>> create mode 100644 target-s390x/pci_ic.h > >>> > > [...] > > >>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) > >>> +{ > >>> + CPUS390XState *env = &cpu->env; > >>> + S390PCIBusDevice *pbdev; > >>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > >>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > >>> + PciLgStg *rp; > >>> + uint64_t offset; > >>> + uint64_t data; > >>> + uint8_t len; > >>> + > >>> + cpu_synchronize_state(CPU(cpu)); > >>> + > >>> + if (env->psw.mask & PSW_MASK_PSTATE) { > >>> + program_interrupt(env, PGM_PRIVILEGED, 4); > >>> + return 0; > >>> + } > >>> + > >>> + if (r2 & 0x1) { > >>> + program_interrupt(env, PGM_SPECIFICATION, 4); > >>> + return 0; > >>> + } > >>> + > >>> + rp = (PciLgStg *)&env->regs[r2]; > >>> + offset = env->regs[r2 + 1]; > >>> + > >>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); > >>> + if (!pbdev) { > >>> + DPRINTF("pcilg no pci dev\n"); > >>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > >>> + return 0; > >>> + } > >>> + > >>> + len = rp->len & 0xF; > >>> + if (rp->pcias < 6) { > >>> + if ((8 - (offset & 0x7)) < len) { > >>> + program_interrupt(env, PGM_OPERAND, 4); > >>> + return 0; > >>> + } > >>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; > >>> + io_mem_read(mr, offset, &data, len); > >>> + } else if (rp->pcias == 15) { > >>> + if ((4 - (offset & 0x3)) < len) { > >>> + program_interrupt(env, PGM_OPERAND, 4); > >>> + return 0; > >>> + } > >>> + data = pci_host_config_read_common( > >>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); > >>> + > >>> + switch (len) { > >>> + case 1: > >>> + break; > >>> + case 2: > >>> + data = cpu_to_le16(data); > >>> + break; > >>> + case 4: > >>> + data = cpu_to_le32(data); > >>> + break; > >>> + case 8: > >>> + data = cpu_to_le64(data); > >>> + break; > >> > >> Why? Also, this is wrong. cpu_to_le64 convert between host endianness > >> and LE. So if you're running this on an LE host, you won't swap the > >> value and get a broken result. > >> > >> If you know that the value is always swapped, use bswapxx(). > >> > > > > Actually the code is right and required for a big endian host :-) > > pcilg/pcistg provide access to the PCI config space which is defined > > as PCI byte order (little endian). Since pci_host_config_read_common does > > already a le to cpu conversion we have to convert back to PCI byte order. > > Doing an unconditional swap would be a bug on a little endian host. > > Why would it be a bug? The value you end up writing is contents of a > register and thus doesn't have endianness. So if QEMU was an LE process, No, the s390 guest executing pcilg instruction expects to receive config space data in PCI byte order. > the value of data would be identical as on a BE QEMU before your swab. > After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. > Again on BE endian host we do the swap because of pci_host_config_read_common does read the value and do a byte swap for that value, but we need PCI byte order not BE here. On LE host pci_host_config_read_common does not do a byte swap so we do not have to convert back to PCI byte order. > > Alex >
On Tue, Nov 11, 2014 at 12:17:17PM +0000, Peter Maydell wrote: > On 11 November 2014 12:10, Frank Blaschka <blaschka@linux.vnet.ibm.com> wrote: > > On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: > >> > +static uint8_t barsize(uint64_t size) > >> > +{ > >> > + uint64_t mask = 1; > >> > + int i; > >> > + > >> > + if (!size) { > >> > + return 0; > >> > + } > >> > + > >> > + for (i = 0; i < 64; i++) { > >> > + if (size & mask) { > >> > + break; > >> > + } > >> > + mask = (mask << 1); > >> > + } > >> > + > >> > + return i; > >> > +} > >> > >> Isn't there an existing helper for this in the PCI layer? > >> > > > > Did not find one, this function is used to fill a s390 specific len > > in an instruction intercept (architecture specific encoding of the len). > > If you do need to implement this here then you should probably > be using ctz64(). I think what you have here is equivalent to > > return size ? ctz64(size) : 0; > > but you should check that. will do thx! > > thanks > -- PMM >
> Am 11.11.2014 um 13:39 schrieb Frank Blaschka <blaschka@linux.vnet.ibm.com>: > >> On Tue, Nov 11, 2014 at 01:16:04PM +0100, Alexander Graf wrote: >> >> >>> On 11.11.14 13:10, Frank Blaschka wrote: >>>> On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: >>>> >>>> >>>>> On 10.11.14 15:20, Frank Blaschka wrote: >>>>> From: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>> >>>>> This patch implements the s390 pci instructions in qemu. It allows >>>>> to access and drive pci devices attached to the s390 pci bus. >>>>> Because of platform constrains devices using IO BARs are not >>>>> supported. Also a device has to support MSI/MSI-X to run on s390. >>>>> >>>>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>> --- >>>>> target-s390x/Makefile.objs | 2 +- >>>>> target-s390x/kvm.c | 52 ++++ >>>>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ >>>>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ >>>>> 4 files changed, 1141 insertions(+), 1 deletion(-) >>>>> create mode 100644 target-s390x/pci_ic.c >>>>> create mode 100644 target-s390x/pci_ic.h >>>>> >> >> [...] >> >>>>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) >>>>> +{ >>>>> + CPUS390XState *env = &cpu->env; >>>>> + S390PCIBusDevice *pbdev; >>>>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; >>>>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; >>>>> + PciLgStg *rp; >>>>> + uint64_t offset; >>>>> + uint64_t data; >>>>> + uint8_t len; >>>>> + >>>>> + cpu_synchronize_state(CPU(cpu)); >>>>> + >>>>> + if (env->psw.mask & PSW_MASK_PSTATE) { >>>>> + program_interrupt(env, PGM_PRIVILEGED, 4); >>>>> + return 0; >>>>> + } >>>>> + >>>>> + if (r2 & 0x1) { >>>>> + program_interrupt(env, PGM_SPECIFICATION, 4); >>>>> + return 0; >>>>> + } >>>>> + >>>>> + rp = (PciLgStg *)&env->regs[r2]; >>>>> + offset = env->regs[r2 + 1]; >>>>> + >>>>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); >>>>> + if (!pbdev) { >>>>> + DPRINTF("pcilg no pci dev\n"); >>>>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); >>>>> + return 0; >>>>> + } >>>>> + >>>>> + len = rp->len & 0xF; >>>>> + if (rp->pcias < 6) { >>>>> + if ((8 - (offset & 0x7)) < len) { >>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>> + return 0; >>>>> + } >>>>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; >>>>> + io_mem_read(mr, offset, &data, len); >>>>> + } else if (rp->pcias == 15) { >>>>> + if ((4 - (offset & 0x3)) < len) { >>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>> + return 0; >>>>> + } >>>>> + data = pci_host_config_read_common( >>>>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); >>>>> + >>>>> + switch (len) { >>>>> + case 1: >>>>> + break; >>>>> + case 2: >>>>> + data = cpu_to_le16(data); >>>>> + break; >>>>> + case 4: >>>>> + data = cpu_to_le32(data); >>>>> + break; >>>>> + case 8: >>>>> + data = cpu_to_le64(data); >>>>> + break; >>>> >>>> Why? Also, this is wrong. cpu_to_le64 convert between host endianness >>>> and LE. So if you're running this on an LE host, you won't swap the >>>> value and get a broken result. >>>> >>>> If you know that the value is always swapped, use bswapxx(). >>>> >>> >>> Actually the code is right and required for a big endian host :-) >>> pcilg/pcistg provide access to the PCI config space which is defined >>> as PCI byte order (little endian). Since pci_host_config_read_common does >>> already a le to cpu conversion we have to convert back to PCI byte order. >>> Doing an unconditional swap would be a bug on a little endian host. >> >> Why would it be a bug? The value you end up writing is contents of a >> register and thus doesn't have endianness. So if QEMU was an LE process, > > No, the s390 guest executing pcilg instruction expects to receive config space data > in PCI byte order. > >> the value of data would be identical as on a BE QEMU before your swab. >> After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. >> > > Again on BE endian host we do the swap because of pci_host_config_read_common does > read the value and do a byte swap for that value, but we need PCI byte order not BE here. > > On LE host pci_host_config_read_common does not do a byte swap so we do not have to > convert back to PCI byte order. We maintain the PCI config space always in LE byte order in memory, that's why there is a bwap in its read function. The return result of the read function however is always the same, regardless of LE or BE host. If I do a read of size 4, I will always get 0x1, not 0x01000000 returned. So now you need to convert that 0x1 into a 0x01000000 manually here because some architect thought that registers have endianness (which they don't). But you need to do it always, even on an LE host, because the pci config space return value is identical on LE and BE. Alex
On Tue, Nov 11, 2014 at 01:51:25PM +0100, Alexander Graf wrote: > > > > > Am 11.11.2014 um 13:39 schrieb Frank Blaschka <blaschka@linux.vnet.ibm.com>: > > > >> On Tue, Nov 11, 2014 at 01:16:04PM +0100, Alexander Graf wrote: > >> > >> > >>> On 11.11.14 13:10, Frank Blaschka wrote: > >>>> On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: > >>>> > >>>> > >>>>> On 10.11.14 15:20, Frank Blaschka wrote: > >>>>> From: Frank Blaschka <frank.blaschka@de.ibm.com> > >>>>> > >>>>> This patch implements the s390 pci instructions in qemu. It allows > >>>>> to access and drive pci devices attached to the s390 pci bus. > >>>>> Because of platform constrains devices using IO BARs are not > >>>>> supported. Also a device has to support MSI/MSI-X to run on s390. > >>>>> > >>>>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> > >>>>> --- > >>>>> target-s390x/Makefile.objs | 2 +- > >>>>> target-s390x/kvm.c | 52 ++++ > >>>>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ > >>>>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ > >>>>> 4 files changed, 1141 insertions(+), 1 deletion(-) > >>>>> create mode 100644 target-s390x/pci_ic.c > >>>>> create mode 100644 target-s390x/pci_ic.h > >>>>> > >> > >> [...] > >> > >>>>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) > >>>>> +{ > >>>>> + CPUS390XState *env = &cpu->env; > >>>>> + S390PCIBusDevice *pbdev; > >>>>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > >>>>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > >>>>> + PciLgStg *rp; > >>>>> + uint64_t offset; > >>>>> + uint64_t data; > >>>>> + uint8_t len; > >>>>> + > >>>>> + cpu_synchronize_state(CPU(cpu)); > >>>>> + > >>>>> + if (env->psw.mask & PSW_MASK_PSTATE) { > >>>>> + program_interrupt(env, PGM_PRIVILEGED, 4); > >>>>> + return 0; > >>>>> + } > >>>>> + > >>>>> + if (r2 & 0x1) { > >>>>> + program_interrupt(env, PGM_SPECIFICATION, 4); > >>>>> + return 0; > >>>>> + } > >>>>> + > >>>>> + rp = (PciLgStg *)&env->regs[r2]; > >>>>> + offset = env->regs[r2 + 1]; > >>>>> + > >>>>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); > >>>>> + if (!pbdev) { > >>>>> + DPRINTF("pcilg no pci dev\n"); > >>>>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > >>>>> + return 0; > >>>>> + } > >>>>> + > >>>>> + len = rp->len & 0xF; > >>>>> + if (rp->pcias < 6) { > >>>>> + if ((8 - (offset & 0x7)) < len) { > >>>>> + program_interrupt(env, PGM_OPERAND, 4); > >>>>> + return 0; > >>>>> + } > >>>>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; > >>>>> + io_mem_read(mr, offset, &data, len); > >>>>> + } else if (rp->pcias == 15) { > >>>>> + if ((4 - (offset & 0x3)) < len) { > >>>>> + program_interrupt(env, PGM_OPERAND, 4); > >>>>> + return 0; > >>>>> + } > >>>>> + data = pci_host_config_read_common( > >>>>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); > >>>>> + > >>>>> + switch (len) { > >>>>> + case 1: > >>>>> + break; > >>>>> + case 2: > >>>>> + data = cpu_to_le16(data); > >>>>> + break; > >>>>> + case 4: > >>>>> + data = cpu_to_le32(data); > >>>>> + break; > >>>>> + case 8: > >>>>> + data = cpu_to_le64(data); > >>>>> + break; > >>>> > >>>> Why? Also, this is wrong. cpu_to_le64 convert between host endianness > >>>> and LE. So if you're running this on an LE host, you won't swap the > >>>> value and get a broken result. > >>>> > >>>> If you know that the value is always swapped, use bswapxx(). > >>>> > >>> > >>> Actually the code is right and required for a big endian host :-) > >>> pcilg/pcistg provide access to the PCI config space which is defined > >>> as PCI byte order (little endian). Since pci_host_config_read_common does > >>> already a le to cpu conversion we have to convert back to PCI byte order. > >>> Doing an unconditional swap would be a bug on a little endian host. > >> > >> Why would it be a bug? The value you end up writing is contents of a > >> register and thus doesn't have endianness. So if QEMU was an LE process, > > > > No, the s390 guest executing pcilg instruction expects to receive config space data > > in PCI byte order. > > > >> the value of data would be identical as on a BE QEMU before your swab. > >> After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. > >> > > > > Again on BE endian host we do the swap because of pci_host_config_read_common does > > read the value and do a byte swap for that value, but we need PCI byte order not BE here. > > > > On LE host pci_host_config_read_common does not do a byte swap so we do not have to > > convert back to PCI byte order. > > We maintain the PCI config space always in LE byte order in memory, that's why there is a bwap in its read function. The return result of the read function however is always the same, regardless of LE or BE host. If I do a read of size 4, I will always get 0x1, not 0x01000000 returned. > > So now you need to convert that 0x1 into a 0x01000000 manually here because some architect thought that registers have endianness (which they don't). But you need to do it always, even on an LE host, because the pci config space return value is identical on LE and BE. > so you tell me pci_host_config_read_common does not end up in pci_default_read_config? uint32_t pci_default_read_config(PCIDevice *d, uint32_t address, int len) { uint32_t val = 0; memcpy(&val, d->config + address, len); return le32_to_cpu(val); } What did I miss? > > Alex > >
On 11.11.14 15:08, Frank Blaschka wrote: > On Tue, Nov 11, 2014 at 01:51:25PM +0100, Alexander Graf wrote: >> >> >> >>> Am 11.11.2014 um 13:39 schrieb Frank Blaschka <blaschka@linux.vnet.ibm.com>: >>> >>>> On Tue, Nov 11, 2014 at 01:16:04PM +0100, Alexander Graf wrote: >>>> >>>> >>>>> On 11.11.14 13:10, Frank Blaschka wrote: >>>>>> On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: >>>>>> >>>>>> >>>>>>> On 10.11.14 15:20, Frank Blaschka wrote: >>>>>>> From: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>>>> >>>>>>> This patch implements the s390 pci instructions in qemu. It allows >>>>>>> to access and drive pci devices attached to the s390 pci bus. >>>>>>> Because of platform constrains devices using IO BARs are not >>>>>>> supported. Also a device has to support MSI/MSI-X to run on s390. >>>>>>> >>>>>>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>>>> --- >>>>>>> target-s390x/Makefile.objs | 2 +- >>>>>>> target-s390x/kvm.c | 52 ++++ >>>>>>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ >>>>>>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ >>>>>>> 4 files changed, 1141 insertions(+), 1 deletion(-) >>>>>>> create mode 100644 target-s390x/pci_ic.c >>>>>>> create mode 100644 target-s390x/pci_ic.h >>>>>>> >>>> >>>> [...] >>>> >>>>>>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) >>>>>>> +{ >>>>>>> + CPUS390XState *env = &cpu->env; >>>>>>> + S390PCIBusDevice *pbdev; >>>>>>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; >>>>>>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; >>>>>>> + PciLgStg *rp; >>>>>>> + uint64_t offset; >>>>>>> + uint64_t data; >>>>>>> + uint8_t len; >>>>>>> + >>>>>>> + cpu_synchronize_state(CPU(cpu)); >>>>>>> + >>>>>>> + if (env->psw.mask & PSW_MASK_PSTATE) { >>>>>>> + program_interrupt(env, PGM_PRIVILEGED, 4); >>>>>>> + return 0; >>>>>>> + } >>>>>>> + >>>>>>> + if (r2 & 0x1) { >>>>>>> + program_interrupt(env, PGM_SPECIFICATION, 4); >>>>>>> + return 0; >>>>>>> + } >>>>>>> + >>>>>>> + rp = (PciLgStg *)&env->regs[r2]; >>>>>>> + offset = env->regs[r2 + 1]; >>>>>>> + >>>>>>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); >>>>>>> + if (!pbdev) { >>>>>>> + DPRINTF("pcilg no pci dev\n"); >>>>>>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); >>>>>>> + return 0; >>>>>>> + } >>>>>>> + >>>>>>> + len = rp->len & 0xF; >>>>>>> + if (rp->pcias < 6) { >>>>>>> + if ((8 - (offset & 0x7)) < len) { >>>>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>>>> + return 0; >>>>>>> + } >>>>>>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; >>>>>>> + io_mem_read(mr, offset, &data, len); >>>>>>> + } else if (rp->pcias == 15) { >>>>>>> + if ((4 - (offset & 0x3)) < len) { >>>>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>>>> + return 0; >>>>>>> + } >>>>>>> + data = pci_host_config_read_common( >>>>>>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); >>>>>>> + >>>>>>> + switch (len) { >>>>>>> + case 1: >>>>>>> + break; >>>>>>> + case 2: >>>>>>> + data = cpu_to_le16(data); >>>>>>> + break; >>>>>>> + case 4: >>>>>>> + data = cpu_to_le32(data); >>>>>>> + break; >>>>>>> + case 8: >>>>>>> + data = cpu_to_le64(data); >>>>>>> + break; >>>>>> >>>>>> Why? Also, this is wrong. cpu_to_le64 convert between host endianness >>>>>> and LE. So if you're running this on an LE host, you won't swap the >>>>>> value and get a broken result. >>>>>> >>>>>> If you know that the value is always swapped, use bswapxx(). >>>>>> >>>>> >>>>> Actually the code is right and required for a big endian host :-) >>>>> pcilg/pcistg provide access to the PCI config space which is defined >>>>> as PCI byte order (little endian). Since pci_host_config_read_common does >>>>> already a le to cpu conversion we have to convert back to PCI byte order. >>>>> Doing an unconditional swap would be a bug on a little endian host. >>>> >>>> Why would it be a bug? The value you end up writing is contents of a >>>> register and thus doesn't have endianness. So if QEMU was an LE process, >>> >>> No, the s390 guest executing pcilg instruction expects to receive config space data >>> in PCI byte order. >>> >>>> the value of data would be identical as on a BE QEMU before your swab. >>>> After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. >>>> >>> >>> Again on BE endian host we do the swap because of pci_host_config_read_common does >>> read the value and do a byte swap for that value, but we need PCI byte order not BE here. >>> >>> On LE host pci_host_config_read_common does not do a byte swap so we do not have to >>> convert back to PCI byte order. >> >> We maintain the PCI config space always in LE byte order in memory, that's why there is a bwap in its read function. The return result of the read function however is always the same, regardless of LE or BE host. If I do a read of size 4, I will always get 0x1, not 0x01000000 returned. >> >> So now you need to convert that 0x1 into a 0x01000000 manually here because some architect thought that registers have endianness (which they don't). But you need to do it always, even on an LE host, because the pci config space return value is identical on LE and BE. >> > so you tell me pci_host_config_read_common does not end up in pci_default_read_config? > > uint32_t pci_default_read_config(PCIDevice *d, > uint32_t address, int len) > { > uint32_t val = 0; > > memcpy(&val, d->config + address, len); > return le32_to_cpu(val); > } > > What did I miss? That's exactly where you end up in - and it's there to convert from the PCI config space backing storage to a native number. Imagine you write 0x12345678 at offset 0. Because PCI config space is defined to be LE, in the PCI config space memory this gets stored as 78 56 34 12 The reason we do the internal storage of the config space that way is that it's (in some PCI implementations) legal to access with single byte granularities. So you could do a pci_config_read(offset = 1) which should return 0x56. However, that means we completely nullify any effect of host endianness in the PCI config layer already. So if you do pci_config_write(offset = 0, size = 4, value = 0x12345678), the contents of d->config will always be identical, regardless of host endianness. The same holds true for pci_config_read(offset = 0, size = 4). It will always return 0x12345678. In your code, you swab that value again. I assume there's a reason you're swapping it and that it's the way the architecture expects it (mind to point me to the respective spec so I can verify?). But if the architecture expects it, then it expects it regardless of host endianness. The contents of regs[r1] should always be 0x78563412, no matter whether we're in an LE or a BE environment. Does that make sense now? Alex
On Tue, Nov 11, 2014 at 04:24:24PM +0100, Alexander Graf wrote: > > > On 11.11.14 15:08, Frank Blaschka wrote: > > On Tue, Nov 11, 2014 at 01:51:25PM +0100, Alexander Graf wrote: > >> > >> > >> > >>> Am 11.11.2014 um 13:39 schrieb Frank Blaschka <blaschka@linux.vnet.ibm.com>: > >>> > >>>> On Tue, Nov 11, 2014 at 01:16:04PM +0100, Alexander Graf wrote: > >>>> > >>>> > >>>>> On 11.11.14 13:10, Frank Blaschka wrote: > >>>>>> On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: > >>>>>> > >>>>>> > >>>>>>> On 10.11.14 15:20, Frank Blaschka wrote: > >>>>>>> From: Frank Blaschka <frank.blaschka@de.ibm.com> > >>>>>>> > >>>>>>> This patch implements the s390 pci instructions in qemu. It allows > >>>>>>> to access and drive pci devices attached to the s390 pci bus. > >>>>>>> Because of platform constrains devices using IO BARs are not > >>>>>>> supported. Also a device has to support MSI/MSI-X to run on s390. > >>>>>>> > >>>>>>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> > >>>>>>> --- > >>>>>>> target-s390x/Makefile.objs | 2 +- > >>>>>>> target-s390x/kvm.c | 52 ++++ > >>>>>>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ > >>>>>>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ > >>>>>>> 4 files changed, 1141 insertions(+), 1 deletion(-) > >>>>>>> create mode 100644 target-s390x/pci_ic.c > >>>>>>> create mode 100644 target-s390x/pci_ic.h > >>>>>>> > >>>> > >>>> [...] > >>>> > >>>>>>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) > >>>>>>> +{ > >>>>>>> + CPUS390XState *env = &cpu->env; > >>>>>>> + S390PCIBusDevice *pbdev; > >>>>>>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > >>>>>>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > >>>>>>> + PciLgStg *rp; > >>>>>>> + uint64_t offset; > >>>>>>> + uint64_t data; > >>>>>>> + uint8_t len; > >>>>>>> + > >>>>>>> + cpu_synchronize_state(CPU(cpu)); > >>>>>>> + > >>>>>>> + if (env->psw.mask & PSW_MASK_PSTATE) { > >>>>>>> + program_interrupt(env, PGM_PRIVILEGED, 4); > >>>>>>> + return 0; > >>>>>>> + } > >>>>>>> + > >>>>>>> + if (r2 & 0x1) { > >>>>>>> + program_interrupt(env, PGM_SPECIFICATION, 4); > >>>>>>> + return 0; > >>>>>>> + } > >>>>>>> + > >>>>>>> + rp = (PciLgStg *)&env->regs[r2]; > >>>>>>> + offset = env->regs[r2 + 1]; > >>>>>>> + > >>>>>>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); > >>>>>>> + if (!pbdev) { > >>>>>>> + DPRINTF("pcilg no pci dev\n"); > >>>>>>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > >>>>>>> + return 0; > >>>>>>> + } > >>>>>>> + > >>>>>>> + len = rp->len & 0xF; > >>>>>>> + if (rp->pcias < 6) { > >>>>>>> + if ((8 - (offset & 0x7)) < len) { > >>>>>>> + program_interrupt(env, PGM_OPERAND, 4); > >>>>>>> + return 0; > >>>>>>> + } > >>>>>>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; > >>>>>>> + io_mem_read(mr, offset, &data, len); > >>>>>>> + } else if (rp->pcias == 15) { > >>>>>>> + if ((4 - (offset & 0x3)) < len) { > >>>>>>> + program_interrupt(env, PGM_OPERAND, 4); > >>>>>>> + return 0; > >>>>>>> + } > >>>>>>> + data = pci_host_config_read_common( > >>>>>>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); > >>>>>>> + > >>>>>>> + switch (len) { > >>>>>>> + case 1: > >>>>>>> + break; > >>>>>>> + case 2: > >>>>>>> + data = cpu_to_le16(data); > >>>>>>> + break; > >>>>>>> + case 4: > >>>>>>> + data = cpu_to_le32(data); > >>>>>>> + break; > >>>>>>> + case 8: > >>>>>>> + data = cpu_to_le64(data); > >>>>>>> + break; > >>>>>> > >>>>>> Why? Also, this is wrong. cpu_to_le64 convert between host endianness > >>>>>> and LE. So if you're running this on an LE host, you won't swap the > >>>>>> value and get a broken result. > >>>>>> > >>>>>> If you know that the value is always swapped, use bswapxx(). > >>>>>> > >>>>> > >>>>> Actually the code is right and required for a big endian host :-) > >>>>> pcilg/pcistg provide access to the PCI config space which is defined > >>>>> as PCI byte order (little endian). Since pci_host_config_read_common does > >>>>> already a le to cpu conversion we have to convert back to PCI byte order. > >>>>> Doing an unconditional swap would be a bug on a little endian host. > >>>> > >>>> Why would it be a bug? The value you end up writing is contents of a > >>>> register and thus doesn't have endianness. So if QEMU was an LE process, > >>> > >>> No, the s390 guest executing pcilg instruction expects to receive config space data > >>> in PCI byte order. > >>> > >>>> the value of data would be identical as on a BE QEMU before your swab. > >>>> After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. > >>>> > >>> > >>> Again on BE endian host we do the swap because of pci_host_config_read_common does > >>> read the value and do a byte swap for that value, but we need PCI byte order not BE here. > >>> > >>> On LE host pci_host_config_read_common does not do a byte swap so we do not have to > >>> convert back to PCI byte order. > >> > >> We maintain the PCI config space always in LE byte order in memory, that's why there is a bwap in its read function. The return result of the read function however is always the same, regardless of LE or BE host. If I do a read of size 4, I will always get 0x1, not 0x01000000 returned. > >> > >> So now you need to convert that 0x1 into a 0x01000000 manually here because some architect thought that registers have endianness (which they don't). But you need to do it always, even on an LE host, because the pci config space return value is identical on LE and BE. > >> > > so you tell me pci_host_config_read_common does not end up in pci_default_read_config? > > > > uint32_t pci_default_read_config(PCIDevice *d, > > uint32_t address, int len) > > { > > uint32_t val = 0; > > > > memcpy(&val, d->config + address, len); > > return le32_to_cpu(val); > > } > > > > What did I miss? > > That's exactly where you end up in - and it's there to convert from the > PCI config space backing storage to a native number. > > Imagine you write 0x12345678 at offset 0. Because PCI config space is > defined to be LE, in the PCI config space memory this gets stored as > > 78 56 34 12 > > The reason we do the internal storage of the config space that way is > that it's (in some PCI implementations) legal to access with single byte > granularities. So you could do a pci_config_read(offset = 1) which > should return 0x56. > > However, that means we completely nullify any effect of host endianness > in the PCI config layer already. So if you do pci_config_write(offset = > 0, size = 4, value = 0x12345678), the contents of d->config will always > be identical, regardless of host endianness. The same holds true for > pci_config_read(offset = 0, size = 4). It will always return 0x12345678. > I understood this from the beginning and I completely agree to this. > In your code, you swab that value again. I assume there's a reason > you're swapping it and that it's the way the architecture expects it Yes, s390 pcilg architecture states: Data in the PCI configuration space are treated as being in little-endian byte ordering > (mind to point me to the respective spec so I can verify?). But if the > architecture expects it, then it expects it regardless of host > endianness. The contents of regs[r1] should always be 0x78563412, no > matter whether we're in an LE or a BE environment. > > Does that make sense now? > Absolutely lets make an example for qemu running on BE and LE byte order config space backing pci_default_read_config pcilg (with cpu_to_le) BE 0x78563412 0x12345678 0x78563412 LE 0x78563412 0x78563412 0x78563412 So what is the problem with my code? Adding unconditional byte swap instead of cpu_to_le in pcilg would break architecture for pcilg if qemu is running on LE platform. > > Alex >
On 12.11.14 09:49, Frank Blaschka wrote: > On Tue, Nov 11, 2014 at 04:24:24PM +0100, Alexander Graf wrote: >> >> >> On 11.11.14 15:08, Frank Blaschka wrote: >>> On Tue, Nov 11, 2014 at 01:51:25PM +0100, Alexander Graf wrote: >>>> >>>> >>>> >>>>> Am 11.11.2014 um 13:39 schrieb Frank Blaschka <blaschka@linux.vnet.ibm.com>: >>>>> >>>>>> On Tue, Nov 11, 2014 at 01:16:04PM +0100, Alexander Graf wrote: >>>>>> >>>>>> >>>>>>> On 11.11.14 13:10, Frank Blaschka wrote: >>>>>>>> On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: >>>>>>>> >>>>>>>> >>>>>>>>> On 10.11.14 15:20, Frank Blaschka wrote: >>>>>>>>> From: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>>>>>> >>>>>>>>> This patch implements the s390 pci instructions in qemu. It allows >>>>>>>>> to access and drive pci devices attached to the s390 pci bus. >>>>>>>>> Because of platform constrains devices using IO BARs are not >>>>>>>>> supported. Also a device has to support MSI/MSI-X to run on s390. >>>>>>>>> >>>>>>>>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>>>>>> --- >>>>>>>>> target-s390x/Makefile.objs | 2 +- >>>>>>>>> target-s390x/kvm.c | 52 ++++ >>>>>>>>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ >>>>>>>>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ >>>>>>>>> 4 files changed, 1141 insertions(+), 1 deletion(-) >>>>>>>>> create mode 100644 target-s390x/pci_ic.c >>>>>>>>> create mode 100644 target-s390x/pci_ic.h >>>>>>>>> >>>>>> >>>>>> [...] >>>>>> >>>>>>>>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) >>>>>>>>> +{ >>>>>>>>> + CPUS390XState *env = &cpu->env; >>>>>>>>> + S390PCIBusDevice *pbdev; >>>>>>>>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; >>>>>>>>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; >>>>>>>>> + PciLgStg *rp; >>>>>>>>> + uint64_t offset; >>>>>>>>> + uint64_t data; >>>>>>>>> + uint8_t len; >>>>>>>>> + >>>>>>>>> + cpu_synchronize_state(CPU(cpu)); >>>>>>>>> + >>>>>>>>> + if (env->psw.mask & PSW_MASK_PSTATE) { >>>>>>>>> + program_interrupt(env, PGM_PRIVILEGED, 4); >>>>>>>>> + return 0; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + if (r2 & 0x1) { >>>>>>>>> + program_interrupt(env, PGM_SPECIFICATION, 4); >>>>>>>>> + return 0; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + rp = (PciLgStg *)&env->regs[r2]; >>>>>>>>> + offset = env->regs[r2 + 1]; >>>>>>>>> + >>>>>>>>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); >>>>>>>>> + if (!pbdev) { >>>>>>>>> + DPRINTF("pcilg no pci dev\n"); >>>>>>>>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); >>>>>>>>> + return 0; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + len = rp->len & 0xF; >>>>>>>>> + if (rp->pcias < 6) { >>>>>>>>> + if ((8 - (offset & 0x7)) < len) { >>>>>>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>>>>>> + return 0; >>>>>>>>> + } >>>>>>>>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; >>>>>>>>> + io_mem_read(mr, offset, &data, len); >>>>>>>>> + } else if (rp->pcias == 15) { >>>>>>>>> + if ((4 - (offset & 0x3)) < len) { >>>>>>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>>>>>> + return 0; >>>>>>>>> + } >>>>>>>>> + data = pci_host_config_read_common( >>>>>>>>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); >>>>>>>>> + >>>>>>>>> + switch (len) { >>>>>>>>> + case 1: >>>>>>>>> + break; >>>>>>>>> + case 2: >>>>>>>>> + data = cpu_to_le16(data); >>>>>>>>> + break; >>>>>>>>> + case 4: >>>>>>>>> + data = cpu_to_le32(data); >>>>>>>>> + break; >>>>>>>>> + case 8: >>>>>>>>> + data = cpu_to_le64(data); >>>>>>>>> + break; >>>>>>>> >>>>>>>> Why? Also, this is wrong. cpu_to_le64 convert between host endianness >>>>>>>> and LE. So if you're running this on an LE host, you won't swap the >>>>>>>> value and get a broken result. >>>>>>>> >>>>>>>> If you know that the value is always swapped, use bswapxx(). >>>>>>>> >>>>>>> >>>>>>> Actually the code is right and required for a big endian host :-) >>>>>>> pcilg/pcistg provide access to the PCI config space which is defined >>>>>>> as PCI byte order (little endian). Since pci_host_config_read_common does >>>>>>> already a le to cpu conversion we have to convert back to PCI byte order. >>>>>>> Doing an unconditional swap would be a bug on a little endian host. >>>>>> >>>>>> Why would it be a bug? The value you end up writing is contents of a >>>>>> register and thus doesn't have endianness. So if QEMU was an LE process, >>>>> >>>>> No, the s390 guest executing pcilg instruction expects to receive config space data >>>>> in PCI byte order. >>>>> >>>>>> the value of data would be identical as on a BE QEMU before your swab. >>>>>> After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. >>>>>> >>>>> >>>>> Again on BE endian host we do the swap because of pci_host_config_read_common does >>>>> read the value and do a byte swap for that value, but we need PCI byte order not BE here. >>>>> >>>>> On LE host pci_host_config_read_common does not do a byte swap so we do not have to >>>>> convert back to PCI byte order. >>>> >>>> We maintain the PCI config space always in LE byte order in memory, that's why there is a bwap in its read function. The return result of the read function however is always the same, regardless of LE or BE host. If I do a read of size 4, I will always get 0x1, not 0x01000000 returned. >>>> >>>> So now you need to convert that 0x1 into a 0x01000000 manually here because some architect thought that registers have endianness (which they don't). But you need to do it always, even on an LE host, because the pci config space return value is identical on LE and BE. >>>> >>> so you tell me pci_host_config_read_common does not end up in pci_default_read_config? >>> >>> uint32_t pci_default_read_config(PCIDevice *d, >>> uint32_t address, int len) >>> { >>> uint32_t val = 0; >>> >>> memcpy(&val, d->config + address, len); >>> return le32_to_cpu(val); >>> } >>> >>> What did I miss? >> >> That's exactly where you end up in - and it's there to convert from the >> PCI config space backing storage to a native number. >> >> Imagine you write 0x12345678 at offset 0. Because PCI config space is >> defined to be LE, in the PCI config space memory this gets stored as >> >> 78 56 34 12 >> >> The reason we do the internal storage of the config space that way is >> that it's (in some PCI implementations) legal to access with single byte >> granularities. So you could do a pci_config_read(offset = 1) which >> should return 0x56. >> >> However, that means we completely nullify any effect of host endianness >> in the PCI config layer already. So if you do pci_config_write(offset = >> 0, size = 4, value = 0x12345678), the contents of d->config will always >> be identical, regardless of host endianness. The same holds true for >> pci_config_read(offset = 0, size = 4). It will always return 0x12345678. >> > > I understood this from the beginning and I completely agree to this. > >> In your code, you swab that value again. I assume there's a reason >> you're swapping it and that it's the way the architecture expects it > > Yes, s390 pcilg architecture states: > Data in the PCI configuration space are treated > as being in little-endian byte ordering > >> (mind to point me to the respective spec so I can verify?). But if the >> architecture expects it, then it expects it regardless of host >> endianness. The contents of regs[r1] should always be 0x78563412, no >> matter whether we're in an LE or a BE environment. >> >> Does that make sense now? >> > Absolutely lets make an example for qemu running on BE and LE > > byte order config space backing pci_default_read_config pcilg (with cpu_to_le) > BE 0x78563412 0x12345678 0x78563412 > LE 0x78563412 0x78563412 0x78563412 No, pci_default_read_config() always returns 0x12345678 because it returns a register, not memory. Alex > > So what is the problem with my code? Adding unconditional byte swap instead of > cpu_to_le in pcilg would break architecture for pcilg if qemu is running on LE > platform. > >> >> Alex >> >
On 12/11/2014 10:08, Alexander Graf wrote: > > > On 12.11.14 09:49, Frank Blaschka wrote: >> On Tue, Nov 11, 2014 at 04:24:24PM +0100, Alexander Graf wrote: >>> >>> >>> On 11.11.14 15:08, Frank Blaschka wrote: >>>> On Tue, Nov 11, 2014 at 01:51:25PM +0100, Alexander Graf wrote: >>>>> >>>>> >>>>> >>>>>> Am 11.11.2014 um 13:39 schrieb Frank Blaschka <blaschka@linux.vnet.ibm.com>: >>>>>> >>>>>>> On Tue, Nov 11, 2014 at 01:16:04PM +0100, Alexander Graf wrote: >>>>>>> >>>>>>> >>>>>>>> On 11.11.14 13:10, Frank Blaschka wrote: >>>>>>>>> On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: >>>>>>>>> >>>>>>>>> >>>>>>>>>> On 10.11.14 15:20, Frank Blaschka wrote: >>>>>>>>>> From: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>>>>>>> >>>>>>>>>> This patch implements the s390 pci instructions in qemu. It allows >>>>>>>>>> to access and drive pci devices attached to the s390 pci bus. >>>>>>>>>> Because of platform constrains devices using IO BARs are not >>>>>>>>>> supported. Also a device has to support MSI/MSI-X to run on s390. >>>>>>>>>> >>>>>>>>>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>>>>>>> --- >>>>>>>>>> target-s390x/Makefile.objs | 2 +- >>>>>>>>>> target-s390x/kvm.c | 52 ++++ >>>>>>>>>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ >>>>>>>>>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ >>>>>>>>>> 4 files changed, 1141 insertions(+), 1 deletion(-) >>>>>>>>>> create mode 100644 target-s390x/pci_ic.c >>>>>>>>>> create mode 100644 target-s390x/pci_ic.h >>>>>>>>>> >>>>>>> >>>>>>> [...] >>>>>>> >>>>>>>>>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) >>>>>>>>>> +{ >>>>>>>>>> + CPUS390XState *env = &cpu->env; >>>>>>>>>> + S390PCIBusDevice *pbdev; >>>>>>>>>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; >>>>>>>>>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; >>>>>>>>>> + PciLgStg *rp; >>>>>>>>>> + uint64_t offset; >>>>>>>>>> + uint64_t data; >>>>>>>>>> + uint8_t len; >>>>>>>>>> + >>>>>>>>>> + cpu_synchronize_state(CPU(cpu)); >>>>>>>>>> + >>>>>>>>>> + if (env->psw.mask & PSW_MASK_PSTATE) { >>>>>>>>>> + program_interrupt(env, PGM_PRIVILEGED, 4); >>>>>>>>>> + return 0; >>>>>>>>>> + } >>>>>>>>>> + >>>>>>>>>> + if (r2 & 0x1) { >>>>>>>>>> + program_interrupt(env, PGM_SPECIFICATION, 4); >>>>>>>>>> + return 0; >>>>>>>>>> + } >>>>>>>>>> + >>>>>>>>>> + rp = (PciLgStg *)&env->regs[r2]; >>>>>>>>>> + offset = env->regs[r2 + 1]; >>>>>>>>>> + >>>>>>>>>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); >>>>>>>>>> + if (!pbdev) { >>>>>>>>>> + DPRINTF("pcilg no pci dev\n"); >>>>>>>>>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); >>>>>>>>>> + return 0; >>>>>>>>>> + } >>>>>>>>>> + >>>>>>>>>> + len = rp->len & 0xF; >>>>>>>>>> + if (rp->pcias < 6) { >>>>>>>>>> + if ((8 - (offset & 0x7)) < len) { >>>>>>>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>>>>>>> + return 0; >>>>>>>>>> + } >>>>>>>>>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; >>>>>>>>>> + io_mem_read(mr, offset, &data, len); >>>>>>>>>> + } else if (rp->pcias == 15) { >>>>>>>>>> + if ((4 - (offset & 0x3)) < len) { >>>>>>>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>>>>>>> + return 0; >>>>>>>>>> + } >>>>>>>>>> + data = pci_host_config_read_common( >>>>>>>>>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); >>>>>>>>>> + >>>>>>>>>> + switch (len) { >>>>>>>>>> + case 1: >>>>>>>>>> + break; >>>>>>>>>> + case 2: >>>>>>>>>> + data = cpu_to_le16(data); >>>>>>>>>> + break; >>>>>>>>>> + case 4: >>>>>>>>>> + data = cpu_to_le32(data); >>>>>>>>>> + break; >>>>>>>>>> + case 8: >>>>>>>>>> + data = cpu_to_le64(data); >>>>>>>>>> + break; >>>>>>>>> >>>>>>>>> Why? Also, this is wrong. cpu_to_le64 convert between host endianness >>>>>>>>> and LE. So if you're running this on an LE host, you won't swap the >>>>>>>>> value and get a broken result. >>>>>>>>> >>>>>>>>> If you know that the value is always swapped, use bswapxx(). >>>>>>>>> >>>>>>>> >>>>>>>> Actually the code is right and required for a big endian host :-) >>>>>>>> pcilg/pcistg provide access to the PCI config space which is defined >>>>>>>> as PCI byte order (little endian). Since pci_host_config_read_common does >>>>>>>> already a le to cpu conversion we have to convert back to PCI byte order. >>>>>>>> Doing an unconditional swap would be a bug on a little endian host. >>>>>>> >>>>>>> Why would it be a bug? The value you end up writing is contents of a >>>>>>> register and thus doesn't have endianness. So if QEMU was an LE process, >>>>>> >>>>>> No, the s390 guest executing pcilg instruction expects to receive config space data >>>>>> in PCI byte order. >>>>>> >>>>>>> the value of data would be identical as on a BE QEMU before your swab. >>>>>>> After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. >>>>>>> >>>>>> >>>>>> Again on BE endian host we do the swap because of pci_host_config_read_common does >>>>>> read the value and do a byte swap for that value, but we need PCI byte order not BE here. >>>>>> >>>>>> On LE host pci_host_config_read_common does not do a byte swap so we do not have to >>>>>> convert back to PCI byte order. >>>>> >>>>> We maintain the PCI config space always in LE byte order in memory, that's why there is a bwap in its read function. The return result of the read function however is always the same, regardless of LE or BE host. If I do a read of size 4, I will always get 0x1, not 0x01000000 returned. >>>>> >>>>> So now you need to convert that 0x1 into a 0x01000000 manually here because some architect thought that registers have endianness (which they don't). But you need to do it always, even on an LE host, because the pci config space return value is identical on LE and BE. >>>>> >>>> so you tell me pci_host_config_read_common does not end up in pci_default_read_config? >>>> >>>> uint32_t pci_default_read_config(PCIDevice *d, >>>> uint32_t address, int len) >>>> { >>>> uint32_t val = 0; >>>> >>>> memcpy(&val, d->config + address, len); >>>> return le32_to_cpu(val); >>>> } >>>> >>>> What did I miss? >>> >>> That's exactly where you end up in - and it's there to convert from the >>> PCI config space backing storage to a native number. >>> >>> Imagine you write 0x12345678 at offset 0. Because PCI config space is >>> defined to be LE, in the PCI config space memory this gets stored as >>> >>> 78 56 34 12 >>> >>> The reason we do the internal storage of the config space that way is >>> that it's (in some PCI implementations) legal to access with single byte >>> granularities. So you could do a pci_config_read(offset = 1) which >>> should return 0x56. >>> >>> However, that means we completely nullify any effect of host endianness >>> in the PCI config layer already. So if you do pci_config_write(offset = >>> 0, size = 4, value = 0x12345678), the contents of d->config will always >>> be identical, regardless of host endianness. The same holds true for >>> pci_config_read(offset = 0, size = 4). It will always return 0x12345678. >>> >> >> I understood this from the beginning and I completely agree to this. >> >>> In your code, you swab that value again. I assume there's a reason >>> you're swapping it and that it's the way the architecture expects it >> >> Yes, s390 pcilg architecture states: >> Data in the PCI configuration space are treated >> as being in little-endian byte ordering >> >>> (mind to point me to the respective spec so I can verify?). But if the >>> architecture expects it, then it expects it regardless of host >>> endianness. The contents of regs[r1] should always be 0x78563412, no >>> matter whether we're in an LE or a BE environment. >>> >>> Does that make sense now? >>> >> Absolutely lets make an example for qemu running on BE and LE >> >> byte order config space backing pci_default_read_config pcilg (with cpu_to_le) >> BE 0x78563412 0x12345678 0x78563412 >> LE 0x78563412 0x78563412 0x78563412 > > No, pci_default_read_config() always returns 0x12345678 because it > returns a register, not memory. So: config space pci_default_read_config pcilg (bytes) memcpy cpu_to_le (with cpu_to_le) BE 78 56 34 12 0x78563412 0x12345678 0x78563412 LE 78 56 34 12 0x12345678 0x12345678 0x12345678 Right? Paolo
On 12.11.14 10:11, Paolo Bonzini wrote: > > > On 12/11/2014 10:08, Alexander Graf wrote: >> >> >> On 12.11.14 09:49, Frank Blaschka wrote: >>> On Tue, Nov 11, 2014 at 04:24:24PM +0100, Alexander Graf wrote: >>>> >>>> >>>> On 11.11.14 15:08, Frank Blaschka wrote: >>>>> On Tue, Nov 11, 2014 at 01:51:25PM +0100, Alexander Graf wrote: >>>>>> >>>>>> >>>>>> >>>>>>> Am 11.11.2014 um 13:39 schrieb Frank Blaschka <blaschka@linux.vnet.ibm.com>: >>>>>>> >>>>>>>> On Tue, Nov 11, 2014 at 01:16:04PM +0100, Alexander Graf wrote: >>>>>>>> >>>>>>>> >>>>>>>>> On 11.11.14 13:10, Frank Blaschka wrote: >>>>>>>>>> On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: >>>>>>>>>> >>>>>>>>>> >>>>>>>>>>> On 10.11.14 15:20, Frank Blaschka wrote: >>>>>>>>>>> From: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>>>>>>>> >>>>>>>>>>> This patch implements the s390 pci instructions in qemu. It allows >>>>>>>>>>> to access and drive pci devices attached to the s390 pci bus. >>>>>>>>>>> Because of platform constrains devices using IO BARs are not >>>>>>>>>>> supported. Also a device has to support MSI/MSI-X to run on s390. >>>>>>>>>>> >>>>>>>>>>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>>>>>>>> --- >>>>>>>>>>> target-s390x/Makefile.objs | 2 +- >>>>>>>>>>> target-s390x/kvm.c | 52 ++++ >>>>>>>>>>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ >>>>>>>>>>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ >>>>>>>>>>> 4 files changed, 1141 insertions(+), 1 deletion(-) >>>>>>>>>>> create mode 100644 target-s390x/pci_ic.c >>>>>>>>>>> create mode 100644 target-s390x/pci_ic.h >>>>>>>>>>> >>>>>>>> >>>>>>>> [...] >>>>>>>> >>>>>>>>>>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) >>>>>>>>>>> +{ >>>>>>>>>>> + CPUS390XState *env = &cpu->env; >>>>>>>>>>> + S390PCIBusDevice *pbdev; >>>>>>>>>>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; >>>>>>>>>>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; >>>>>>>>>>> + PciLgStg *rp; >>>>>>>>>>> + uint64_t offset; >>>>>>>>>>> + uint64_t data; >>>>>>>>>>> + uint8_t len; >>>>>>>>>>> + >>>>>>>>>>> + cpu_synchronize_state(CPU(cpu)); >>>>>>>>>>> + >>>>>>>>>>> + if (env->psw.mask & PSW_MASK_PSTATE) { >>>>>>>>>>> + program_interrupt(env, PGM_PRIVILEGED, 4); >>>>>>>>>>> + return 0; >>>>>>>>>>> + } >>>>>>>>>>> + >>>>>>>>>>> + if (r2 & 0x1) { >>>>>>>>>>> + program_interrupt(env, PGM_SPECIFICATION, 4); >>>>>>>>>>> + return 0; >>>>>>>>>>> + } >>>>>>>>>>> + >>>>>>>>>>> + rp = (PciLgStg *)&env->regs[r2]; >>>>>>>>>>> + offset = env->regs[r2 + 1]; >>>>>>>>>>> + >>>>>>>>>>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); >>>>>>>>>>> + if (!pbdev) { >>>>>>>>>>> + DPRINTF("pcilg no pci dev\n"); >>>>>>>>>>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); >>>>>>>>>>> + return 0; >>>>>>>>>>> + } >>>>>>>>>>> + >>>>>>>>>>> + len = rp->len & 0xF; >>>>>>>>>>> + if (rp->pcias < 6) { >>>>>>>>>>> + if ((8 - (offset & 0x7)) < len) { >>>>>>>>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>>>>>>>> + return 0; >>>>>>>>>>> + } >>>>>>>>>>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; >>>>>>>>>>> + io_mem_read(mr, offset, &data, len); >>>>>>>>>>> + } else if (rp->pcias == 15) { >>>>>>>>>>> + if ((4 - (offset & 0x3)) < len) { >>>>>>>>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>>>>>>>> + return 0; >>>>>>>>>>> + } >>>>>>>>>>> + data = pci_host_config_read_common( >>>>>>>>>>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); >>>>>>>>>>> + >>>>>>>>>>> + switch (len) { >>>>>>>>>>> + case 1: >>>>>>>>>>> + break; >>>>>>>>>>> + case 2: >>>>>>>>>>> + data = cpu_to_le16(data); >>>>>>>>>>> + break; >>>>>>>>>>> + case 4: >>>>>>>>>>> + data = cpu_to_le32(data); >>>>>>>>>>> + break; >>>>>>>>>>> + case 8: >>>>>>>>>>> + data = cpu_to_le64(data); >>>>>>>>>>> + break; >>>>>>>>>> >>>>>>>>>> Why? Also, this is wrong. cpu_to_le64 convert between host endianness >>>>>>>>>> and LE. So if you're running this on an LE host, you won't swap the >>>>>>>>>> value and get a broken result. >>>>>>>>>> >>>>>>>>>> If you know that the value is always swapped, use bswapxx(). >>>>>>>>>> >>>>>>>>> >>>>>>>>> Actually the code is right and required for a big endian host :-) >>>>>>>>> pcilg/pcistg provide access to the PCI config space which is defined >>>>>>>>> as PCI byte order (little endian). Since pci_host_config_read_common does >>>>>>>>> already a le to cpu conversion we have to convert back to PCI byte order. >>>>>>>>> Doing an unconditional swap would be a bug on a little endian host. >>>>>>>> >>>>>>>> Why would it be a bug? The value you end up writing is contents of a >>>>>>>> register and thus doesn't have endianness. So if QEMU was an LE process, >>>>>>> >>>>>>> No, the s390 guest executing pcilg instruction expects to receive config space data >>>>>>> in PCI byte order. >>>>>>> >>>>>>>> the value of data would be identical as on a BE QEMU before your swab. >>>>>>>> After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. >>>>>>>> >>>>>>> >>>>>>> Again on BE endian host we do the swap because of pci_host_config_read_common does >>>>>>> read the value and do a byte swap for that value, but we need PCI byte order not BE here. >>>>>>> >>>>>>> On LE host pci_host_config_read_common does not do a byte swap so we do not have to >>>>>>> convert back to PCI byte order. >>>>>> >>>>>> We maintain the PCI config space always in LE byte order in memory, that's why there is a bwap in its read function. The return result of the read function however is always the same, regardless of LE or BE host. If I do a read of size 4, I will always get 0x1, not 0x01000000 returned. >>>>>> >>>>>> So now you need to convert that 0x1 into a 0x01000000 manually here because some architect thought that registers have endianness (which they don't). But you need to do it always, even on an LE host, because the pci config space return value is identical on LE and BE. >>>>>> >>>>> so you tell me pci_host_config_read_common does not end up in pci_default_read_config? >>>>> >>>>> uint32_t pci_default_read_config(PCIDevice *d, >>>>> uint32_t address, int len) >>>>> { >>>>> uint32_t val = 0; >>>>> >>>>> memcpy(&val, d->config + address, len); >>>>> return le32_to_cpu(val); >>>>> } >>>>> >>>>> What did I miss? >>>> >>>> That's exactly where you end up in - and it's there to convert from the >>>> PCI config space backing storage to a native number. >>>> >>>> Imagine you write 0x12345678 at offset 0. Because PCI config space is >>>> defined to be LE, in the PCI config space memory this gets stored as >>>> >>>> 78 56 34 12 >>>> >>>> The reason we do the internal storage of the config space that way is >>>> that it's (in some PCI implementations) legal to access with single byte >>>> granularities. So you could do a pci_config_read(offset = 1) which >>>> should return 0x56. >>>> >>>> However, that means we completely nullify any effect of host endianness >>>> in the PCI config layer already. So if you do pci_config_write(offset = >>>> 0, size = 4, value = 0x12345678), the contents of d->config will always >>>> be identical, regardless of host endianness. The same holds true for >>>> pci_config_read(offset = 0, size = 4). It will always return 0x12345678. >>>> >>> >>> I understood this from the beginning and I completely agree to this. >>> >>>> In your code, you swab that value again. I assume there's a reason >>>> you're swapping it and that it's the way the architecture expects it >>> >>> Yes, s390 pcilg architecture states: >>> Data in the PCI configuration space are treated >>> as being in little-endian byte ordering >>> >>>> (mind to point me to the respective spec so I can verify?). But if the >>>> architecture expects it, then it expects it regardless of host >>>> endianness. The contents of regs[r1] should always be 0x78563412, no >>>> matter whether we're in an LE or a BE environment. >>>> >>>> Does that make sense now? >>>> >>> Absolutely lets make an example for qemu running on BE and LE >>> >>> byte order config space backing pci_default_read_config pcilg (with cpu_to_le) >>> BE 0x78563412 0x12345678 0x78563412 >>> LE 0x78563412 0x78563412 0x78563412 >> >> No, pci_default_read_config() always returns 0x12345678 because it >> returns a register, not memory. > > So: > > config space pci_default_read_config pcilg > (bytes) memcpy cpu_to_le (with cpu_to_le) > BE 78 56 34 12 0x78563412 0x12345678 0x78563412 > LE 78 56 34 12 0x12345678 0x12345678 0x12345678 > > Right? Yes, exactly :). Alex
On Wed, Nov 12, 2014 at 10:08:19AM +0100, Alexander Graf wrote: > > > On 12.11.14 09:49, Frank Blaschka wrote: > > On Tue, Nov 11, 2014 at 04:24:24PM +0100, Alexander Graf wrote: > >> > >> > >> On 11.11.14 15:08, Frank Blaschka wrote: > >>> On Tue, Nov 11, 2014 at 01:51:25PM +0100, Alexander Graf wrote: > >>>> > >>>> > >>>> > >>>>> Am 11.11.2014 um 13:39 schrieb Frank Blaschka <blaschka@linux.vnet.ibm.com>: > >>>>> > >>>>>> On Tue, Nov 11, 2014 at 01:16:04PM +0100, Alexander Graf wrote: > >>>>>> > >>>>>> > >>>>>>> On 11.11.14 13:10, Frank Blaschka wrote: > >>>>>>>> On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: > >>>>>>>> > >>>>>>>> > >>>>>>>>> On 10.11.14 15:20, Frank Blaschka wrote: > >>>>>>>>> From: Frank Blaschka <frank.blaschka@de.ibm.com> > >>>>>>>>> > >>>>>>>>> This patch implements the s390 pci instructions in qemu. It allows > >>>>>>>>> to access and drive pci devices attached to the s390 pci bus. > >>>>>>>>> Because of platform constrains devices using IO BARs are not > >>>>>>>>> supported. Also a device has to support MSI/MSI-X to run on s390. > >>>>>>>>> > >>>>>>>>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> > >>>>>>>>> --- > >>>>>>>>> target-s390x/Makefile.objs | 2 +- > >>>>>>>>> target-s390x/kvm.c | 52 ++++ > >>>>>>>>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ > >>>>>>>>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ > >>>>>>>>> 4 files changed, 1141 insertions(+), 1 deletion(-) > >>>>>>>>> create mode 100644 target-s390x/pci_ic.c > >>>>>>>>> create mode 100644 target-s390x/pci_ic.h > >>>>>>>>> > >>>>>> > >>>>>> [...] > >>>>>> > >>>>>>>>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) > >>>>>>>>> +{ > >>>>>>>>> + CPUS390XState *env = &cpu->env; > >>>>>>>>> + S390PCIBusDevice *pbdev; > >>>>>>>>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > >>>>>>>>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > >>>>>>>>> + PciLgStg *rp; > >>>>>>>>> + uint64_t offset; > >>>>>>>>> + uint64_t data; > >>>>>>>>> + uint8_t len; > >>>>>>>>> + > >>>>>>>>> + cpu_synchronize_state(CPU(cpu)); > >>>>>>>>> + > >>>>>>>>> + if (env->psw.mask & PSW_MASK_PSTATE) { > >>>>>>>>> + program_interrupt(env, PGM_PRIVILEGED, 4); > >>>>>>>>> + return 0; > >>>>>>>>> + } > >>>>>>>>> + > >>>>>>>>> + if (r2 & 0x1) { > >>>>>>>>> + program_interrupt(env, PGM_SPECIFICATION, 4); > >>>>>>>>> + return 0; > >>>>>>>>> + } > >>>>>>>>> + > >>>>>>>>> + rp = (PciLgStg *)&env->regs[r2]; > >>>>>>>>> + offset = env->regs[r2 + 1]; > >>>>>>>>> + > >>>>>>>>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); > >>>>>>>>> + if (!pbdev) { > >>>>>>>>> + DPRINTF("pcilg no pci dev\n"); > >>>>>>>>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); > >>>>>>>>> + return 0; > >>>>>>>>> + } > >>>>>>>>> + > >>>>>>>>> + len = rp->len & 0xF; > >>>>>>>>> + if (rp->pcias < 6) { > >>>>>>>>> + if ((8 - (offset & 0x7)) < len) { > >>>>>>>>> + program_interrupt(env, PGM_OPERAND, 4); > >>>>>>>>> + return 0; > >>>>>>>>> + } > >>>>>>>>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; > >>>>>>>>> + io_mem_read(mr, offset, &data, len); > >>>>>>>>> + } else if (rp->pcias == 15) { > >>>>>>>>> + if ((4 - (offset & 0x3)) < len) { > >>>>>>>>> + program_interrupt(env, PGM_OPERAND, 4); > >>>>>>>>> + return 0; > >>>>>>>>> + } > >>>>>>>>> + data = pci_host_config_read_common( > >>>>>>>>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); > >>>>>>>>> + > >>>>>>>>> + switch (len) { > >>>>>>>>> + case 1: > >>>>>>>>> + break; > >>>>>>>>> + case 2: > >>>>>>>>> + data = cpu_to_le16(data); > >>>>>>>>> + break; > >>>>>>>>> + case 4: > >>>>>>>>> + data = cpu_to_le32(data); > >>>>>>>>> + break; > >>>>>>>>> + case 8: > >>>>>>>>> + data = cpu_to_le64(data); > >>>>>>>>> + break; > >>>>>>>> > >>>>>>>> Why? Also, this is wrong. cpu_to_le64 convert between host endianness > >>>>>>>> and LE. So if you're running this on an LE host, you won't swap the > >>>>>>>> value and get a broken result. > >>>>>>>> > >>>>>>>> If you know that the value is always swapped, use bswapxx(). > >>>>>>>> > >>>>>>> > >>>>>>> Actually the code is right and required for a big endian host :-) > >>>>>>> pcilg/pcistg provide access to the PCI config space which is defined > >>>>>>> as PCI byte order (little endian). Since pci_host_config_read_common does > >>>>>>> already a le to cpu conversion we have to convert back to PCI byte order. > >>>>>>> Doing an unconditional swap would be a bug on a little endian host. > >>>>>> > >>>>>> Why would it be a bug? The value you end up writing is contents of a > >>>>>> register and thus doesn't have endianness. So if QEMU was an LE process, > >>>>> > >>>>> No, the s390 guest executing pcilg instruction expects to receive config space data > >>>>> in PCI byte order. > >>>>> > >>>>>> the value of data would be identical as on a BE QEMU before your swab. > >>>>>> After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. > >>>>>> > >>>>> > >>>>> Again on BE endian host we do the swap because of pci_host_config_read_common does > >>>>> read the value and do a byte swap for that value, but we need PCI byte order not BE here. > >>>>> > >>>>> On LE host pci_host_config_read_common does not do a byte swap so we do not have to > >>>>> convert back to PCI byte order. > >>>> > >>>> We maintain the PCI config space always in LE byte order in memory, that's why there is a bwap in its read function. The return result of the read function however is always the same, regardless of LE or BE host. If I do a read of size 4, I will always get 0x1, not 0x01000000 returned. > >>>> > >>>> So now you need to convert that 0x1 into a 0x01000000 manually here because some architect thought that registers have endianness (which they don't). But you need to do it always, even on an LE host, because the pci config space return value is identical on LE and BE. > >>>> > >>> so you tell me pci_host_config_read_common does not end up in pci_default_read_config? > >>> > >>> uint32_t pci_default_read_config(PCIDevice *d, > >>> uint32_t address, int len) > >>> { > >>> uint32_t val = 0; > >>> > >>> memcpy(&val, d->config + address, len); > >>> return le32_to_cpu(val); > >>> } > >>> > >>> What did I miss? > >> > >> That's exactly where you end up in - and it's there to convert from the > >> PCI config space backing storage to a native number. > >> > >> Imagine you write 0x12345678 at offset 0. Because PCI config space is > >> defined to be LE, in the PCI config space memory this gets stored as > >> > >> 78 56 34 12 > >> > >> The reason we do the internal storage of the config space that way is > >> that it's (in some PCI implementations) legal to access with single byte > >> granularities. So you could do a pci_config_read(offset = 1) which > >> should return 0x56. > >> > >> However, that means we completely nullify any effect of host endianness > >> in the PCI config layer already. So if you do pci_config_write(offset = > >> 0, size = 4, value = 0x12345678), the contents of d->config will always > >> be identical, regardless of host endianness. The same holds true for > >> pci_config_read(offset = 0, size = 4). It will always return 0x12345678. > >> > > > > I understood this from the beginning and I completely agree to this. > > > >> In your code, you swab that value again. I assume there's a reason > >> you're swapping it and that it's the way the architecture expects it > > > > Yes, s390 pcilg architecture states: > > Data in the PCI configuration space are treated > > as being in little-endian byte ordering > > > >> (mind to point me to the respective spec so I can verify?). But if the > >> architecture expects it, then it expects it regardless of host > >> endianness. The contents of regs[r1] should always be 0x78563412, no > >> matter whether we're in an LE or a BE environment. > >> > >> Does that make sense now? > >> > > Absolutely lets make an example for qemu running on BE and LE > > > > byte order config space backing pci_default_read_config pcilg (with cpu_to_le) > > BE 0x78563412 0x12345678 0x78563412 > > LE 0x78563412 0x78563412 0x78563412 > > No, pci_default_read_config() always returns 0x12345678 because it > returns a register, not memory. > You mean implementation of pci_default_read_config is broken? If it should return a register it should not do "return le32_to_cpu(val);" > > Alex > > > > > So what is the problem with my code? Adding unconditional byte swap instead of > > cpu_to_le in pcilg would break architecture for pcilg if qemu is running on LE > > platform. > > > >> > >> Alex > >> > > >
On 12.11.14 10:19, Frank Blaschka wrote: > On Wed, Nov 12, 2014 at 10:08:19AM +0100, Alexander Graf wrote: >> >> >> On 12.11.14 09:49, Frank Blaschka wrote: >>> On Tue, Nov 11, 2014 at 04:24:24PM +0100, Alexander Graf wrote: >>>> >>>> >>>> On 11.11.14 15:08, Frank Blaschka wrote: >>>>> On Tue, Nov 11, 2014 at 01:51:25PM +0100, Alexander Graf wrote: >>>>>> >>>>>> >>>>>> >>>>>>> Am 11.11.2014 um 13:39 schrieb Frank Blaschka <blaschka@linux.vnet.ibm.com>: >>>>>>> >>>>>>>> On Tue, Nov 11, 2014 at 01:16:04PM +0100, Alexander Graf wrote: >>>>>>>> >>>>>>>> >>>>>>>>> On 11.11.14 13:10, Frank Blaschka wrote: >>>>>>>>>> On Mon, Nov 10, 2014 at 04:56:21PM +0100, Alexander Graf wrote: >>>>>>>>>> >>>>>>>>>> >>>>>>>>>>> On 10.11.14 15:20, Frank Blaschka wrote: >>>>>>>>>>> From: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>>>>>>>> >>>>>>>>>>> This patch implements the s390 pci instructions in qemu. It allows >>>>>>>>>>> to access and drive pci devices attached to the s390 pci bus. >>>>>>>>>>> Because of platform constrains devices using IO BARs are not >>>>>>>>>>> supported. Also a device has to support MSI/MSI-X to run on s390. >>>>>>>>>>> >>>>>>>>>>> Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com> >>>>>>>>>>> --- >>>>>>>>>>> target-s390x/Makefile.objs | 2 +- >>>>>>>>>>> target-s390x/kvm.c | 52 ++++ >>>>>>>>>>> target-s390x/pci_ic.c | 753 +++++++++++++++++++++++++++++++++++++++++++++ >>>>>>>>>>> target-s390x/pci_ic.h | 335 ++++++++++++++++++++ >>>>>>>>>>> 4 files changed, 1141 insertions(+), 1 deletion(-) >>>>>>>>>>> create mode 100644 target-s390x/pci_ic.c >>>>>>>>>>> create mode 100644 target-s390x/pci_ic.h >>>>>>>>>>> >>>>>>>> >>>>>>>> [...] >>>>>>>> >>>>>>>>>>> +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) >>>>>>>>>>> +{ >>>>>>>>>>> + CPUS390XState *env = &cpu->env; >>>>>>>>>>> + S390PCIBusDevice *pbdev; >>>>>>>>>>> + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; >>>>>>>>>>> + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; >>>>>>>>>>> + PciLgStg *rp; >>>>>>>>>>> + uint64_t offset; >>>>>>>>>>> + uint64_t data; >>>>>>>>>>> + uint8_t len; >>>>>>>>>>> + >>>>>>>>>>> + cpu_synchronize_state(CPU(cpu)); >>>>>>>>>>> + >>>>>>>>>>> + if (env->psw.mask & PSW_MASK_PSTATE) { >>>>>>>>>>> + program_interrupt(env, PGM_PRIVILEGED, 4); >>>>>>>>>>> + return 0; >>>>>>>>>>> + } >>>>>>>>>>> + >>>>>>>>>>> + if (r2 & 0x1) { >>>>>>>>>>> + program_interrupt(env, PGM_SPECIFICATION, 4); >>>>>>>>>>> + return 0; >>>>>>>>>>> + } >>>>>>>>>>> + >>>>>>>>>>> + rp = (PciLgStg *)&env->regs[r2]; >>>>>>>>>>> + offset = env->regs[r2 + 1]; >>>>>>>>>>> + >>>>>>>>>>> + pbdev = s390_pci_find_dev_by_fh(rp->fh); >>>>>>>>>>> + if (!pbdev) { >>>>>>>>>>> + DPRINTF("pcilg no pci dev\n"); >>>>>>>>>>> + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); >>>>>>>>>>> + return 0; >>>>>>>>>>> + } >>>>>>>>>>> + >>>>>>>>>>> + len = rp->len & 0xF; >>>>>>>>>>> + if (rp->pcias < 6) { >>>>>>>>>>> + if ((8 - (offset & 0x7)) < len) { >>>>>>>>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>>>>>>>> + return 0; >>>>>>>>>>> + } >>>>>>>>>>> + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; >>>>>>>>>>> + io_mem_read(mr, offset, &data, len); >>>>>>>>>>> + } else if (rp->pcias == 15) { >>>>>>>>>>> + if ((4 - (offset & 0x3)) < len) { >>>>>>>>>>> + program_interrupt(env, PGM_OPERAND, 4); >>>>>>>>>>> + return 0; >>>>>>>>>>> + } >>>>>>>>>>> + data = pci_host_config_read_common( >>>>>>>>>>> + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); >>>>>>>>>>> + >>>>>>>>>>> + switch (len) { >>>>>>>>>>> + case 1: >>>>>>>>>>> + break; >>>>>>>>>>> + case 2: >>>>>>>>>>> + data = cpu_to_le16(data); >>>>>>>>>>> + break; >>>>>>>>>>> + case 4: >>>>>>>>>>> + data = cpu_to_le32(data); >>>>>>>>>>> + break; >>>>>>>>>>> + case 8: >>>>>>>>>>> + data = cpu_to_le64(data); >>>>>>>>>>> + break; >>>>>>>>>> >>>>>>>>>> Why? Also, this is wrong. cpu_to_le64 convert between host endianness >>>>>>>>>> and LE. So if you're running this on an LE host, you won't swap the >>>>>>>>>> value and get a broken result. >>>>>>>>>> >>>>>>>>>> If you know that the value is always swapped, use bswapxx(). >>>>>>>>>> >>>>>>>>> >>>>>>>>> Actually the code is right and required for a big endian host :-) >>>>>>>>> pcilg/pcistg provide access to the PCI config space which is defined >>>>>>>>> as PCI byte order (little endian). Since pci_host_config_read_common does >>>>>>>>> already a le to cpu conversion we have to convert back to PCI byte order. >>>>>>>>> Doing an unconditional swap would be a bug on a little endian host. >>>>>>>> >>>>>>>> Why would it be a bug? The value you end up writing is contents of a >>>>>>>> register and thus doesn't have endianness. So if QEMU was an LE process, >>>>>>> >>>>>>> No, the s390 guest executing pcilg instruction expects to receive config space data >>>>>>> in PCI byte order. >>>>>>> >>>>>>>> the value of data would be identical as on a BE QEMU before your swab. >>>>>>>> After the swab, it would be bswap'ed on BE, but not LE. So LE hosts break. >>>>>>>> >>>>>>> >>>>>>> Again on BE endian host we do the swap because of pci_host_config_read_common does >>>>>>> read the value and do a byte swap for that value, but we need PCI byte order not BE here. >>>>>>> >>>>>>> On LE host pci_host_config_read_common does not do a byte swap so we do not have to >>>>>>> convert back to PCI byte order. >>>>>> >>>>>> We maintain the PCI config space always in LE byte order in memory, that's why there is a bwap in its read function. The return result of the read function however is always the same, regardless of LE or BE host. If I do a read of size 4, I will always get 0x1, not 0x01000000 returned. >>>>>> >>>>>> So now you need to convert that 0x1 into a 0x01000000 manually here because some architect thought that registers have endianness (which they don't). But you need to do it always, even on an LE host, because the pci config space return value is identical on LE and BE. >>>>>> >>>>> so you tell me pci_host_config_read_common does not end up in pci_default_read_config? >>>>> >>>>> uint32_t pci_default_read_config(PCIDevice *d, >>>>> uint32_t address, int len) >>>>> { >>>>> uint32_t val = 0; >>>>> >>>>> memcpy(&val, d->config + address, len); >>>>> return le32_to_cpu(val); >>>>> } >>>>> >>>>> What did I miss? >>>> >>>> That's exactly where you end up in - and it's there to convert from the >>>> PCI config space backing storage to a native number. >>>> >>>> Imagine you write 0x12345678 at offset 0. Because PCI config space is >>>> defined to be LE, in the PCI config space memory this gets stored as >>>> >>>> 78 56 34 12 >>>> >>>> The reason we do the internal storage of the config space that way is >>>> that it's (in some PCI implementations) legal to access with single byte >>>> granularities. So you could do a pci_config_read(offset = 1) which >>>> should return 0x56. >>>> >>>> However, that means we completely nullify any effect of host endianness >>>> in the PCI config layer already. So if you do pci_config_write(offset = >>>> 0, size = 4, value = 0x12345678), the contents of d->config will always >>>> be identical, regardless of host endianness. The same holds true for >>>> pci_config_read(offset = 0, size = 4). It will always return 0x12345678. >>>> >>> >>> I understood this from the beginning and I completely agree to this. >>> >>>> In your code, you swab that value again. I assume there's a reason >>>> you're swapping it and that it's the way the architecture expects it >>> >>> Yes, s390 pcilg architecture states: >>> Data in the PCI configuration space are treated >>> as being in little-endian byte ordering >>> >>>> (mind to point me to the respective spec so I can verify?). But if the >>>> architecture expects it, then it expects it regardless of host >>>> endianness. The contents of regs[r1] should always be 0x78563412, no >>>> matter whether we're in an LE or a BE environment. >>>> >>>> Does that make sense now? >>>> >>> Absolutely lets make an example for qemu running on BE and LE >>> >>> byte order config space backing pci_default_read_config pcilg (with cpu_to_le) >>> BE 0x78563412 0x12345678 0x78563412 >>> LE 0x78563412 0x78563412 0x78563412 >> >> No, pci_default_read_config() always returns 0x12345678 because it >> returns a register, not memory. >> > > You mean implementation of pci_default_read_config is broken? > If it should return a register it should not do "return le32_to_cpu(val);" It has to, to convert from memory (after memcpy) to an actual register value. Look at the value list in Paolo's email - I really have no idea how to explain it any better. Alex
On 12/11/2014 10:22, Alexander Graf wrote: >>>> Absolutely lets make an example for qemu running on BE and LE >>>> >>>> byte order config space backing pci_default_read_config pcilg (with cpu_to_le) >>>> BE 0x78563412 0x12345678 0x78563412 >>>> LE 0x78563412 0x78563412 0x78563412 >>> >>> No, pci_default_read_config() always returns 0x12345678 because it >>> returns a register, not memory. >>> >> >> You mean implementation of pci_default_read_config is broken? >> If it should return a register it should not do "return le32_to_cpu(val);" > > It has to, to convert from memory (after memcpy) to an actual register > value. Look at the value list in Paolo's email - I really have no idea > how to explain it any better. pci_default_read_config is reading from a *device* register, and has absolutely zero knowledge of the host CPU endianness. Another way to explain that the result of pci_default_read_config is independent of the host endianness, is that the function is basically doing this: switch (len) { case 1: return d->config[address]; case 2: return ldw_le_p(&d->config[address)]); case 4: return ldl_le_p(&d->config[address)]); default: abort(); } So if you want to make the outcome big endian, you have to swap unconditionally. Paolo
On Wed, Nov 12, 2014 at 10:36:03AM +0100, Paolo Bonzini wrote: > > > On 12/11/2014 10:22, Alexander Graf wrote: > >>>> Absolutely lets make an example for qemu running on BE and LE > >>>> > >>>> byte order config space backing pci_default_read_config pcilg (with cpu_to_le) > >>>> BE 0x78563412 0x12345678 0x78563412 > >>>> LE 0x78563412 0x78563412 0x78563412 > >>> > >>> No, pci_default_read_config() always returns 0x12345678 because it > >>> returns a register, not memory. > >>> > >> > >> You mean implementation of pci_default_read_config is broken? > >> If it should return a register it should not do "return le32_to_cpu(val);" > > > > It has to, to convert from memory (after memcpy) to an actual register > > value. Look at the value list in Paolo's email - I really have no idea > > how to explain it any better. > > pci_default_read_config is reading from a *device* register, and has > absolutely zero knowledge of the host CPU endianness. > > Another way to explain that the result of pci_default_read_config is > independent of the host endianness, is that the function is basically > doing this: > > switch (len) { > case 1: return d->config[address]; > case 2: return ldw_le_p(&d->config[address)]); > case 4: return ldl_le_p(&d->config[address)]); > default: abort(); > } > > So if you want to make the outcome big endian, you have to swap > unconditionally. > > Paolo Hi Paolo, Alex, thx a lot for all the explanation and patience. I think I have understand your point now. I will change the code to unconditional swap. I feel I had a knowledge gap regarding running guest and host which different byte orders. Hope this gap is filled now ;) Frank >
diff --git a/target-s390x/Makefile.objs b/target-s390x/Makefile.objs index 2c57494..cc71400 100644 --- a/target-s390x/Makefile.objs +++ b/target-s390x/Makefile.objs @@ -2,4 +2,4 @@ obj-y += translate.o helper.o cpu.o interrupt.o obj-y += int_helper.o fpu_helper.o cc_helper.o mem_helper.o misc_helper.o obj-y += gdbstub.o obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o -obj-$(CONFIG_KVM) += kvm.o +obj-$(CONFIG_KVM) += kvm.o pci_ic.o diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 5b10a25..d59e740 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -40,6 +40,7 @@ #include "exec/gdbstub.h" #include "trace.h" #include "qapi-event.h" +#include "pci_ic.h" /* #define DEBUG_KVM */ @@ -56,6 +57,7 @@ #define IPA0_B2 0xb200 #define IPA0_B9 0xb900 #define IPA0_EB 0xeb00 +#define IPA0_E3 0xe300 #define PRIV_B2_SCLP_CALL 0x20 #define PRIV_B2_CSCH 0x30 @@ -76,8 +78,17 @@ #define PRIV_B2_XSCH 0x76 #define PRIV_EB_SQBS 0x8a +#define PRIV_EB_PCISTB 0xd0 +#define PRIV_EB_SIC 0xd1 #define PRIV_B9_EQBS 0x9c +#define PRIV_B9_CLP 0xa0 +#define PRIV_B9_PCISTG 0xd0 +#define PRIV_B9_PCILG 0xd2 +#define PRIV_B9_RPCIT 0xd3 + +#define PRIV_E3_MPCIFC 0xd0 +#define PRIV_E3_STPCIFC 0xd4 #define DIAG_IPL 0x308 #define DIAG_KVM_HYPERCALL 0x500 @@ -814,6 +825,18 @@ static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) int r = 0; switch (ipa1) { + case PRIV_B9_CLP: + r = kvm_clp_service_call(cpu, run); + break; + case PRIV_B9_PCISTG: + r = kvm_pcistg_service_call(cpu, run); + break; + case PRIV_B9_PCILG: + r = kvm_pcilg_service_call(cpu, run); + break; + case PRIV_B9_RPCIT: + r = kvm_rpcit_service_call(cpu, run); + break; case PRIV_B9_EQBS: /* just inject exception */ r = -1; @@ -832,6 +855,12 @@ static int handle_eb(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) int r = 0; switch (ipa1) { + case PRIV_EB_PCISTB: + r = kvm_pcistb_service_call(cpu, run); + break; + case PRIV_EB_SIC: + r = kvm_sic_service_call(cpu, run); + break; case PRIV_EB_SQBS: /* just inject exception */ r = -1; @@ -845,6 +874,26 @@ static int handle_eb(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) return r; } +static int handle_e3(S390CPU *cpu, struct kvm_run *run, uint8_t ipbl) +{ + int r = 0; + + switch (ipbl) { + case PRIV_E3_MPCIFC: + r = kvm_mpcifc_service_call(cpu, run); + break; + case PRIV_E3_STPCIFC: + r = kvm_stpcifc_service_call(cpu, run); + break; + default: + r = -1; + DPRINTF("KVM: unhandled PRIV: 0xe3%x\n", ipbl); + break; + } + + return r; +} + static int handle_hypercall(S390CPU *cpu, struct kvm_run *run) { CPUS390XState *env = &cpu->env; @@ -1041,6 +1090,9 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run) case IPA0_EB: r = handle_eb(cpu, run, ipa1); break; + case IPA0_E3: + r = handle_e3(cpu, run, run->s390_sieic.ipb & 0xff); + break; case IPA0_DIAG: r = handle_diag(cpu, run, run->s390_sieic.ipb); break; diff --git a/target-s390x/pci_ic.c b/target-s390x/pci_ic.c new file mode 100644 index 0000000..6c05faf --- /dev/null +++ b/target-s390x/pci_ic.c @@ -0,0 +1,753 @@ +/* + * s390 PCI intercepts + * + * Copyright 2014 IBM Corp. + * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com> + * Hong Bo Li <lihbbj@cn.ibm.com> + * Yi Min Zhao <zyimin@cn.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include <linux/kvm.h> +#include <asm/ptrace.h> +#include <hw/pci/pci.h> +#include <hw/pci/pci_host.h> +#include <net/net.h> + +#include "qemu-common.h" +#include "qemu/timer.h" +#include "migration/qemu-file.h" +#include "sysemu/sysemu.h" +#include "sysemu/kvm.h" +#include "cpu.h" +#include "sysemu/device_tree.h" +#include "monitor/monitor.h" +#include "pci_ic.h" + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_host.h" +#include "hw/s390x/s390-pci-bus.h" +#include "exec/exec-all.h" +#include "exec/memory-internal.h" + +/* #define DEBUG_S390PCI_IC */ +#ifdef DEBUG_S390PCI_IC +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "s390pci_ic: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) \ + do { } while (0) +#endif + +static uint64_t resume_token; + +static uint8_t barsize(uint64_t size) +{ + uint64_t mask = 1; + int i; + + if (!size) { + return 0; + } + + for (i = 0; i < 64; i++) { + if (size & mask) { + break; + } + mask = (mask << 1); + } + + return i; +} + +static void s390_set_status_code(CPUS390XState *env, + uint8_t r, uint64_t status_code) +{ + env->regs[r] &= ~0xff000000; + env->regs[r] |= (status_code & 0xff) << 24; +} + +static int list_pci(ClpReqRspListPci *rrb, uint8_t *cc) +{ + S390PCIBusDevice *pbdev; + uint32_t res_code, initial_l2, g_l2, finish; + int rc, idx; + + rc = 0; + if (be16_to_cpu(rrb->request.hdr.len) != 32) { + res_code = CLP_RC_LEN; + rc = -EINVAL; + goto out; + } + + if ((be32_to_cpu(rrb->request.fmt) & CLP_MASK_FMT) != 0) { + res_code = CLP_RC_FMT; + rc = -EINVAL; + goto out; + } + + if ((be32_to_cpu(rrb->request.fmt) & ~CLP_MASK_FMT) != 0 || + rrb->request.reserved1 != 0 || + rrb->request.reserved2 != 0) { + res_code = CLP_RC_RESNOT0; + rc = -EINVAL; + goto out; + } + + if (be64_to_cpu(rrb->request.resume_token) == 0) { + resume_token = 0; + } else if (be64_to_cpu(rrb->request.resume_token) != resume_token) { + res_code = CLP_RC_LISTPCI_BADRT; + rc = -EINVAL; + goto out; + } + + if (be16_to_cpu(rrb->response.hdr.len) < 48) { + res_code = CLP_RC_8K; + rc = -EINVAL; + goto out; + } + + initial_l2 = be16_to_cpu(rrb->response.hdr.len); + if ((initial_l2 - LIST_PCI_HDR_LEN) % sizeof(ClpFhListEntry) + != 0) { + rc = -EINVAL; + *cc = 3; + goto out; + } + + rrb->response.fmt = 0; + rrb->response.reserved1 = rrb->response.reserved2 = 0; + rrb->response.mdd = cpu_to_be32(FH_VIRT); + rrb->response.max_fn = cpu_to_be16(PCI_MAX_FUNCTIONS); + rrb->response.entry_size = sizeof(ClpFhListEntry); + finish = 0; + idx = resume_token; + g_l2 = LIST_PCI_HDR_LEN; + do { + pbdev = s390_pci_find_dev_by_idx(idx); + if (!pbdev) { + finish = 1; + break; + } + rrb->response.fh_list[idx - resume_token].device_id = + pci_get_word(pbdev->pdev->config + PCI_DEVICE_ID); + rrb->response.fh_list[idx - resume_token].vendor_id = + pci_get_word(pbdev->pdev->config + PCI_VENDOR_ID); + rrb->response.fh_list[idx - resume_token].config = + cpu_to_be32(0x80000000); + rrb->response.fh_list[idx - resume_token].fid = cpu_to_be32(pbdev->fid); + rrb->response.fh_list[idx - resume_token].fh = cpu_to_be32(pbdev->fh); + + g_l2 += sizeof(ClpFhListEntry); + DPRINTF("g_l2 %d vendor id 0x%x device id 0x%x fid 0x%x fh 0x%x\n", + g_l2, + rrb->response.fh_list[idx - resume_token].vendor_id, + rrb->response.fh_list[idx - resume_token].device_id, + rrb->response.fh_list[idx - resume_token].fid, + rrb->response.fh_list[idx - resume_token].fh); + idx++; + } while (g_l2 < initial_l2); + + if (finish == 1) { + resume_token = 0; + } else { + resume_token = idx; + } + rrb->response.resume_token = cpu_to_be64(resume_token); + rrb->response.hdr.len = cpu_to_be16(g_l2); + rrb->response.hdr.rsp = cpu_to_be16(CLP_RC_OK); +out: + if (rc) { + DPRINTF("list pci failed rc 0x%x\n", rc); + rrb->response.hdr.rsp = cpu_to_be16(res_code); + } + return rc; +} + +int kvm_clp_service_call(S390CPU *cpu, struct kvm_run *run) +{ + ClpReqHdr *reqh; + ClpRspHdr *resh; + S390PCIBusDevice *pbdev; + uint32_t req_len; + uint32_t res_len; + uint8_t *buffer; + uint8_t cc = 0; + CPUS390XState *env = &cpu->env; + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; + int i; + + buffer = g_malloc0(4096 * 2); + cpu_synchronize_state(CPU(cpu)); + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 4); + return 0; + } + + cpu_physical_memory_rw(env->regs[r2], buffer, sizeof(*reqh), 0); + reqh = (ClpReqHdr *)buffer; + req_len = be16_to_cpu(reqh->len); + if (req_len < 16 || req_len > 8184 || (req_len % 8 != 0)) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + + cpu_physical_memory_rw(env->regs[r2], buffer, req_len + sizeof(*resh), 0); + resh = (ClpRspHdr *)(buffer + req_len); + res_len = be16_to_cpu(resh->len); + if (res_len < 8 || res_len > 8176 || (res_len % 8 != 0)) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + if ((req_len + res_len) > 8192) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + + cpu_physical_memory_rw(env->regs[r2], buffer, req_len + res_len, 0); + + if (req_len != 32) { + resh->rsp = cpu_to_be16(CLP_RC_LEN); + goto out; + } + + switch (reqh->cmd) { + case CLP_LIST_PCI: { + ClpReqRspListPci *rrb = (ClpReqRspListPci *)buffer; + list_pci(rrb, &cc); + break; + } + case CLP_SET_PCI_FN: { + ClpReqSetPci *reqsetpci = (ClpReqSetPci *)reqh; + ClpRspSetPci *ressetpci = (ClpRspSetPci *)resh; + + pbdev = s390_pci_find_dev_by_fh(be32_to_cpu(reqsetpci->fh)); + if (!pbdev) { + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_SETPCIFN_FH); + goto out; + } + + switch (reqsetpci->oc) { + case CLP_SET_ENABLE_PCI_FN: + pbdev->fh = pbdev->fh | 1 << ENABLE_BIT_OFFSET; + ressetpci->fh = cpu_to_be32(pbdev->fh); + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_OK); + break; + case CLP_SET_DISABLE_PCI_FN: + pbdev->fh = pbdev->fh & ~(1 << ENABLE_BIT_OFFSET); + ressetpci->fh = cpu_to_be32(pbdev->fh); + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_OK); + break; + default: + DPRINTF("unknown set pci command\n"); + ressetpci->hdr.rsp = cpu_to_be16(CLP_RC_SETPCIFN_FHOP); + break; + } + break; + } + case CLP_QUERY_PCI_FN: { + ClpReqQueryPci *reqquery = (ClpReqQueryPci *)reqh; + ClpRspQueryPci *resquery = (ClpRspQueryPci *)resh; + + pbdev = s390_pci_find_dev_by_fh(reqquery->fh); + if (!pbdev) { + DPRINTF("query pci no pci dev\n"); + resquery->hdr.rsp = cpu_to_be16(CLP_RC_SETPCIFN_FH); + goto out; + } + + for (i = 0; i < PCI_BAR_COUNT; i++) { + uint64_t data = pci_host_config_read_common(pbdev->pdev, + 0x10 + (i * 4), pci_config_size(pbdev->pdev), 4); + + resquery->bar[i] = bswap32(data); + resquery->bar_size[i] = barsize(pbdev->pdev->io_regions[i].size); + DPRINTF("bar %d addr 0x%x size 0x%lx barsize 0x%x\n", i, + resquery->bar[i], pbdev->pdev->io_regions[i].size, + resquery->bar_size[i]); + } + + resquery->sdma = ZPCI_SDMA_ADDR; + resquery->edma = ZPCI_EDMA_ADDR; + resquery->pchid = 0; + resquery->ug = 1; + resquery->uid = pbdev->fid; + + resquery->hdr.rsp = CLP_RC_OK; + break; + } + case CLP_QUERY_PCI_FNGRP: { + ClpRspQueryPciGrp *resgrp = (ClpRspQueryPciGrp *)resh; + resgrp->fr = 1; + resgrp->dasm = 0; + resgrp->msia = ZPCI_MSI_ADDR; + resgrp->mui = 0; + resgrp->i = 128; + resgrp->version = 0; + + resgrp->hdr.rsp = CLP_RC_OK; + break; + } + default: + DPRINTF("unknown clp command\n"); + resh->rsp = cpu_to_be16(CLP_RC_CMD); + break; + } + +out: + cpu_physical_memory_rw(env->regs[r2], buffer, req_len + res_len, 1); + g_free(buffer); + setcc(cpu, cc); + return 0; +} + +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) +{ + CPUS390XState *env = &cpu->env; + S390PCIBusDevice *pbdev; + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; + PciLgStg *rp; + uint64_t offset; + uint64_t data; + uint8_t len; + + cpu_synchronize_state(CPU(cpu)); + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 4); + return 0; + } + + if (r2 & 0x1) { + program_interrupt(env, PGM_SPECIFICATION, 4); + return 0; + } + + rp = (PciLgStg *)&env->regs[r2]; + offset = env->regs[r2 + 1]; + + pbdev = s390_pci_find_dev_by_fh(rp->fh); + if (!pbdev) { + DPRINTF("pcilg no pci dev\n"); + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + return 0; + } + + len = rp->len & 0xF; + if (rp->pcias < 6) { + if ((8 - (offset & 0x7)) < len) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; + io_mem_read(mr, offset, &data, len); + } else if (rp->pcias == 15) { + if ((4 - (offset & 0x3)) < len) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + data = pci_host_config_read_common( + pbdev->pdev, offset, pci_config_size(pbdev->pdev), len); + + switch (len) { + case 1: + break; + case 2: + data = cpu_to_le16(data); + break; + case 4: + data = cpu_to_le32(data); + break; + case 8: + data = cpu_to_le64(data); + break; + default: + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + } else { + DPRINTF("invalid space\n"); + setcc(cpu, ZPCI_PCI_LS_ERR); + s390_set_status_code(env, r2, ZPCI_PCI_ST_INVAL_AS); + return 0; + } + + env->regs[r1] = data; + setcc(cpu, ZPCI_PCI_LS_OK); + return 0; +} + +static void update_msix_table_msg_data(S390PCIBusDevice *pbdev, uint64_t offset, + uint64_t *data, uint8_t len) +{ + uint32_t msg_data; + + if (offset % PCI_MSIX_ENTRY_SIZE != 8) { + return; + } + + if (len != 4) { + DPRINTF("access msix table msg data but len is %d\n", len); + return; + } + + msg_data = (pbdev->fid << ZPCI_MSI_VEC_BITS) | le32_to_cpu(*data); + *data = cpu_to_le32(msg_data); + DPRINTF("update msix msg_data to 0x%x\n", msg_data); +} + +static int trap_msix(S390PCIBusDevice *pbdev, uint64_t offset, uint8_t pcias) +{ + if (pbdev->msix.available && pbdev->msix.table_bar == pcias && + offset >= pbdev->msix.table_offset && + offset <= pbdev->msix.table_offset + + (pbdev->msix.entries - 1) * PCI_MSIX_ENTRY_SIZE) { + return 1; + } else { + return 0; + } +} + +int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run) +{ + CPUS390XState *env = &cpu->env; + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; + PciLgStg *rp; + uint64_t offset, data; + S390PCIBusDevice *pbdev; + uint8_t len; + + cpu_synchronize_state(CPU(cpu)); + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 4); + return 0; + } + + if (r2 & 0x1) { + program_interrupt(env, PGM_SPECIFICATION, 4); + return 0; + } + + rp = (PciLgStg *)&env->regs[r2]; + offset = env->regs[r2 + 1]; + + pbdev = s390_pci_find_dev_by_fh(rp->fh); + if (!pbdev) { + DPRINTF("pcistg no pci dev\n"); + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + return 0; + } + + data = env->regs[r1]; + len = rp->len & 0xF; + if (rp->pcias < 6) { + if ((8 - (offset & 0x7)) < len) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + MemoryRegion *mr; + if (trap_msix(pbdev, offset, rp->pcias)) { + offset = offset - pbdev->msix.table_offset; + mr = &pbdev->pdev->msix_table_mmio; + update_msix_table_msg_data(pbdev, offset, &data, len); + } else { + mr = pbdev->pdev->io_regions[rp->pcias].memory; + } + + io_mem_write(mr, offset, data, len); + } else if (rp->pcias == 15) { + if ((4 - (offset & 0x3)) < len) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + switch (len) { + case 1: + break; + case 2: + data = le16_to_cpu(data); + break; + case 4: + data = le32_to_cpu(data); + break; + case 8: + data = le64_to_cpu(data); + break; + default: + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } + + pci_host_config_write_common(pbdev->pdev, offset, + pci_config_size(pbdev->pdev), + data, len); + } else { + DPRINTF("pcistg invalid space\n"); + setcc(cpu, ZPCI_PCI_LS_ERR); + s390_set_status_code(env, r2, ZPCI_PCI_ST_INVAL_AS); + return 0; + } + + setcc(cpu, ZPCI_PCI_LS_OK); + return 0; +} + +int kvm_rpcit_service_call(S390CPU *cpu, struct kvm_run *run) +{ + CPUS390XState *env = &cpu->env; + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; + uint32_t fh; + uint64_t pte; + S390PCIBusDevice *pbdev; + ram_addr_t size; + int flags; + IOMMUTLBEntry entry; + + cpu_synchronize_state(CPU(cpu)); + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 4); + return 0; + } + + if (r2 & 0x1) { + program_interrupt(env, PGM_SPECIFICATION, 4); + return 0; + } + + fh = env->regs[r1] >> 32; + size = env->regs[r2 + 1]; + + pbdev = s390_pci_find_dev_by_fh(fh); + + if (!pbdev) { + DPRINTF("rpcit no pci dev\n"); + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + return 0; + } + + pte = s390_guest_io_table_walk(s390_pci_get_table_origin(pbdev->g_iota), + env->regs[r2]); + flags = pte & ZPCI_PTE_FLAG_MASK; + entry.target_as = &address_space_memory; + entry.iova = env->regs[r2]; + entry.translated_addr = pte & ZPCI_PTE_ADDR_MASK; + entry.addr_mask = size - 1; + + if (flags & ZPCI_PTE_INVALID) { + entry.perm = IOMMU_NONE; + } else { + entry.perm = IOMMU_RW; + } + + memory_region_notify_iommu(pci_device_iommu_address_space( + pbdev->pdev)->root, entry); + + setcc(cpu, ZPCI_PCI_LS_OK); + return 0; +} + +int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run) +{ + qemu_log_mask(LOG_UNIMP, "SIC missing\n"); + return 0; +} + +int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run) +{ + CPUS390XState *env = &cpu->env; + uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; + uint8_t r3 = run->s390_sieic.ipa & 0x000f; + PciStb *rp; + uint64_t gaddr; + uint64_t *uaddr, *pu; + hwaddr len; + S390PCIBusDevice *pbdev; + MemoryRegion *mr; + int i; + + cpu_synchronize_state(CPU(cpu)); + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 6); + return 0; + } + + rp = (PciStb *)&env->regs[r1]; + if (rp->pcias > 5) { + DPRINTF("pcistb invalid space\n"); + setcc(cpu, ZPCI_PCI_LS_ERR); + s390_set_status_code(env, r1, ZPCI_PCI_ST_INVAL_AS); + return 0; + } + + switch (rp->len) { + case 16: + case 32: + case 64: + case 128: + break; + default: + program_interrupt(env, PGM_SPECIFICATION, 6); + return 0; + } + + gaddr = get_base_disp_rsy(cpu, run); + len = rp->len; + + pbdev = s390_pci_find_dev_by_fh(rp->fh); + if (!pbdev) { + DPRINTF("pcistb no pci dev fh 0x%x\n", rp->fh); + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + return 0; + } + + uaddr = cpu_physical_memory_map(gaddr, &len, 0); + mr = pbdev->pdev->io_regions[rp->pcias].memory; + if (!memory_region_access_valid(mr, env->regs[r3], rp->len, true)) { + cpu_physical_memory_unmap(uaddr, len, 0, len); + program_interrupt(env, PGM_ADDRESSING, 6); + return 0; + } + + pu = uaddr; + for (i = 0; i < rp->len / 8; i++) { + io_mem_write(mr, env->regs[r3] + i * 8, *pu, 8); + pu++; + } + + cpu_physical_memory_unmap(uaddr, len, 0, len); + setcc(cpu, ZPCI_PCI_LS_OK); + return 0; +} + +static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib) +{ + int ret; + S390FLICState *fs = s390_get_flic(); + S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs); + + ret = css_register_io_adapter(S390_PCIPT_ADAPTER, + FIB_DATA_ISC(fib.data), true, false, + &pbdev->routes.adapter.adapter_id); + assert(ret == 0); + + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, fib.aisb, true); + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, fib.aibv, true); + + pbdev->routes.adapter.summary_addr = fib.aisb; + pbdev->routes.adapter.summary_offset = FIB_DATA_AISBO(fib.data); + pbdev->routes.adapter.ind_addr = fib.aibv; + pbdev->routes.adapter.ind_offset = FIB_DATA_AIBVO(fib.data); + + DPRINTF("reg_irqs adapter id %d\n", pbdev->routes.adapter.adapter_id); + return 0; +} + +static int dereg_irqs(S390PCIBusDevice *pbdev) +{ + S390FLICState *fs = s390_get_flic(); + S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs); + + fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id, + pbdev->routes.adapter.ind_addr, false); + + pbdev->routes.adapter.summary_addr = 0; + pbdev->routes.adapter.summary_offset = 0; + pbdev->routes.adapter.ind_addr = 0; + pbdev->routes.adapter.ind_offset = 0; + + DPRINTF("dereg_irqs adapter id %d\n", pbdev->routes.adapter.adapter_id); + return 0; +} + +int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run) +{ + CPUS390XState *env = &cpu->env; + uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; + uint8_t oc; + uint32_t fh; + uint64_t fiba; + ZpciFib fib; + S390PCIBusDevice *pbdev; + + cpu_synchronize_state(CPU(cpu)); + + if (env->psw.mask & PSW_MASK_PSTATE) { + program_interrupt(env, PGM_PRIVILEGED, 6); + return 0; + } + + oc = env->regs[r1] & 0xff; + fh = env->regs[r1] >> 32; + fiba = get_base_disp_rxy(cpu, run); + + if (fiba & 0x7) { + program_interrupt(env, PGM_SPECIFICATION, 6); + return 0; + } + + pbdev = s390_pci_find_dev_by_fh(fh); + if (!pbdev) { + DPRINTF("mpcifc no pci dev fh 0x%x\n", fh); + setcc(cpu, ZPCI_PCI_LS_INVAL_HANDLE); + return 0; + } + + cpu_physical_memory_rw(fiba, (uint8_t *)&fib, sizeof(fib), 0); + + switch (oc) { + case ZPCI_MOD_FC_REG_INT: { + pbdev->isc = FIB_DATA_ISC(fib.data); + reg_irqs(env, pbdev, fib); + break; + } + case ZPCI_MOD_FC_DEREG_INT: + dereg_irqs(pbdev); + break; + case ZPCI_MOD_FC_REG_IOAT: + if (fib.pba > fib.pal) { + program_interrupt(&cpu->env, PGM_OPERAND, 6); + return 0; + } + pbdev->g_iota = fib.iota; + break; + case ZPCI_MOD_FC_DEREG_IOAT: + break; + case ZPCI_MOD_FC_REREG_IOAT: + break; + case ZPCI_MOD_FC_RESET_ERROR: + break; + case ZPCI_MOD_FC_RESET_BLOCK: + break; + case ZPCI_MOD_FC_SET_MEASURE: + break; + default: + program_interrupt(&cpu->env, PGM_OPERAND, 6); + return 0; + } + + setcc(cpu, ZPCI_PCI_LS_OK); + return 0; +} + +int kvm_stpcifc_service_call(S390CPU *cpu, struct kvm_run *run) +{ + qemu_log_mask(LOG_UNIMP, "STPCIFC missing\n"); + return 0; +} diff --git a/target-s390x/pci_ic.h b/target-s390x/pci_ic.h new file mode 100644 index 0000000..0eb6c27 --- /dev/null +++ b/target-s390x/pci_ic.h @@ -0,0 +1,335 @@ +/* + * s390 PCI intercept definitions + * + * Copyright 2014 IBM Corp. + * Author(s): Frank Blaschka <frank.blaschka@de.ibm.com> + * Hong Bo Li <lihbbj@cn.ibm.com> + * Yi Min Zhao <zyimin@cn.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef PCI_IC_S390X_H +#define PCI_IC_S390X_H + +#include <sysemu/dma.h> + +/* CLP common request & response block size */ +#define CLP_BLK_SIZE 4096 +#define PCI_BAR_COUNT 6 +#define PCI_MAX_FUNCTIONS 4096 + +typedef struct ClpReqHdr { + __uint16_t len; + __uint16_t cmd; +} QEMU_PACKED ClpReqHdr; + +typedef struct ClpRspHdr { + __uint16_t len; + __uint16_t rsp; +} QEMU_PACKED ClpRspHdr; + +/* CLP Response Codes */ +#define CLP_RC_OK 0x0010 /* Command request successfully */ +#define CLP_RC_CMD 0x0020 /* Command code not recognized */ +#define CLP_RC_PERM 0x0030 /* Command not authorized */ +#define CLP_RC_FMT 0x0040 /* Invalid command request format */ +#define CLP_RC_LEN 0x0050 /* Invalid command request length */ +#define CLP_RC_8K 0x0060 /* Command requires 8K LPCB */ +#define CLP_RC_RESNOT0 0x0070 /* Reserved field not zero */ +#define CLP_RC_NODATA 0x0080 /* No data available */ +#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ + +/* + * Call Logical Processor - Command Codes + */ +#define CLP_LIST_PCI 0x0002 +#define CLP_QUERY_PCI_FN 0x0003 +#define CLP_QUERY_PCI_FNGRP 0x0004 +#define CLP_SET_PCI_FN 0x0005 + +/* PCI function handle list entry */ +typedef struct ClpFhListEntry { + __uint16_t device_id; + __uint16_t vendor_id; +#define CLP_FHLIST_MASK_CONFIG 0x80000000 + __uint32_t config; + __uint32_t fid; + __uint32_t fh; +} QEMU_PACKED ClpFhListEntry; + +#define CLP_RC_SETPCIFN_FH 0x0101 /* Invalid PCI fn handle */ +#define CLP_RC_SETPCIFN_FHOP 0x0102 /* Fn handle not valid for op */ +#define CLP_RC_SETPCIFN_DMAAS 0x0103 /* Invalid DMA addr space */ +#define CLP_RC_SETPCIFN_RES 0x0104 /* Insufficient resources */ +#define CLP_RC_SETPCIFN_ALRDY 0x0105 /* Fn already in requested state */ +#define CLP_RC_SETPCIFN_ERR 0x0106 /* Fn in permanent error state */ +#define CLP_RC_SETPCIFN_RECPND 0x0107 /* Error recovery pending */ +#define CLP_RC_SETPCIFN_BUSY 0x0108 /* Fn busy */ +#define CLP_RC_LISTPCI_BADRT 0x010a /* Resume token not recognized */ +#define CLP_RC_QUERYPCIFG_PFGID 0x010b /* Unrecognized PFGID */ + +/* request or response block header length */ +#define LIST_PCI_HDR_LEN 32 + +/* Number of function handles fitting in response block */ +#define CLP_FH_LIST_NR_ENTRIES \ + ((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN) \ + / sizeof(ClpFhListEntry)) + +#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */ +#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */ + +#define CLP_UTIL_STR_LEN 64 + +#define CLP_MASK_FMT 0xf0000000 + +/* List PCI functions request */ +typedef struct ClpReqListPci { + ClpReqHdr hdr; + __uint32_t fmt; + __uint64_t reserved1; + __uint64_t resume_token; + __uint64_t reserved2; +} QEMU_PACKED ClpReqListPci; + +/* List PCI functions response */ +typedef struct ClpRspListPci { + ClpRspHdr hdr; + __uint32_t fmt; + __uint64_t reserved1; + __uint64_t resume_token; + __uint32_t mdd; + __uint16_t max_fn; + __uint8_t reserved2; + __uint8_t entry_size; + ClpFhListEntry fh_list[CLP_FH_LIST_NR_ENTRIES]; +} QEMU_PACKED ClpRspListPci; + +/* Query PCI function request */ +typedef struct ClpReqQueryPci { + ClpReqHdr hdr; + __uint32_t fmt; + __uint64_t reserved1; + __uint32_t fh; /* function handle */ + __uint32_t reserved2; + __uint64_t reserved3; +} QEMU_PACKED ClpReqQueryPci; + +/* Query PCI function response */ +typedef struct ClpRspQueryPci { + ClpRspHdr hdr; + __uint32_t fmt; + __uint64_t reserved1; + __uint16_t vfn; /* virtual fn number */ +#define CLP_RSP_QPCI_MASK_UTIL 0x100 +#define CLP_RSP_QPCI_MASK_PFGID 0xff + __uint16_t ug; + __uint32_t fid; /* pci function id */ + __uint8_t bar_size[PCI_BAR_COUNT]; + __uint16_t pchid; + __uint32_t bar[PCI_BAR_COUNT]; + __uint64_t reserved2; + __uint64_t sdma; /* start dma as */ + __uint64_t edma; /* end dma as */ + __uint32_t reserved3[11]; + __uint32_t uid; + __uint8_t util_str[CLP_UTIL_STR_LEN]; /* utility string */ +} QEMU_PACKED ClpRspQueryPci; + +/* Query PCI function group request */ +typedef struct ClpReqQueryPciGrp { + ClpReqHdr hdr; + __uint32_t fmt; + __uint64_t reserved1; +#define CLP_REQ_QPCIG_MASK_PFGID 0xff + __uint32_t g; + __uint32_t reserved2; + __uint64_t reserved3; +} QEMU_PACKED ClpReqQueryPciGrp; + +/* Query PCI function group response */ +typedef struct ClpRspQueryPciGrp { + ClpRspHdr hdr; + __uint32_t fmt; + __uint64_t reserved1; +#define CLP_RSP_QPCIG_MASK_NOI 0xfff + __uint16_t i; + __uint8_t version; +#define CLP_RSP_QPCIG_MASK_FRAME 0x2 +#define CLP_RSP_QPCIG_MASK_REFRESH 0x1 + __uint8_t fr; + __uint16_t reserved2; + __uint16_t mui; + __uint64_t reserved3; + __uint64_t dasm; /* dma address space mask */ + __uint64_t msia; /* MSI address */ + __uint64_t reserved4; + __uint64_t reserved5; +} QEMU_PACKED ClpRspQueryPciGrp; + +/* Set PCI function request */ +typedef struct ClpReqSetPci { + ClpReqHdr hdr; + __uint32_t fmt; + __uint64_t reserved1; + __uint32_t fh; /* function handle */ + __uint16_t reserved2; + __uint8_t oc; /* operation controls */ + __uint8_t ndas; /* number of dma spaces */ + __uint64_t reserved3; +} QEMU_PACKED ClpReqSetPci; + +/* Set PCI function response */ +typedef struct ClpRspSetPci { + ClpRspHdr hdr; + __uint32_t fmt; + __uint64_t reserved1; + __uint32_t fh; /* function handle */ + __uint32_t reserved3; + __uint64_t reserved4; +} QEMU_PACKED ClpRspSetPci; + +typedef struct ClpReqRspListPci { + ClpReqListPci request; + ClpRspListPci response; +} QEMU_PACKED ClpReqRspListPci; + +typedef struct ClpReqRspSetPci { + ClpReqSetPci request; + ClpRspSetPci response; +} QEMU_PACKED ClpReqRspSetPci; + +typedef struct ClpReqRspQueryPci { + ClpReqQueryPci request; + ClpRspQueryPci response; +} QEMU_PACKED ClpReqRspQueryPci; + +typedef struct ClpReqRspQueryPciGrp { + ClpReqQueryPciGrp request; + ClpRspQueryPciGrp response; +} QEMU_PACKED ClpReqRspQueryPciGrp; + +typedef struct PciLgStg { + uint32_t fh; + uint8_t status; + uint8_t pcias; + uint8_t reserved; + uint8_t len; +} QEMU_PACKED PciLgStg; + +typedef struct PciStb { + uint32_t fh; + uint8_t status; + uint8_t pcias; + uint8_t reserved; + uint8_t len; +} QEMU_PACKED PciStb; + +/* Load/Store status codes */ +#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4 +#define ZPCI_PCI_ST_FUNC_IN_ERR 8 +#define ZPCI_PCI_ST_BLOCKED 12 +#define ZPCI_PCI_ST_INSUF_RES 16 +#define ZPCI_PCI_ST_INVAL_AS 20 +#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED 24 +#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED 28 +#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS 36 +#define ZPCI_PCI_ST_FUNC_NOT_AVAIL 40 +#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE 44 + +/* Load/Store return codes */ +#define ZPCI_PCI_LS_OK 0 +#define ZPCI_PCI_LS_ERR 1 +#define ZPCI_PCI_LS_BUSY 2 +#define ZPCI_PCI_LS_INVAL_HANDLE 3 + +/* Modify PCI Function Controls */ +#define ZPCI_MOD_FC_REG_INT 2 +#define ZPCI_MOD_FC_DEREG_INT 3 +#define ZPCI_MOD_FC_REG_IOAT 4 +#define ZPCI_MOD_FC_DEREG_IOAT 5 +#define ZPCI_MOD_FC_REREG_IOAT 6 +#define ZPCI_MOD_FC_RESET_ERROR 7 +#define ZPCI_MOD_FC_RESET_BLOCK 9 +#define ZPCI_MOD_FC_SET_MEASURE 10 + +/* FIB function controls */ +#define ZPCI_FIB_FC_ENABLED 0x80 +#define ZPCI_FIB_FC_ERROR 0x40 +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 +#define ZPCI_FIB_FC_DMAAS_REG 0x10 + +/* FIB function controls */ +#define ZPCI_FIB_FC_ENABLED 0x80 +#define ZPCI_FIB_FC_ERROR 0x40 +#define ZPCI_FIB_FC_LS_BLOCKED 0x20 +#define ZPCI_FIB_FC_DMAAS_REG 0x10 + +/* Function Information Block */ +typedef struct ZpciFib { + __uint8_t fmt; /* format */ + __uint8_t reserved1[7]; + __uint8_t fc; /* function controls */ + __uint8_t reserved2; + __uint16_t reserved3; + __uint32_t reserved4; + __uint64_t pba; /* PCI base address */ + __uint64_t pal; /* PCI address limit */ + __uint64_t iota; /* I/O Translation Anchor */ +#define FIB_DATA_ISC(x) (((x) >> 28) & 0x7) +#define FIB_DATA_NOI(x) (((x) >> 16) & 0xfff) +#define FIB_DATA_AIBVO(x) (((x) >> 8) & 0x3f) +#define FIB_DATA_SUM(x) (((x) >> 7) & 0x1) +#define FIB_DATA_AISBO(x) ((x) & 0x3f) + __uint32_t data; + __uint32_t reserved5; + __uint64_t aibv; /* Adapter int bit vector address */ + __uint64_t aisb; /* Adapter int summary bit address */ + __uint64_t fmb_addr; /* Function measurement address and key */ + __uint32_t reserved6; + __uint32_t gd; +} QEMU_PACKED ZpciFib; + +static inline uint64_t get_base_disp_rxy(S390CPU *cpu, struct kvm_run *run) +{ + CPUS390XState *env = &cpu->env; + uint32_t x2 = (run->s390_sieic.ipa & 0x000f); + uint32_t base2 = run->s390_sieic.ipb >> 28; + uint32_t disp2 = ((run->s390_sieic.ipb & 0x0fff0000) >> 16) + + ((run->s390_sieic.ipb & 0xff00) << 4); + + if (disp2 & 0x80000) { + disp2 += 0xfff00000; + } + + return (base2 ? env->regs[base2] : 0) + + (x2 ? env->regs[x2] : 0) + (long)(int)disp2; +} + +static inline uint64_t get_base_disp_rsy(S390CPU *cpu, struct kvm_run *run) +{ + CPUS390XState *env = &cpu->env; + uint32_t base2 = run->s390_sieic.ipb >> 28; + uint32_t disp2 = ((run->s390_sieic.ipb & 0x0fff0000) >> 16) + + ((run->s390_sieic.ipb & 0xff00) << 4); + + if (disp2 & 0x80000) { + disp2 += 0xfff00000; + } + + return (base2 ? env->regs[base2] : 0) + (long)(int)disp2; +} + +int kvm_clp_service_call(S390CPU *cpu, struct kvm_run *run); +int kvm_rpcit_service_call(S390CPU *cpu, struct kvm_run *run); +int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run); +int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run); +int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run); +int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run); +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run); +int kvm_stpcifc_service_call(S390CPU *cpu, struct kvm_run *run); + +#endif