@@ -105,6 +105,8 @@ ifeq ($(uname_M),x86_64)
DEFINES += -DCONFIG_X86_64
endif
+# libfdt objects that get linked into the tool. Despite its name, LIBFDT_SRC
+# lists object files; LIBFDT_OBJS is the same list with the in-tree libfdt
+# directory prepended to each entry.
+LIBFDT_SRC = fdt.o fdt_ro.o fdt_wip.o \
+             fdt_sw.o fdt_rw.o fdt_strerror.o
+LIBFDT_OBJS = $(addprefix ../../scripts/dtc/libfdt/,$(LIBFDT_SRC))
### Arch-specific stuff
@@ -130,9 +132,14 @@ ifeq ($(uname_M), ppc64)
OBJS += powerpc/ioport.o
OBJS += powerpc/irq.o
OBJS += powerpc/kvm.o
+ OBJS += powerpc/cpu_info.o
OBJS += powerpc/kvm-cpu.o
+# We use libfdt, but it is sometimes not packaged as a 64-bit library. It is
+# small, so build the in-tree copy straight into the tool: its directory goes
+# on the include path and its objects are appended to OBJS.
+ CFLAGS += -I../../scripts/dtc/libfdt
+ OBJS += $(LIBFDT_OBJS)
ARCH_INCLUDE := powerpc/include
- CFLAGS += -m64
+ CFLAGS += -m64
endif
###
@@ -198,10 +205,6 @@ DEFINES += -DBUILD_ARCH='"$(ARCH)"'
KVM_INCLUDE := include
CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) -I$(KINCL_PATH)/include -I$(KINCL_PATH)/arch/$(ARCH)/include/ -O2 -fno-strict-aliasing -g
-ifneq ($(WERROR),0)
- WARNINGS += -Werror
-endif
-
WARNINGS += -Wall
WARNINGS += -Wcast-align
WARNINGS += -Wformat=2
@@ -220,6 +223,13 @@ WARNINGS += -Wwrite-strings
CFLAGS += $(WARNINGS)
+# Some targets may use 'external' sources that don't build totally cleanly.
+# CFLAGS_EASYGOING is an immediate (:=) snapshot of CFLAGS taken *before*
+# -Werror is appended below, so it carries the warning flags accumulated so
+# far but never -Werror. Anything added to CFLAGS after this line is not
+# reflected in it.
+CFLAGS_EASYGOING := $(CFLAGS)
+
+# Warnings are fatal for everything built with CFLAGS unless the build is
+# invoked with WERROR=0.
+ifneq ($(WERROR),0)
+ CFLAGS += -Werror
+endif
+
all: arch_support_check $(PROGRAM) $(PROGRAM_ALIAS) $(GUEST_INIT) $(GUEST_INIT_S2)
arch_support_check:
@@ -262,6 +272,12 @@ builtin-help.d: $(KVM_INCLUDE)/common-cmds.h
$(OBJS):
+# This rule relaxes the -Werror on libfdt, since for now it still has
+# a bunch of warnings. :(
+# Objects under ../../scripts/dtc/libfdt/ are compiled from the .c file of the
+# same name using CFLAGS_EASYGOING instead of CFLAGS.
+../../scripts/dtc/libfdt/%.o: ../../scripts/dtc/libfdt/%.c
+ $(E) " CC " $@
+ $(Q) $(CC) -c $(CFLAGS_EASYGOING) $< -o $@
+
util/rbtree.o: ../../lib/rbtree.c
$(E) " CC " $@
$(Q) $(CC) -c $(CFLAGS) $< -o $@
new file mode 100644
@@ -0,0 +1,83 @@
+/*
+ * PPC CPU identification
+ *
+ * This is a very simple "host CPU info" struct to get us going.
+ * For the little host information we need, I don't want to grub about
+ * parsing stuff in /proc/device-tree so just match host PVR to differentiate
+ * PPC970 and POWER7 (which is all that's currently supported).
+ *
+ * Qemu does something similar but this is MUCH simpler!
+ *
+ * Copyright 2012 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "cpu_info.h"
+#include "kvm/util.h"
+
+/* POWER7 */
+
+/*
+ * Page sizes advertised to a POWER7 guest. The CPU supports more than these,
+ * but there were limitations on which ones may be exposed to the guest, so
+ * only a basic set is listed. FIXME: revisit once that settles down.
+ */
+static u32 power7_page_sizes_prop[] = {
+	0xc, 0x0, 0x1, 0xc, 0x0,
+	0x18, 0x100, 0x1, 0x18, 0x0,
+};
+
+/* POWER7 has 1T segments, so advertise these */
+static u32 power7_segment_sizes_prop[] = {
+	0x1c, 0x28, 0xffffffff, 0xffffffff,
+};
+
+/* Host description used for every PVR that identifies a POWER7. */
+static struct cpu_info cpu_power7_info = {
+	.name			= "POWER7",
+	.page_sizes_prop	= power7_page_sizes_prop,
+	.page_sizes_prop_len	= sizeof(power7_page_sizes_prop),
+	.segment_sizes_prop	= power7_segment_sizes_prop,
+	.segment_sizes_prop_len	= sizeof(power7_segment_sizes_prop),
+	.slb_size		= 32,		/* SLB size */
+	.tb_freq		= 512000000,	/* TB frequency */
+	.d_bsize		= 128,		/* d-cache block size */
+	.i_bsize		= 128,		/* i-cache block size */
+	.flags			= CPUINFO_FLAG_DFP | CPUINFO_FLAG_VSX |
+				  CPUINFO_FLAG_VMX,
+};
+
+/* PPC970/G5 */
+
+/* Page sizes advertised to a PPC970 guest (same basic set as POWER7). */
+static u32 g5_page_sizes_prop[] = {
+	0xc, 0x0, 0x1, 0xc, 0x0,
+	0x18, 0x100, 0x1, 0x18, 0x0,
+};
+
+/* Host description used for every PVR that identifies a PPC970 variant. */
+static struct cpu_info cpu_970_info = {
+	.name			= "G5",
+	.page_sizes_prop	= g5_page_sizes_prop,
+	.page_sizes_prop_len	= sizeof(g5_page_sizes_prop),
+	/* Null = no segment sizes prop, use defaults */
+	.segment_sizes_prop	= 0,
+	.segment_sizes_prop_len	= 0,
+	.slb_size		= 0,		/* SLB size default */
+	.tb_freq		= 33333333,	/* TB frequency */
+	.d_bsize		= 128,		/* d-cache block size */
+	.i_bsize		= 128,		/* i-cache block size */
+	.flags			= CPUINFO_FLAG_VMX,
+};
+
+/*
+ * Default catch-all returned when no PVR table entry matches: only the name
+ * is set, every other field is zero.
+ */
+static struct cpu_info cpu_dummy_info = {
+	.name = "unknown",
+};
+
+/*
+ * Host PVR match table. Entries are tried in order and the first one whose
+ * masked PVR matches wins, so more specific masks belong before broader ones.
+ */
+static struct pvr_info host_pvr_info[] = {
+	{ .pvr_mask = 0xffffffff, .pvr = 0x0f000003, .cpu_info = &cpu_power7_info },
+	{ .pvr_mask = 0xffff0000, .pvr = 0x003f0000, .cpu_info = &cpu_power7_info },
+	{ .pvr_mask = 0xffff0000, .pvr = 0x004a0000, .cpu_info = &cpu_power7_info },
+	{ .pvr_mask = 0xffff0000, .pvr = 0x00390000, .cpu_info = &cpu_970_info },
+	{ .pvr_mask = 0xffff0000, .pvr = 0x003c0000, .cpu_info = &cpu_970_info },
+	{ .pvr_mask = 0xffff0000, .pvr = 0x00440000, .cpu_info = &cpu_970_info },
+	{ .pvr_mask = 0xffff0000, .pvr = 0x00450000, .cpu_info = &cpu_970_info },
+};
+
+/*
+ * Map a PVR value to its CPU description.
+ *
+ * Walks host_pvr_info in order and returns the cpu_info of the first entry
+ * for which (pvr & pvr_mask) equals the entry's pvr. If nothing matches, a
+ * warning is logged and the all-defaults "unknown" description is returned,
+ * so the result is never NULL.
+ */
+struct cpu_info *find_cpu_info(u32 pvr)
+{
+	const unsigned int nr_entries =
+		sizeof(host_pvr_info) / sizeof(host_pvr_info[0]);
+	unsigned int idx;
+
+	for (idx = 0; idx < nr_entries; idx++) {
+		struct pvr_info *entry = &host_pvr_info[idx];
+
+		if ((pvr & entry->pvr_mask) != entry->pvr)
+			continue;
+
+		return entry->cpu_info;
+	}
+
+	/* Didn't find anything? Rut-ro. */
+	pr_warning("Host CPU unsupported by kvmtool\n");
+	return &cpu_dummy_info;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+/*
+ * PPC CPU identification
+ *
+ * Copyright 2012 Matt Evans <matt@ozlabs.org>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#ifndef CPU_INFO_H
+#define CPU_INFO_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+/*
+ * Per-CPU-model properties used when generating the guest's cpu device tree
+ * nodes.
+ */
+struct cpu_info {
+ /* Model name; the cpu node is named "PowerPC,<name>" */
+ const char *name;
+ /* Raw cells for "ibm,segment-page-sizes"; property omitted when NULL */
+ u32 *page_sizes_prop;
+ /* Size of page_sizes_prop in bytes */
+ u32 page_sizes_prop_len;
+ /* Raw cells for "ibm,processor-segment-sizes"; omitted when NULL */
+ u32 *segment_sizes_prop;
+ /* Size of segment_sizes_prop in bytes */
+ u32 segment_sizes_prop_len;
+ /* Value for "ibm,slb-size"; 0 means the property is not emitted */
+ u32 slb_size;
+ /* Value for "timebase-frequency" */
+ u32 tb_freq;
+ /* Value for "dcache-block-size" */
+ u32 d_bsize;
+ /* Value for "icache-block-size" */
+ u32 i_bsize;
+ /* Bitmask of CPUINFO_FLAG_* values */
+ u32 flags;
+};
+
+/*
+ * One PVR match rule: a PVR value matches this entry when
+ * (value & pvr_mask) == pvr, in which case cpu_info describes the CPU.
+ */
+struct pvr_info {
+ u32 pvr_mask;
+ u32 pvr;
+ struct cpu_info *cpu_info;
+};
+
+/*
+ * Misc capabilities/CPU properties (bits of cpu_info.flags):
+ *   DFP - the cpu node gets "ibm,dfp" = 1
+ *   VMX - the cpu node gets "ibm,vmx", with value 1, or 2 if VSX is also set
+ *   VSX - only consulted together with VMX, to select the "ibm,vmx" value
+ */
+#define CPUINFO_FLAG_DFP 0x00000001
+#define CPUINFO_FLAG_VMX 0x00000002
+#define CPUINFO_FLAG_VSX 0x00000004
+
+/*
+ * Return the cpu_info matching the given PVR; an "unknown" placeholder is
+ * returned (never NULL) when the PVR is not recognised.
+ */
+struct cpu_info *find_cpu_info(u32 pvr);
+
+#endif
@@ -53,6 +53,7 @@ struct kvm {
void *ram_start;
u64 sdr1;
+ u32 pvr;
bool nmi_disabled;
@@ -70,4 +71,14 @@ struct kvm {
int vm_state;
};
+/*
+ * Helper for the various bits of code that generate FDT nodes: evaluate a
+ * libfdt call exactly once and, if it returns a negative value, report the
+ * failing expression together with libfdt's error string through die().
+ *
+ * The result is kept in a deliberately unusual local name. A plain 'ret'
+ * would capture any caller variable called 'ret' that appears inside 'exp',
+ * turning the initialiser into a read of the macro's own uninitialised
+ * temporary.
+ */
+#define _FDT(exp) \
+	do { \
+		int fdt_ret_ = (exp); \
+		if (fdt_ret_ < 0) { \
+			die("Error creating device tree: %s: %s\n", \
+			    #exp, fdt_strerror(fdt_ret_)); \
+		} \
+	} while (0)
+
#endif /* KVM__KVM_ARCH_H */
@@ -134,6 +134,7 @@ static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu)
die("KVM_GET_SREGS failed");
sregs.u.s.sdr1 = vcpu->kvm->sdr1;
+ sregs.pvr = vcpu->kvm->pvr;
if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &sregs) < 0)
die("KVM_SET_SREGS failed");
@@ -3,6 +3,9 @@
*
* Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation.
*
+ * Portions of FDT setup borrowed from QEMU, copyright 2010 David Gibson, IBM
+ * Corporation.
+ *
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
@@ -10,6 +13,8 @@
#include "kvm/kvm.h"
#include "kvm/util.h"
+#include "libfdt.h"
+#include "cpu_info.h"
#include <linux/kvm.h>
@@ -26,7 +31,8 @@
#include <errno.h>
#include <linux/byteorder.h>
-#include <libfdt.h>
+
+/*
+ * log2 of the hashed page table size (2^24 bytes = 16MB). Used both when
+ * composing SDR1 and as the value advertised in "ibm,pft-size".
+ */
+#define HPT_ORDER 24
#define HUGETLBFS_PATH "/var/lib/hugetlbfs/global/pagesize-16MB/"
@@ -36,6 +42,13 @@ struct kvm_ext kvm_req_ext[] = {
{ 0, 0 }
};
+/* Read the PVR of the CPU we are running on using the mfpvr instruction. */
+static uint32_t mfpvr(void)
+{
+	uint32_t pvr;
+
+	asm volatile ("mfpvr %0" : "=r"(pvr));
+
+	return pvr;
+}
+
bool kvm__arch_cpu_supports_vm(void)
{
return true;
@@ -106,6 +119,8 @@ void kvm__arch_init(struct kvm *kvm, const char *hugetlbfs_path, u64 ram_size)
kvm->sdr1 = ((hpt + 0x3ffffULL) & ~0x3ffffULL) | (HPT_ORDER-18);
+ kvm->pvr = mfpvr();
+
/* FIXME: This is book3s-specific */
cap_ppc_rma = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_RMA);
if (cap_ppc_rma == 2)
@@ -183,9 +198,146 @@ bool load_bzimage(struct kvm *kvm, int fd_kernel,
return false;
}
+/* Number of vcpus grouped into one cpu node of the device tree */
+#define SMT_THREADS 4
+
+/*
+ * Set up the FDT for the kernel: This function is currently fairly SPAPR-heavy,
+ * and whilst most PPC targets will require CPU/memory nodes, others like RTAS
+ * should eventually be added separately.
+ *
+ * The tree is written sequentially into a staging buffer on the stack, then
+ * opened into guest memory at kvm->fdt_gra, where the RTAS region is added to
+ * the memory reservation map and the blob is packed. Every libfdt call goes
+ * through _FDT(), so a failure is reported instead of being ignored.
+ *
+ * Nodes generated:
+ *   /          model/device_type and 2-cell addressing
+ *   /chosen    bootargs, plus the initrd range when an initrd is loaded
+ *   /memory@0  a single region covering all of guest RAM
+ *   /cpus      one node per group of up to SMT_THREADS vcpus, populated from
+ *              the cpu_info matching kvm->pvr
+ */
+static void setup_fdt(struct kvm *kvm)
+{
+	uint64_t mem_reg_property[] = { 0, cpu_to_be64(kvm->ram_size) };
+	int smp_cpus = kvm->nrcpus;
+	int i, j;
+	char cpu_name[30];
+	u8 staging_fdt[FDT_MAX_SIZE];
+	struct cpu_info *cpu_info = find_cpu_info(kvm->pvr);
+
+	/* Generate an appropriate DT at kvm->fdt_gra */
+	void *fdt_dest = guest_flat_to_host(kvm, kvm->fdt_gra);
+	void *fdt = staging_fdt;
+
+	_FDT(fdt_create(fdt, FDT_MAX_SIZE));
+	_FDT(fdt_finish_reservemap(fdt));
+
+	_FDT(fdt_begin_node(fdt, ""));
+
+	_FDT(fdt_property_string(fdt, "device_type", "chrp"));
+	_FDT(fdt_property_string(fdt, "model", "IBM pSeries (kvmtool)"));
+	_FDT(fdt_property_cell(fdt, "#address-cells", 0x2));
+	_FDT(fdt_property_cell(fdt, "#size-cells", 0x2));
+
+	/* /chosen */
+	_FDT(fdt_begin_node(fdt, "chosen"));
+	/* cmdline */
+	_FDT(fdt_property_string(fdt, "bootargs", kern_cmdline));
+	/* Initrd */
+	if (kvm->initrd_size != 0) {
+		uint32_t ird_st_prop = cpu_to_be32(kvm->initrd_gra);
+		uint32_t ird_end_prop = cpu_to_be32(kvm->initrd_gra +
+						    kvm->initrd_size);
+		_FDT(fdt_property(fdt, "linux,initrd-start",
+				  &ird_st_prop, sizeof(ird_st_prop)));
+		_FDT(fdt_property(fdt, "linux,initrd-end",
+				  &ird_end_prop, sizeof(ird_end_prop)));
+	}
+	_FDT(fdt_end_node(fdt));
+
+	/*
+	 * Memory: We don't alloc. a separate RMA yet. If we ever need to
+	 * (CAP_PPC_RMA == 2) then have one memory node for 0->RMAsize, and
+	 * another RMAsize->endOfMem.
+	 */
+	_FDT(fdt_begin_node(fdt, "memory@0"));
+	_FDT(fdt_property_string(fdt, "device_type", "memory"));
+	_FDT(fdt_property(fdt, "reg", mem_reg_property,
+			  sizeof(mem_reg_property)));
+	_FDT(fdt_end_node(fdt));
+
+	/* CPUs */
+	_FDT(fdt_begin_node(fdt, "cpus"));
+	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
+	_FDT(fdt_property_cell(fdt, "#size-cells", 0x0));
+
+	for (i = 0; i < smp_cpus; i += SMT_THREADS) {
+		/*
+		 * Raw properties bypass fdt_property_cell()'s conversion, so
+		 * store the cells big-endian like the other raw properties
+		 * written by this function.
+		 */
+		uint32_t pft_size_prop[] = { 0, cpu_to_be32(HPT_ORDER) };
+		uint32_t servers_prop[SMT_THREADS];
+		uint32_t gservers_prop[SMT_THREADS * 2];
+		/* The last core may be only partially populated */
+		int threads = (smp_cpus - i) >= SMT_THREADS ? SMT_THREADS :
+			smp_cpus - i;
+
+		/* Bounded writes: cpu_name is a small fixed-size buffer */
+		snprintf(cpu_name, sizeof(cpu_name), "PowerPC,%s@%d",
+			 cpu_info->name, i);
+		_FDT(fdt_begin_node(fdt, cpu_name));
+		snprintf(cpu_name, sizeof(cpu_name), "PowerPC,%s",
+			 cpu_info->name);
+		_FDT(fdt_property_string(fdt, "name", cpu_name));
+		_FDT(fdt_property_string(fdt, "device_type", "cpu"));
+
+		_FDT(fdt_property_cell(fdt, "reg", i));
+		_FDT(fdt_property_cell(fdt, "cpu-version", kvm->pvr));
+
+		_FDT(fdt_property_cell(fdt, "dcache-block-size", cpu_info->d_bsize));
+		_FDT(fdt_property_cell(fdt, "icache-block-size", cpu_info->i_bsize));
+
+		_FDT(fdt_property_cell(fdt, "timebase-frequency", cpu_info->tb_freq));
+		/* Lies, but safeish lies! */
+		_FDT(fdt_property_cell(fdt, "clock-frequency", 0xddbab200));
+
+		if (cpu_info->slb_size)
+			_FDT(fdt_property_cell(fdt, "ibm,slb-size", cpu_info->slb_size));
+		/*
+		 * HPT size is hardwired; KVM currently fixes it at 16MB but the
+		 * moment that changes we'll need to read it out of the kernel.
+		 */
+		_FDT(fdt_property(fdt, "ibm,pft-size", pft_size_prop,
+				  sizeof(pft_size_prop)));
+
+		_FDT(fdt_property_string(fdt, "status", "okay"));
+		_FDT(fdt_property(fdt, "64-bit", NULL, 0));
+		/* A server for each thread actually present in this core */
+		for (j = 0; j < threads; j++) {
+			servers_prop[j] = cpu_to_be32(i+j);
+			/*
+			 * Hack borrowed from QEMU, direct the group queues back
+			 * to cpu 0:
+			 */
+			gservers_prop[j*2] = cpu_to_be32(i+j);
+			gservers_prop[j*2 + 1] = 0;
+		}
+		_FDT(fdt_property(fdt, "ibm,ppc-interrupt-server#s",
+				  servers_prop, threads * sizeof(uint32_t)));
+		_FDT(fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
+				  gservers_prop,
+				  threads * 2 * sizeof(uint32_t)));
+		/*
+		 * NOTE(review): the cpu_info property arrays are passed through
+		 * unmodified, i.e. in host byte order -- confirm that is what
+		 * is wanted if this ever runs on a little-endian host.
+		 */
+		if (cpu_info->page_sizes_prop)
+			_FDT(fdt_property(fdt, "ibm,segment-page-sizes",
+					  cpu_info->page_sizes_prop,
+					  cpu_info->page_sizes_prop_len));
+		if (cpu_info->segment_sizes_prop)
+			_FDT(fdt_property(fdt, "ibm,processor-segment-sizes",
+					  cpu_info->segment_sizes_prop,
+					  cpu_info->segment_sizes_prop_len));
+		/* VSX / DFP options: */
+		if (cpu_info->flags & CPUINFO_FLAG_VMX)
+			_FDT(fdt_property_cell(fdt, "ibm,vmx",
+					       (cpu_info->flags &
+						CPUINFO_FLAG_VSX) ? 2 : 1));
+		if (cpu_info->flags & CPUINFO_FLAG_DFP)
+			_FDT(fdt_property_cell(fdt, "ibm,dfp", 0x1));
+		_FDT(fdt_end_node(fdt));
+	}
+	_FDT(fdt_end_node(fdt));
+
+	/* Finalise: */
+	_FDT(fdt_end_node(fdt)); /* Root node */
+	_FDT(fdt_finish(fdt));
+
+	/*
+	 * Move the finished blob into guest memory as an editable tree so the
+	 * RTAS reservation can be added, then shrink it back down.
+	 */
+	_FDT(fdt_open_into(fdt, fdt_dest, FDT_MAX_SIZE));
+	_FDT(fdt_add_mem_rsv(fdt_dest, kvm->rtas_gra, kvm->rtas_size));
+	_FDT(fdt_pack(fdt_dest));
 }
/**
The generated DT is the bare minimum structure required for SPAPR (on which subsequent patches for VIO, XICS, PCI etc. will build); root node, cpus, memory. The DT contains CPU-specific configuration; a very simple 'cpu info' mechanism is added to recognise/differentiate DT entries for POWER7 and PPC970 host CPUs. Future support of more CPUs is possible. libfdt is included from scripts/dtc/libfdt. Signed-off-by: Matt Evans <matt@ozlabs.org> --- tools/kvm/Makefile | 26 ++++- tools/kvm/powerpc/cpu_info.c | 83 ++++++++++++++++ tools/kvm/powerpc/cpu_info.h | 43 ++++++++ tools/kvm/powerpc/include/kvm/kvm-arch.h | 11 ++ tools/kvm/powerpc/kvm-cpu.c | 1 + tools/kvm/powerpc/kvm.c | 154 +++++++++++++++++++++++++++++- 6 files changed, 312 insertions(+), 6 deletions(-) create mode 100644 tools/kvm/powerpc/cpu_info.c create mode 100644 tools/kvm/powerpc/cpu_info.h