Message ID | 4EDD8DED.3070908@ozlabs.org |
---|---|
State | New, archived |
Headers | show |
The code doesn't build after this patch due to missing header issues which you fixed in patches #10 & #11. Could you please move those two to the beginning of the series for the sake of bisectability? On Tue, 2011-12-06 at 14:37 +1100, Matt Evans wrote: > Create a new arch-specific subdirectory to contain architecture-specific code > and includes. > > The Makefile now adds various arch-specific objects based on detected > architecture. That aside, this patch should only contain code moves. These > include: > > - x86-specific kvm_cpu setup, kernel loading, memory setup etc. now in > x86/kvm{-cpu}.c > - BIOS now lives in x86/bios/ > - ioport setup > - KVM extensions are asserted in arch-specific kvm.c now, so each architecture > can manage its own dependencies. > - Various architecture-specific #defines are moved into $(ARCH)/include/kvm{-cpu}.h > such as struct kvm_cpu, KVM_NR_CPUS, KVM_32BIT_GAP_SIZE. > > Signed-off-by: Matt Evans <matt@ozlabs.org> > --- > tools/kvm/Makefile | 96 ++++--- > tools/kvm/builtin-run.c | 6 +- > tools/kvm/include/kvm/ioport.h | 2 +- > tools/kvm/include/kvm/kvm-cpu.h | 27 +-- > tools/kvm/include/kvm/kvm.h | 58 +--- > tools/kvm/ioport.c | 54 ---- > tools/kvm/kvm-cpu.c | 372 ---------------------- > tools/kvm/kvm.c | 323 +------------------- > tools/kvm/{ => x86}/bios.c | 0 > tools/kvm/{ => x86}/bios/.gitignore | 0 > tools/kvm/{ => x86}/bios/bios-rom.S | 2 +- > tools/kvm/{ => x86}/bios/e820.c | 0 > tools/kvm/{ => x86}/bios/entry.S | 0 > tools/kvm/{ => x86}/bios/gen-offsets.sh | 0 > tools/kvm/{ => x86}/bios/int10.c | 0 > tools/kvm/{ => x86}/bios/int15.c | 0 > tools/kvm/{ => x86}/bios/local.S | 0 > tools/kvm/{ => x86}/bios/macro.S | 0 > tools/kvm/{ => x86}/bios/memcpy.c | 0 > tools/kvm/{ => x86}/bios/rom.ld.S | 0 > tools/kvm/{ => x86}/cpuid.c | 0 > tools/kvm/{ => x86}/include/kvm/assembly.h | 0 > tools/kvm/{ => x86}/include/kvm/barrier.h | 0 > tools/kvm/{ => x86}/include/kvm/bios-export.h | 0 > tools/kvm/{ => x86}/include/kvm/bios.h | 0 > 
tools/kvm/{ => x86}/include/kvm/boot-protocol.h | 0 > tools/kvm/{ => x86}/include/kvm/cpufeature.h | 0 > tools/kvm/{ => x86}/include/kvm/interrupt.h | 0 > tools/kvm/x86/include/kvm/kvm-arch.h | 59 ++++ > tools/kvm/x86/include/kvm/kvm-cpu-arch.h | 33 ++ > tools/kvm/{ => x86}/include/kvm/mptable.h | 0 > tools/kvm/{ => x86}/interrupt.c | 0 > tools/kvm/x86/ioport.c | 59 ++++ > tools/kvm/{ => x86}/irq.c | 0 > tools/kvm/x86/kvm-cpu.c | 383 +++++++++++++++++++++++ > tools/kvm/x86/kvm.c | 330 +++++++++++++++++++ > tools/kvm/{ => x86}/mptable.c | 0 > 37 files changed, 951 insertions(+), 853 deletions(-) > rename tools/kvm/{ => x86}/bios.c (100%) > rename tools/kvm/{ => x86}/bios/.gitignore (100%) > rename tools/kvm/{ => x86}/bios/bios-rom.S (80%) > rename tools/kvm/{ => x86}/bios/e820.c (100%) > rename tools/kvm/{ => x86}/bios/entry.S (100%) > rename tools/kvm/{ => x86}/bios/gen-offsets.sh (100%) > rename tools/kvm/{ => x86}/bios/int10.c (100%) > rename tools/kvm/{ => x86}/bios/int15.c (100%) > rename tools/kvm/{ => x86}/bios/local.S (100%) > rename tools/kvm/{ => x86}/bios/macro.S (100%) > rename tools/kvm/{ => x86}/bios/memcpy.c (100%) > rename tools/kvm/{ => x86}/bios/rom.ld.S (100%) > rename tools/kvm/{ => x86}/cpuid.c (100%) > rename tools/kvm/{ => x86}/include/kvm/assembly.h (100%) > rename tools/kvm/{ => x86}/include/kvm/barrier.h (100%) > rename tools/kvm/{ => x86}/include/kvm/bios-export.h (100%) > rename tools/kvm/{ => x86}/include/kvm/bios.h (100%) > rename tools/kvm/{ => x86}/include/kvm/boot-protocol.h (100%) > rename tools/kvm/{ => x86}/include/kvm/cpufeature.h (100%) > rename tools/kvm/{ => x86}/include/kvm/interrupt.h (100%) > create mode 100644 tools/kvm/x86/include/kvm/kvm-arch.h > create mode 100644 tools/kvm/x86/include/kvm/kvm-cpu-arch.h > rename tools/kvm/{ => x86}/include/kvm/mptable.h (100%) > rename tools/kvm/{ => x86}/interrupt.c (100%) > create mode 100644 tools/kvm/x86/ioport.c > rename tools/kvm/{ => x86}/irq.c (100%) > create mode 100644 
tools/kvm/x86/kvm-cpu.c > create mode 100644 tools/kvm/x86/kvm.c > rename tools/kvm/{ => x86}/mptable.c (100%) > > diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile > index bb5f6b0..243886e 100644 > --- a/tools/kvm/Makefile > +++ b/tools/kvm/Makefile > @@ -33,13 +33,11 @@ OBJS += builtin-run.o > OBJS += builtin-setup.o > OBJS += builtin-stop.o > OBJS += builtin-version.o > -OBJS += cpuid.o > OBJS += disk/core.o > OBJS += framebuffer.o > OBJS += guest_compat.o > OBJS += hw/rtc.o > OBJS += hw/serial.o > -OBJS += interrupt.o > OBJS += ioport.o > OBJS += kvm-cpu.o > OBJS += kvm.o > @@ -61,7 +59,6 @@ OBJS += disk/blk.o > OBJS += disk/qcow.o > OBJS += disk/raw.o > OBJS += ioeventfd.o > -OBJS += irq.o > OBJS += net/uip/core.o > OBJS += net/uip/arp.o > OBJS += net/uip/icmp.o > @@ -72,7 +69,6 @@ OBJS += net/uip/buf.o > OBJS += net/uip/csum.o > OBJS += net/uip/dhcp.o > OBJS += kvm-cmd.o > -OBJS += mptable.o > OBJS += rbtree.o > OBJS += threadpool.o > OBJS += util/parse-options.o > @@ -123,12 +119,6 @@ ifeq ($(has_AIO),y) > LIBS += -laio > endif > > -DEPS := $(patsubst %.o,%.d,$(OBJS)) > - > -# Exclude BIOS object files from header dependencies. > -OBJS += bios.o > -OBJS += bios/bios-rom.o > - > LIBS += -lrt > LIBS += -lpthread > LIBS += -lutil > @@ -150,12 +140,43 @@ ifeq ($(uname_M),x86_64) > DEFINES += -DCONFIG_X86_64 > endif > > + > +### Arch-specific stuff > + > +#x86 > +ifeq ($(ARCH),x86) > + DEFINES += -DCONFIG_X86 > + OBJS += x86/cpuid.o > + OBJS += x86/interrupt.o > + OBJS += x86/ioport.o > + OBJS += x86/irq.o > + OBJS += x86/kvm.o > + OBJS += x86/kvm-cpu.o > + OBJS += x86/mptable.o > +# Exclude BIOS object files from header dependencies. > + OTHEROBJS += x86/bios.o > + OTHEROBJS += x86/bios/bios-rom.o > + ARCH_INCLUDE := x86/include > +endif > + > +### > + > +ifeq (,$(ARCH_INCLUDE)) > + UNSUPP_ERR = @echo "This architecture is not supported in kvmtool." 
&& exit 1 > +else > + UNSUPP_ERR = > +endif > + > +DEPS := $(patsubst %.o,%.d,$(OBJS)) > +OBJS += $(OTHEROBJS) > + > DEFINES += -D_FILE_OFFSET_BITS=64 > DEFINES += -D_GNU_SOURCE > DEFINES += -DKVMTOOLS_VERSION='"$(KVMTOOLS_VERSION)"' > +DEFINES += -DBUILD_ARCH='"$(ARCH)"' > > KVM_INCLUDE := include > -CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I../../include -I../../arch/$(ARCH)/include/ -Os -g > +CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) -I../../include -I../../arch/$(ARCH)/include/ -Os -g > > ifneq ($(WERROR),0) > WARNINGS += -Werror > @@ -179,7 +200,10 @@ WARNINGS += -Wwrite-strings > > CFLAGS += $(WARNINGS) > > -all: $(PROGRAM) $(GUEST_INIT) > +all: arch_support_check $(PROGRAM) $(GUEST_INIT) > + > +arch_support_check: > + $(UNSUPP_ERR) > > KVMTOOLS-VERSION-FILE: > @$(SHELL_PATH) util/KVMTOOLS-VERSION-GEN $(OUTPUT) > @@ -227,33 +251,33 @@ BIOS_CFLAGS += -mregparm=3 > BIOS_CFLAGS += -fno-stack-protector > BIOS_CFLAGS += -I../../arch/$(ARCH) > > -bios.o: bios/bios.bin bios/bios-rom.h > - > -bios/bios.bin.elf: bios/entry.S bios/e820.c bios/int10.c bios/int15.c bios/rom.ld.S > - $(E) " CC bios/memcpy.o" > - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/memcpy.c -o bios/memcpy.o > - $(E) " CC bios/e820.o" > - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/e820.c -o bios/e820.o > - $(E) " CC bios/int10.o" > - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/int10.c -o bios/int10.o > - $(E) " CC bios/int15.o" > - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/int15.c -o bios/int15.o > - $(E) " CC bios/entry.o" > - $(Q) $(CC) $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/entry.S -o bios/entry.o > +x86/bios.o: x86/bios/bios.bin x86/bios/bios-rom.h > + > +x86/bios/bios.bin.elf: x86/bios/entry.S x86/bios/e820.c x86/bios/int10.c x86/bios/int15.c x86/bios/rom.ld.S > + $(E) " CC x86/bios/memcpy.o" > + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s 
x86/bios/memcpy.c -o x86/bios/memcpy.o > + $(E) " CC x86/bios/e820.o" > + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/e820.c -o x86/bios/e820.o > + $(E) " CC x86/bios/int10.o" > + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/int10.c -o x86/bios/int10.o > + $(E) " CC x86/bios/int15.o" > + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/int15.c -o x86/bios/int15.o > + $(E) " CC x86/bios/entry.o" > + $(Q) $(CC) $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/entry.S -o x86/bios/entry.o > $(E) " LD " $@ > - $(Q) ld -T bios/rom.ld.S -o bios/bios.bin.elf bios/memcpy.o bios/entry.o bios/e820.o bios/int10.o bios/int15.o > + $(Q) ld -T x86/bios/rom.ld.S -o x86/bios/bios.bin.elf x86/bios/memcpy.o x86/bios/entry.o x86/bios/e820.o x86/bios/int10.o x86/bios/int15.o > > -bios/bios.bin: bios/bios.bin.elf > +x86/bios/bios.bin: x86/bios/bios.bin.elf > $(E) " OBJCOPY " $@ > - $(Q) objcopy -O binary -j .text bios/bios.bin.elf bios/bios.bin > + $(Q) objcopy -O binary -j .text x86/bios/bios.bin.elf x86/bios/bios.bin > > -bios/bios-rom.o: bios/bios-rom.S bios/bios.bin bios/bios-rom.h > +x86/bios/bios-rom.o: x86/bios/bios-rom.S x86/bios/bios.bin x86/bios/bios-rom.h > $(E) " CC " $@ > - $(Q) $(CC) -c $(CFLAGS) bios/bios-rom.S -o bios/bios-rom.o > + $(Q) $(CC) -c $(CFLAGS) x86/bios/bios-rom.S -o x86/bios/bios-rom.o > > -bios/bios-rom.h: bios/bios.bin.elf > +x86/bios/bios-rom.h: x86/bios/bios.bin.elf > $(E) " NM " $@ > - $(Q) cd bios && sh gen-offsets.sh > bios-rom.h && cd .. > + $(Q) cd x86/bios && sh gen-offsets.sh > bios-rom.h && cd .. 
> > check: $(PROGRAM) > $(MAKE) -C tests > @@ -263,10 +287,10 @@ check: $(PROGRAM) > > clean: > $(E) " CLEAN" > - $(Q) rm -f bios/*.bin > - $(Q) rm -f bios/*.elf > - $(Q) rm -f bios/*.o > - $(Q) rm -f bios/bios-rom.h > + $(Q) rm -f x86/bios/*.bin > + $(Q) rm -f x86/bios/*.elf > + $(Q) rm -f x86/bios/*.o > + $(Q) rm -f x86/bios/bios-rom.h > $(Q) rm -f tests/boot/boot_test.iso > $(Q) rm -rf tests/boot/rootfs/ > $(Q) rm -f $(DEPS) $(OBJS) $(PROGRAM) $(GUEST_INIT) > diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c > index 33de4f6..9148d83 100644 > --- a/tools/kvm/builtin-run.c > +++ b/tools/kvm/builtin-run.c > @@ -568,7 +568,7 @@ static const char *host_kernels[] = { > > static const char *default_kernels[] = { > "./bzImage", > - "../../arch/x86/boot/bzImage", > + "../../arch/" BUILD_ARCH "/boot/bzImage", > NULL > }; > > @@ -886,7 +886,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) > > kvm->vmlinux = vmlinux_filename; > > - ioport__setup_legacy(); > + ioport__setup_arch(); > > rtc__init(); > > @@ -931,7 +931,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) > > kvm__start_timer(kvm); > > - kvm__setup_bios(kvm); > + kvm__arch_setup_firmware(kvm); > > for (i = 0; i < nrcpus; i++) { > kvm_cpus[i] = kvm_cpu__init(kvm, i); > diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h > index 5b857dd..61a70ec 100644 > --- a/tools/kvm/include/kvm/ioport.h > +++ b/tools/kvm/include/kvm/ioport.h > @@ -28,7 +28,7 @@ struct ioport_operations { > bool (*io_out)(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size); > }; > > -void ioport__setup_legacy(void); > +void ioport__setup_arch(void); > > u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void *param); > > diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h > index 01540ac..719e286 100644 > --- a/tools/kvm/include/kvm/kvm-cpu.h > +++ b/tools/kvm/include/kvm/kvm-cpu.h > @@ -1,32 +1,7 @@ 
> #ifndef KVM__KVM_CPU_H > #define KVM__KVM_CPU_H > > -#include <linux/kvm.h> /* for struct kvm_regs */ > - > -#include <pthread.h> > - > -struct kvm; > - > -struct kvm_cpu { > - pthread_t thread; /* VCPU thread */ > - > - unsigned long cpu_id; > - > - struct kvm *kvm; /* parent KVM */ > - int vcpu_fd; /* For VCPU ioctls() */ > - struct kvm_run *kvm_run; > - > - struct kvm_regs regs; > - struct kvm_sregs sregs; > - struct kvm_fpu fpu; > - > - struct kvm_msrs *msrs; /* dynamically allocated */ > - > - u8 is_running; > - u8 paused; > - > - struct kvm_coalesced_mmio_ring *ring; > -}; > +#include "kvm/kvm-cpu-arch.h" > > struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id); > void kvm_cpu__delete(struct kvm_cpu *vcpu); > diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h > index 2b3024a..ca1acc0 100644 > --- a/tools/kvm/include/kvm/kvm.h > +++ b/tools/kvm/include/kvm/kvm.h > @@ -1,22 +1,13 @@ > #ifndef KVM__KVM_H > #define KVM__KVM_H > > -#include "kvm/interrupt.h" > -#include "kvm/segment.h" > +#include "kvm/kvm-arch.h" > > #include <stdbool.h> > #include <linux/types.h> > #include <time.h> > #include <signal.h> > > -#define KVM_NR_CPUS (255) > - > -/* > - * The hole includes VESA framebuffer and PCI memory. > - */ > -#define KVM_32BIT_GAP_SIZE (768 << 20) > -#define KVM_32BIT_GAP_START ((1ULL << 32) - KVM_32BIT_GAP_SIZE) > - > #define SIGKVMEXIT (SIGRTMIN + 0) > #define SIGKVMPAUSE (SIGRTMIN + 1) > #define SIGKVMSTOP (SIGRTMIN + 4) > @@ -25,33 +16,15 @@ > #define KVM_PID_FILE_PATH "/.kvm-tools/" > #define HOME_DIR getenv("HOME") > > -struct kvm { > - int sys_fd; /* For system ioctls(), i.e. 
/dev/kvm */ > - int vm_fd; /* For VM ioctls() */ > - timer_t timerid; /* Posix timer for interrupts */ > - > - int nrcpus; /* Number of cpus to run */ > - > - u32 mem_slots; /* for KVM_SET_USER_MEMORY_REGION */ > - > - u64 ram_size; > - void *ram_start; > - > - bool nmi_disabled; > - > - bool single_step; > +#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) > > - u16 boot_selector; > - u16 boot_ip; > - u16 boot_sp; > +#define DEFINE_KVM_EXT(ext) \ > + .name = #ext, \ > + .code = ext > > - struct interrupt_table interrupt_table; > - > - const char *vmlinux; > - struct disk_image **disks; > - int nr_disks; > - > - const char *name; > +struct kvm_ext { > + const char *name; > + int code; > }; > > void kvm__set_dir(const char *fmt, ...); > @@ -64,7 +37,6 @@ void kvm__init_ram(struct kvm *kvm); > void kvm__delete(struct kvm *kvm); > bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename, > const char *initrd_filename, const char *kernel_cmdline, u16 vidmode); > -void kvm__setup_bios(struct kvm *kvm); > void kvm__start_timer(struct kvm *kvm); > void kvm__stop_timer(struct kvm *kvm); > void kvm__irq_line(struct kvm *kvm, int irq, int level); > @@ -81,6 +53,13 @@ int kvm__get_sock_by_instance(const char *name); > int kvm__enumerate_instances(int (*callback)(const char *name, int pid)); > void kvm__remove_socket(const char *name); > > +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name); > +void kvm__arch_setup_firmware(struct kvm *kvm); > +bool kvm__arch_cpu_supports_vm(void); > + > +int load_flat_binary(struct kvm *kvm, int fd); > +bool load_bzimage(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline, u16 vidmode); > + > /* > * Debugging > */ > @@ -98,11 +77,4 @@ static inline void *guest_flat_to_host(struct kvm *kvm, unsigned long offset) > return kvm->ram_start + offset; > } > > -static inline void *guest_real_to_host(struct kvm *kvm, u16 selector, u16 offset) > -{ > - unsigned long flat = 
segment_to_flat(selector, offset); > - > - return guest_flat_to_host(kvm, flat); > -} > - > #endif /* KVM__KVM_H */ > diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c > index 7cbc44e..965cfc2 100644 > --- a/tools/kvm/ioport.c > +++ b/tools/kvm/ioport.c > @@ -52,34 +52,6 @@ static int ioport_insert(struct rb_root *root, struct ioport *data) > return rb_int_insert(root, &data->node); > } > > -static bool debug_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > -{ > - exit(EXIT_SUCCESS); > -} > - > -static struct ioport_operations debug_ops = { > - .io_out = debug_io_out, > -}; > - > -static bool dummy_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > -{ > - return true; > -} > - > -static bool dummy_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > -{ > - return true; > -} > - > -static struct ioport_operations dummy_read_write_ioport_ops = { > - .io_in = dummy_io_in, > - .io_out = dummy_io_out, > -}; > - > -static struct ioport_operations dummy_write_only_ioport_ops = { > - .io_out = dummy_io_out, > -}; > - > u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void *param) > { > struct ioport *entry; > @@ -164,29 +136,3 @@ error: > > return !ioport_debug; > } > - > -void ioport__setup_legacy(void) > -{ > - /* 0x0020 - 0x003F - 8259A PIC 1 */ > - ioport__register(0x0020, &dummy_read_write_ioport_ops, 2, NULL); > - > - /* PORT 0040-005F - PIT - PROGRAMMABLE INTERVAL TIMER (8253, 8254) */ > - ioport__register(0x0040, &dummy_read_write_ioport_ops, 4, NULL); > - > - /* 0x00A0 - 0x00AF - 8259A PIC 2 */ > - ioport__register(0x00A0, &dummy_read_write_ioport_ops, 2, NULL); > - > - /* PORT 00E0-00EF are 'motherboard specific' so we use them for our > - internal debugging purposes. */ > - ioport__register(IOPORT_DBG, &debug_ops, 1, NULL); > - > - /* PORT 00ED - DUMMY PORT FOR DELAY??? 
*/ > - ioport__register(0x00ED, &dummy_write_only_ioport_ops, 1, NULL); > - > - /* 0x00F0 - 0x00FF - Math co-processor */ > - ioport__register(0x00F0, &dummy_write_only_ioport_ops, 2, NULL); > - > - /* PORT 03D4-03D5 - COLOR VIDEO - CRT CONTROL REGISTERS */ > - ioport__register(0x03D4, &dummy_read_write_ioport_ops, 1, NULL); > - ioport__register(0x03D5, &dummy_write_only_ioport_ops, 1, NULL); > -} > diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c > index 0ad6f3b..5aba3bb 100644 > --- a/tools/kvm/kvm-cpu.c > +++ b/tools/kvm/kvm-cpu.c > @@ -4,8 +4,6 @@ > #include "kvm/util.h" > #include "kvm/kvm.h" > > -#include <asm/msr-index.h> > - > #include <sys/ioctl.h> > #include <sys/mman.h> > #include <signal.h> > @@ -14,106 +12,9 @@ > #include <errno.h> > #include <stdio.h> > > -#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) > - > extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS]; > extern __thread struct kvm_cpu *current_kvm_cpu; > > -static int debug_fd; > - > -void kvm_cpu__set_debug_fd(int fd) > -{ > - debug_fd = fd; > -} > - > -int kvm_cpu__get_debug_fd(void) > -{ > - return debug_fd; > -} > - > -static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) > -{ > - return vcpu->sregs.cr0 & 0x01; > -} > - > -static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) > -{ > - u64 cs; > - > - /* > - * NOTE! We should take code segment base address into account here. > - * Luckily it's usually zero because Linux uses flat memory model. > - */ > - if (is_in_protected_mode(vcpu)) > - return ip; > - > - cs = vcpu->sregs.cs.selector; > - > - return ip + (cs << 4); > -} > - > -static inline u32 selector_to_base(u16 selector) > -{ > - /* > - * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 
> - */ > - return (u32)selector * 16; > -} > - > -static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) > -{ > - struct kvm_cpu *vcpu; > - > - vcpu = calloc(1, sizeof *vcpu); > - if (!vcpu) > - return NULL; > - > - vcpu->kvm = kvm; > - > - return vcpu; > -} > - > -void kvm_cpu__delete(struct kvm_cpu *vcpu) > -{ > - if (vcpu->msrs) > - free(vcpu->msrs); > - > - free(vcpu); > -} > - > -struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) > -{ > - struct kvm_cpu *vcpu; > - int mmap_size; > - int coalesced_offset; > - > - vcpu = kvm_cpu__new(kvm); > - if (!vcpu) > - return NULL; > - > - vcpu->cpu_id = cpu_id; > - > - vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); > - if (vcpu->vcpu_fd < 0) > - die_perror("KVM_CREATE_VCPU ioctl"); > - > - mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); > - if (mmap_size < 0) > - die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); > - > - vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); > - if (vcpu->kvm_run == MAP_FAILED) > - die("unable to mmap vcpu fd"); > - > - coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); > - if (coalesced_offset) > - vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); > - > - vcpu->is_running = true; > - > - return vcpu; > -} > - > void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu) > { > struct kvm_guest_debug debug = { > @@ -124,278 +25,6 @@ void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu) > pr_warning("KVM_SET_GUEST_DEBUG failed"); > } > > -static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) > -{ > - struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); > - > - if (!vcpu) > - die("out of memory"); > - > - return vcpu; > -} > - > -#define KVM_MSR_ENTRY(_index, _data) \ > - (struct kvm_msr_entry) { .index = _index, .data = _data } > - > -static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) > -{ > - unsigned long ndx = 0; > - > - vcpu->msrs = 
kvm_msrs__new(100); > - > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); > -#ifdef CONFIG_X86_64 > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); > -#endif > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, > - MSR_IA32_MISC_ENABLE_FAST_STRING); > - > - vcpu->msrs->nmsrs = ndx; > - > - if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) > - die_perror("KVM_SET_MSRS failed"); > -} > - > -static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) > -{ > - vcpu->fpu = (struct kvm_fpu) { > - .fcw = 0x37f, > - .mxcsr = 0x1f80, > - }; > - > - if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) > - die_perror("KVM_SET_FPU failed"); > -} > - > -static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) > -{ > - vcpu->regs = (struct kvm_regs) { > - /* We start the guest in 16-bit real mode */ > - .rflags = 0x0000000000000002ULL, > - > - .rip = vcpu->kvm->boot_ip, > - .rsp = vcpu->kvm->boot_sp, > - .rbp = vcpu->kvm->boot_sp, > - }; > - > - if (vcpu->regs.rip > USHRT_MAX) > - die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); > - > - if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) > - die_perror("KVM_SET_REGS failed"); > -} > - > -static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) > -{ > - > - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) > - die_perror("KVM_GET_SREGS failed"); > - > - vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.cs.base = 
selector_to_base(vcpu->kvm->boot_selector); > - vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); > - vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->boot_selector); > - vcpu->sregs.es.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); > - vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); > - vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); > - > - if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) > - die_perror("KVM_SET_SREGS failed"); > -} > - > -/** > - * kvm_cpu__reset_vcpu - reset virtual CPU to a known state > - */ > -void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) > -{ > - kvm_cpu__setup_sregs(vcpu); > - kvm_cpu__setup_regs(vcpu); > - kvm_cpu__setup_fpu(vcpu); > - kvm_cpu__setup_msrs(vcpu); > -} > - > -static void print_dtable(const char *name, struct kvm_dtable *dtable) > -{ > - dprintf(debug_fd, " %s %016llx %08hx\n", > - name, (u64) dtable->base, (u16) dtable->limit); > -} > - > -static void print_segment(const char *name, struct kvm_segment *seg) > -{ > - dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", > - name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, > - (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); > -} > - > -void kvm_cpu__show_registers(struct kvm_cpu *vcpu) > -{ > - unsigned long cr0, cr2, cr3; > - unsigned long cr4, cr8; > - unsigned long rax, rbx, rcx; > - unsigned long rdx, rsi, rdi; > - unsigned long rbp, r8, r9; > - unsigned long r10, r11, r12; > - unsigned long r13, r14, r15; > - unsigned long rip, rsp; > - struct kvm_sregs sregs; > - unsigned long rflags; > - struct kvm_regs regs; > - int i; > - > - if (ioctl(vcpu->vcpu_fd, 
KVM_GET_REGS, &regs) < 0) > - die("KVM_GET_REGS failed"); > - > - rflags = regs.rflags; > - > - rip = regs.rip; rsp = regs.rsp; > - rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; > - rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; > - rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; > - r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; > - r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; > - > - dprintf(debug_fd, "\n Registers:\n"); > - dprintf(debug_fd, " ----------\n"); > - dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); > - dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); > - dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); > - dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); > - dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); > - dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); > - > - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) > - die("KVM_GET_REGS failed"); > - > - cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; > - cr4 = sregs.cr4; cr8 = sregs.cr8; > - > - dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); > - dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8); > - dprintf(debug_fd, "\n Segment registers:\n"); > - dprintf(debug_fd, " ------------------\n"); > - dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n"); > - print_segment("cs ", &sregs.cs); > - print_segment("ss ", &sregs.ss); > - print_segment("ds ", &sregs.ds); > - print_segment("es ", &sregs.es); > - print_segment("fs ", &sregs.fs); > - print_segment("gs ", &sregs.gs); > - print_segment("tr ", &sregs.tr); > - print_segment("ldt", &sregs.ldt); > - print_dtable("gdt", &sregs.gdt); > - print_dtable("idt", &sregs.idt); > - > - dprintf(debug_fd, "\n APIC:\n"); > - dprintf(debug_fd, " -----\n"); > - dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n", > - (u64) 
sregs.efer, (u64) sregs.apic_base, > - (vcpu->kvm->nmi_disabled ? "disabled" : "enabled")); > - > - dprintf(debug_fd, "\n Interrupt bitmap:\n"); > - dprintf(debug_fd, " -----------------\n"); > - for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) > - dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]); > - dprintf(debug_fd, "\n"); > -} > - > -#define MAX_SYM_LEN 128 > - > -void kvm_cpu__show_code(struct kvm_cpu *vcpu) > -{ > - unsigned int code_bytes = 64; > - unsigned int code_prologue = code_bytes * 43 / 64; > - unsigned int code_len = code_bytes; > - char sym[MAX_SYM_LEN]; > - unsigned char c; > - unsigned int i; > - u8 *ip; > - > - if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) > - die("KVM_GET_REGS failed"); > - > - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) > - die("KVM_GET_SREGS failed"); > - > - ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); > - > - dprintf(debug_fd, "\n Code:\n"); > - dprintf(debug_fd, " -----\n"); > - > - symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); > - > - dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); > - > - for (i = 0; i < code_len; i++, ip++) { > - if (!host_ptr_in_ram(vcpu->kvm, ip)) > - break; > - > - c = *ip; > - > - if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) > - dprintf(debug_fd, " <%02x>", c); > - else > - dprintf(debug_fd, " %02x", c); > - } > - > - dprintf(debug_fd, "\n"); > - > - dprintf(debug_fd, "\n Stack:\n"); > - dprintf(debug_fd, " ------\n"); > - kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); > -} > - > -void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) > -{ > - u64 *pte1; > - u64 *pte2; > - u64 *pte3; > - u64 *pte4; > - > - if (!is_in_protected_mode(vcpu)) > - return; > - > - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) > - die("KVM_GET_SREGS failed"); > - > - pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); > - if 
(!host_ptr_in_ram(vcpu->kvm, pte4)) > - return; > - > - pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); > - if (!host_ptr_in_ram(vcpu->kvm, pte3)) > - return; > - > - pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); > - if (!host_ptr_in_ram(vcpu->kvm, pte2)) > - return; > - > - pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); > - if (!host_ptr_in_ram(vcpu->kvm, pte1)) > - return; > - > - dprintf(debug_fd, "Page Tables:\n"); > - if (*pte2 & (1 << 7)) > - dprintf(debug_fd, " pte4: %016llx pte3: %016llx" > - " pte2: %016llx\n", > - *pte4, *pte3, *pte2); > - else > - dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016" > - "llx pte1: %016llx\n", > - *pte4, *pte3, *pte2, *pte1); > -} > - > void kvm_cpu__run(struct kvm_cpu *vcpu) > { > int err; > @@ -454,7 +83,6 @@ int kvm_cpu__start(struct kvm_cpu *cpu) > signal(SIGKVMEXIT, kvm_cpu_signal_handler); > signal(SIGKVMPAUSE, kvm_cpu_signal_handler); > > - kvm_cpu__setup_cpuid(cpu); > kvm_cpu__reset_vcpu(cpu); > > if (cpu->kvm->single_step) > diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c > index 252bd18..7ce1640 100644 > --- a/tools/kvm/kvm.c > +++ b/tools/kvm/kvm.c > @@ -1,10 +1,5 @@ > #include "kvm/kvm.h" > - > -#include "kvm/boot-protocol.h" > -#include "kvm/cpufeature.h" > #include "kvm/read-write.h" > -#include "kvm/interrupt.h" > -#include "kvm/mptable.h" > #include "kvm/util.h" > #include "kvm/mutex.h" > #include "kvm/kvm-cpu.h" > @@ -12,14 +7,11 @@ > > #include <linux/kvm.h> > > -#include <asm/bootparam.h> > - > #include <sys/un.h> > #include <sys/types.h> > #include <sys/socket.h> > #include <sys/ioctl.h> > #include <sys/mman.h> > -#include <sys/stat.h> > #include <stdbool.h> > #include <assert.h> > #include <limits.h> > @@ -58,29 +50,11 @@ const char *kvm_exit_reasons[] = { > DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR), > }; > > -#define DEFINE_KVM_EXT(ext) \ > - .name = #ext, \ > - .code = ext > - > -struct { > - const char *name; > - int code; > -} kvm_req_ext[] = { > - { 
DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, > - { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, > - { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, > - { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, > - { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, > - { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, > - { DEFINE_KVM_EXT(KVM_CAP_HLT) }, > - { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, > - { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, > -}; > - > extern struct kvm *kvm; > extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS]; > static int pause_event; > static DEFINE_MUTEX(pause_lock); > +extern struct kvm_ext kvm_req_ext[]; > > static char kvm_dir[PATH_MAX]; > > @@ -127,7 +101,9 @@ static int kvm__check_extensions(struct kvm *kvm) > { > unsigned int i; > > - for (i = 0; i < ARRAY_SIZE(kvm_req_ext); i++) { > + for (i = 0; ; i++) { > + if (!kvm_req_ext[i].name) > + break; > if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) { > pr_error("Unsuppored KVM extension detected: %s", > kvm_req_ext[i].name); > @@ -261,48 +237,6 @@ void kvm__delete(struct kvm *kvm) > free(kvm); > } > > -static bool kvm__cpu_supports_vm(void) > -{ > - struct cpuid_regs regs; > - u32 eax_base; > - int feature; > - > - regs = (struct cpuid_regs) { > - .eax = 0x00, > - }; > - host_cpuid(&regs); > - > - switch (regs.ebx) { > - case CPUID_VENDOR_INTEL_1: > - eax_base = 0x00; > - feature = KVM__X86_FEATURE_VMX; > - break; > - > - case CPUID_VENDOR_AMD_1: > - eax_base = 0x80000000; > - feature = KVM__X86_FEATURE_SVM; > - break; > - > - default: > - return false; > - } > - > - regs = (struct cpuid_regs) { > - .eax = eax_base, > - }; > - host_cpuid(&regs); > - > - if (regs.eax < eax_base + 0x01) > - return false; > - > - regs = (struct cpuid_regs) { > - .eax = eax_base + 0x01 > - }; > - host_cpuid(&regs); > - > - return regs.ecx & (1 << feature); > -} > - > /* > * Note: KVM_SET_USER_MEMORY_REGION assumes that we don't pass overlapping > * memory regions to it. 
Therefore, be careful if you use this function for > @@ -325,47 +259,6 @@ void kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size, void *userspac > die_perror("KVM_SET_USER_MEMORY_REGION ioctl"); > } > > -/* > - * Allocating RAM size bigger than 4GB requires us to leave a gap > - * in the RAM which is used for PCI MMIO, hotplug, and unconfigured > - * devices (see documentation of e820_setup_gap() for details). > - * > - * If we're required to initialize RAM bigger than 4GB, we will create > - * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space. > - */ > - > -void kvm__init_ram(struct kvm *kvm) > -{ > - u64 phys_start, phys_size; > - void *host_mem; > - > - if (kvm->ram_size < KVM_32BIT_GAP_START) { > - /* Use a single block of RAM for 32bit RAM */ > - > - phys_start = 0; > - phys_size = kvm->ram_size; > - host_mem = kvm->ram_start; > - > - kvm__register_mem(kvm, phys_start, phys_size, host_mem); > - } else { > - /* First RAM range from zero to the PCI gap: */ > - > - phys_start = 0; > - phys_size = KVM_32BIT_GAP_START; > - host_mem = kvm->ram_start; > - > - kvm__register_mem(kvm, phys_start, phys_size, host_mem); > - > - /* Second RAM range from 4GB to the end of RAM: */ > - > - phys_start = 0x100000000ULL; > - phys_size = kvm->ram_size - phys_size; > - host_mem = kvm->ram_start + phys_start; > - > - kvm__register_mem(kvm, phys_start, phys_size, host_mem); > - } > -} > - > int kvm__recommended_cpus(struct kvm *kvm) > { > int ret; > @@ -410,11 +303,10 @@ int kvm__max_cpus(struct kvm *kvm) > > struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) > { > - struct kvm_pit_config pit_config = { .flags = 0, }; > struct kvm *kvm; > int ret; > > - if (!kvm__cpu_supports_vm()) > + if (!kvm__arch_cpu_supports_vm()) > die("Your CPU does not support hardware virtualization"); > > kvm = kvm__new(); > @@ -442,36 +334,7 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) > if 
(kvm__check_extensions(kvm)) > die("A required KVM extention is not supported by OS"); > > - ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); > - if (ret < 0) > - die_perror("KVM_SET_TSS_ADDR ioctl"); > - > - ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); > - if (ret < 0) > - die_perror("KVM_CREATE_PIT2 ioctl"); > - > - kvm->ram_size = ram_size; > - > - if (kvm->ram_size < KVM_32BIT_GAP_START) { > - kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > - } else { > - kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > - if (kvm->ram_start != MAP_FAILED) { > - /* > - * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that > - * if we accidently write to it, we will know. > - */ > - mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); > - } > - } > - if (kvm->ram_start == MAP_FAILED) > - die("out of memory"); > - > - madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); > - > - ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); > - if (ret < 0) > - die_perror("KVM_CREATE_IRQCHIP ioctl"); > + kvm__arch_init(kvm, kvm_dev, ram_size, name); > > kvm->name = name; > > @@ -480,141 +343,6 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) > return kvm; > } > > -#define BOOT_LOADER_SELECTOR 0x1000 > -#define BOOT_LOADER_IP 0x0000 > -#define BOOT_LOADER_SP 0x8000 > -#define BOOT_CMDLINE_OFFSET 0x20000 > - > -#define BOOT_PROTOCOL_REQUIRED 0x206 > -#define LOAD_HIGH 0x01 > - > -static int load_flat_binary(struct kvm *kvm, int fd) > -{ > - void *p; > - int nr; > - > - if (lseek(fd, 0, SEEK_SET) < 0) > - die_perror("lseek"); > - > - p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); > - > - while ((nr = read(fd, p, 65536)) > 0) > - p += nr; > - > - kvm->boot_selector = BOOT_LOADER_SELECTOR; > - kvm->boot_ip = BOOT_LOADER_IP; > - kvm->boot_sp = BOOT_LOADER_SP; > - > - return true; > -} > - > -static const char 
*BZIMAGE_MAGIC = "HdrS"; > - > -static bool load_bzimage(struct kvm *kvm, int fd_kernel, > - int fd_initrd, const char *kernel_cmdline, u16 vidmode) > -{ > - struct boot_params *kern_boot; > - unsigned long setup_sects; > - struct boot_params boot; > - size_t cmdline_size; > - ssize_t setup_size; > - void *p; > - int nr; > - > - /* > - * See Documentation/x86/boot.txt for details no bzImage on-disk and > - * memory layout. > - */ > - > - if (lseek(fd_kernel, 0, SEEK_SET) < 0) > - die_perror("lseek"); > - > - if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) > - return false; > - > - if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) > - return false; > - > - if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) > - die("Too old kernel"); > - > - if (lseek(fd_kernel, 0, SEEK_SET) < 0) > - die_perror("lseek"); > - > - if (!boot.hdr.setup_sects) > - boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; > - setup_sects = boot.hdr.setup_sects + 1; > - > - setup_size = setup_sects << 9; > - p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); > - > - /* copy setup.bin to mem*/ > - if (read(fd_kernel, p, setup_size) != setup_size) > - die_perror("read"); > - > - /* copy vmlinux.bin to BZ_KERNEL_START*/ > - p = guest_flat_to_host(kvm, BZ_KERNEL_START); > - > - while ((nr = read(fd_kernel, p, 65536)) > 0) > - p += nr; > - > - p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); > - if (kernel_cmdline) { > - cmdline_size = strlen(kernel_cmdline) + 1; > - if (cmdline_size > boot.hdr.cmdline_size) > - cmdline_size = boot.hdr.cmdline_size; > - > - memset(p, 0, boot.hdr.cmdline_size); > - memcpy(p, kernel_cmdline, cmdline_size - 1); > - } > - > - kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); > - > - kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; > - kern_boot->hdr.type_of_loader = 0xff; > - kern_boot->hdr.heap_end_ptr = 0xfe00; > - kern_boot->hdr.loadflags |= CAN_USE_HEAP; > - kern_boot->hdr.vid_mode = vidmode; > - > - /* > - * Read 
initrd image into guest memory > - */ > - if (fd_initrd >= 0) { > - struct stat initrd_stat; > - unsigned long addr; > - > - if (fstat(fd_initrd, &initrd_stat)) > - die_perror("fstat"); > - > - addr = boot.hdr.initrd_addr_max & ~0xfffff; > - for (;;) { > - if (addr < BZ_KERNEL_START) > - die("Not enough memory for initrd"); > - else if (addr < (kvm->ram_size - initrd_stat.st_size)) > - break; > - addr -= 0x100000; > - } > - > - p = guest_flat_to_host(kvm, addr); > - nr = read(fd_initrd, p, initrd_stat.st_size); > - if (nr != initrd_stat.st_size) > - die("Failed to read initrd"); > - > - kern_boot->hdr.ramdisk_image = addr; > - kern_boot->hdr.ramdisk_size = initrd_stat.st_size; > - } > - > - kvm->boot_selector = BOOT_LOADER_SELECTOR; > - /* > - * The real-mode setup code starts at offset 0x200 of a bzImage. See > - * Documentation/x86/boot.txt for details. > - */ > - kvm->boot_ip = BOOT_LOADER_IP + 0x200; > - kvm->boot_sp = BOOT_LOADER_SP; > - > - return true; > -} > - > /* RFC 1952 */ > #define GZIP_ID1 0x1f > #define GZIP_ID2 0x8b > @@ -675,24 +403,6 @@ found_kernel: > return ret; > } > > -/** > - * kvm__setup_bios - inject BIOS into guest system memory > - * @kvm - guest system descriptor > - * > - * This function is a main routine where we poke guest memory > - * and install BIOS there. 
> - */ > -void kvm__setup_bios(struct kvm *kvm) > -{ > - /* standart minimal configuration */ > - setup_bios(kvm); > - > - /* FIXME: SMP, ACPI and friends here */ > - > - /* MP table */ > - mptable_setup(kvm, kvm->nrcpus); > -} > - > #define TIMER_INTERVAL_NS 1000000 /* 1 msec */ > > /* > @@ -732,27 +442,6 @@ void kvm__stop_timer(struct kvm *kvm) > kvm->timerid = 0; > } > > -void kvm__irq_line(struct kvm *kvm, int irq, int level) > -{ > - struct kvm_irq_level irq_level; > - > - irq_level = (struct kvm_irq_level) { > - { > - .irq = irq, > - }, > - .level = level, > - }; > - > - if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) > - die_perror("KVM_IRQ_LINE failed"); > -} > - > -void kvm__irq_trigger(struct kvm *kvm, int irq) > -{ > - kvm__irq_line(kvm, irq, 1); > - kvm__irq_line(kvm, irq, 0); > -} > - > void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size) > { > unsigned char *p; > diff --git a/tools/kvm/bios.c b/tools/kvm/x86/bios.c > similarity index 100% > rename from tools/kvm/bios.c > rename to tools/kvm/x86/bios.c > diff --git a/tools/kvm/bios/.gitignore b/tools/kvm/x86/bios/.gitignore > similarity index 100% > rename from tools/kvm/bios/.gitignore > rename to tools/kvm/x86/bios/.gitignore > diff --git a/tools/kvm/bios/bios-rom.S b/tools/kvm/x86/bios/bios-rom.S > similarity index 80% > rename from tools/kvm/bios/bios-rom.S > rename to tools/kvm/x86/bios/bios-rom.S > index dc52b1e..3269ce9 100644 > --- a/tools/kvm/bios/bios-rom.S > +++ b/tools/kvm/x86/bios/bios-rom.S > @@ -8,5 +8,5 @@ > #endif > > GLOBAL(bios_rom) > - .incbin "bios/bios.bin" > + .incbin "x86/bios/bios.bin" > END(bios_rom) > diff --git a/tools/kvm/bios/e820.c b/tools/kvm/x86/bios/e820.c > similarity index 100% > rename from tools/kvm/bios/e820.c > rename to tools/kvm/x86/bios/e820.c > diff --git a/tools/kvm/bios/entry.S b/tools/kvm/x86/bios/entry.S > similarity index 100% > rename from tools/kvm/bios/entry.S > rename to tools/kvm/x86/bios/entry.S > diff --git 
a/tools/kvm/bios/gen-offsets.sh b/tools/kvm/x86/bios/gen-offsets.sh > similarity index 100% > rename from tools/kvm/bios/gen-offsets.sh > rename to tools/kvm/x86/bios/gen-offsets.sh > diff --git a/tools/kvm/bios/int10.c b/tools/kvm/x86/bios/int10.c > similarity index 100% > rename from tools/kvm/bios/int10.c > rename to tools/kvm/x86/bios/int10.c > diff --git a/tools/kvm/bios/int15.c b/tools/kvm/x86/bios/int15.c > similarity index 100% > rename from tools/kvm/bios/int15.c > rename to tools/kvm/x86/bios/int15.c > diff --git a/tools/kvm/bios/local.S b/tools/kvm/x86/bios/local.S > similarity index 100% > rename from tools/kvm/bios/local.S > rename to tools/kvm/x86/bios/local.S > diff --git a/tools/kvm/bios/macro.S b/tools/kvm/x86/bios/macro.S > similarity index 100% > rename from tools/kvm/bios/macro.S > rename to tools/kvm/x86/bios/macro.S > diff --git a/tools/kvm/bios/memcpy.c b/tools/kvm/x86/bios/memcpy.c > similarity index 100% > rename from tools/kvm/bios/memcpy.c > rename to tools/kvm/x86/bios/memcpy.c > diff --git a/tools/kvm/bios/rom.ld.S b/tools/kvm/x86/bios/rom.ld.S > similarity index 100% > rename from tools/kvm/bios/rom.ld.S > rename to tools/kvm/x86/bios/rom.ld.S > diff --git a/tools/kvm/cpuid.c b/tools/kvm/x86/cpuid.c > similarity index 100% > rename from tools/kvm/cpuid.c > rename to tools/kvm/x86/cpuid.c > diff --git a/tools/kvm/include/kvm/assembly.h b/tools/kvm/x86/include/kvm/assembly.h > similarity index 100% > rename from tools/kvm/include/kvm/assembly.h > rename to tools/kvm/x86/include/kvm/assembly.h > diff --git a/tools/kvm/include/kvm/barrier.h b/tools/kvm/x86/include/kvm/barrier.h > similarity index 100% > rename from tools/kvm/include/kvm/barrier.h > rename to tools/kvm/x86/include/kvm/barrier.h > diff --git a/tools/kvm/include/kvm/bios-export.h b/tools/kvm/x86/include/kvm/bios-export.h > similarity index 100% > rename from tools/kvm/include/kvm/bios-export.h > rename to tools/kvm/x86/include/kvm/bios-export.h > diff --git 
a/tools/kvm/include/kvm/bios.h b/tools/kvm/x86/include/kvm/bios.h > similarity index 100% > rename from tools/kvm/include/kvm/bios.h > rename to tools/kvm/x86/include/kvm/bios.h > diff --git a/tools/kvm/include/kvm/boot-protocol.h b/tools/kvm/x86/include/kvm/boot-protocol.h > similarity index 100% > rename from tools/kvm/include/kvm/boot-protocol.h > rename to tools/kvm/x86/include/kvm/boot-protocol.h > diff --git a/tools/kvm/include/kvm/cpufeature.h b/tools/kvm/x86/include/kvm/cpufeature.h > similarity index 100% > rename from tools/kvm/include/kvm/cpufeature.h > rename to tools/kvm/x86/include/kvm/cpufeature.h > diff --git a/tools/kvm/include/kvm/interrupt.h b/tools/kvm/x86/include/kvm/interrupt.h > similarity index 100% > rename from tools/kvm/include/kvm/interrupt.h > rename to tools/kvm/x86/include/kvm/interrupt.h > diff --git a/tools/kvm/x86/include/kvm/kvm-arch.h b/tools/kvm/x86/include/kvm/kvm-arch.h > new file mode 100644 > index 0000000..02aa8b9 > --- /dev/null > +++ b/tools/kvm/x86/include/kvm/kvm-arch.h > @@ -0,0 +1,59 @@ > +#ifndef KVM__KVM_ARCH_H > +#define KVM__KVM_ARCH_H > + > +#include "kvm/interrupt.h" > +#include "kvm/segment.h" > + > +#include <stdbool.h> > +#include <linux/types.h> > +#include <time.h> > + > +#define KVM_NR_CPUS (255) > + > +/* > + * The hole includes VESA framebuffer and PCI memory. > + */ > +#define KVM_32BIT_GAP_SIZE (768 << 20) > +#define KVM_32BIT_GAP_START ((1ULL << 32) - KVM_32BIT_GAP_SIZE) > + > +#define KVM_MMIO_START KVM_32BIT_GAP_START > + > +struct kvm { > + int sys_fd; /* For system ioctls(), i.e. 
/dev/kvm */ > + int vm_fd; /* For VM ioctls() */ > + timer_t timerid; /* Posix timer for interrupts */ > + > + int nrcpus; /* Number of cpus to run */ > + > + u32 mem_slots; /* for KVM_SET_USER_MEMORY_REGION */ > + > + u64 ram_size; > + void *ram_start; > + > + bool nmi_disabled; > + > + bool single_step; > + > + u16 boot_selector; > + u16 boot_ip; > + u16 boot_sp; > + > + struct interrupt_table interrupt_table; > + > + const char *vmlinux; > + struct disk_image **disks; > + int nr_disks; > + > + const char *name; > +}; > + > +static inline void *guest_flat_to_host(struct kvm *kvm, unsigned long offset); /* In kvm.h */ > + > +static inline void *guest_real_to_host(struct kvm *kvm, u16 selector, u16 offset) > +{ > + unsigned long flat = segment_to_flat(selector, offset); > + > + return guest_flat_to_host(kvm, flat); > +} > + > +#endif /* KVM__KVM_ARCH_H */ > diff --git a/tools/kvm/x86/include/kvm/kvm-cpu-arch.h b/tools/kvm/x86/include/kvm/kvm-cpu-arch.h > new file mode 100644 > index 0000000..ed1c727 > --- /dev/null > +++ b/tools/kvm/x86/include/kvm/kvm-cpu-arch.h > @@ -0,0 +1,33 @@ > +#ifndef KVM__KVM_CPU_ARCH_H > +#define KVM__KVM_CPU_ARCH_H > + > +/* Architecture-specific kvm_cpu definitions. 
*/ > + > +#include <linux/kvm.h> /* for struct kvm_regs */ > + > +#include <pthread.h> > + > +struct kvm; > + > +struct kvm_cpu { > + pthread_t thread; /* VCPU thread */ > + > + unsigned long cpu_id; > + > + struct kvm *kvm; /* parent KVM */ > + int vcpu_fd; /* For VCPU ioctls() */ > + struct kvm_run *kvm_run; > + > + struct kvm_regs regs; > + struct kvm_sregs sregs; > + struct kvm_fpu fpu; > + > + struct kvm_msrs *msrs; /* dynamically allocated */ > + > + u8 is_running; > + u8 paused; > + > + struct kvm_coalesced_mmio_ring *ring; > +}; > + > +#endif /* KVM__KVM_CPU_ARCH_H */ > diff --git a/tools/kvm/include/kvm/mptable.h b/tools/kvm/x86/include/kvm/mptable.h > similarity index 100% > rename from tools/kvm/include/kvm/mptable.h > rename to tools/kvm/x86/include/kvm/mptable.h > diff --git a/tools/kvm/interrupt.c b/tools/kvm/x86/interrupt.c > similarity index 100% > rename from tools/kvm/interrupt.c > rename to tools/kvm/x86/interrupt.c > diff --git a/tools/kvm/x86/ioport.c b/tools/kvm/x86/ioport.c > new file mode 100644 > index 0000000..8a91bf2 > --- /dev/null > +++ b/tools/kvm/x86/ioport.c > @@ -0,0 +1,59 @@ > +#include "kvm/ioport.h" > + > +#include <stdlib.h> > + > +static bool debug_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > +{ > + exit(EXIT_SUCCESS); > +} > + > +static struct ioport_operations debug_ops = { > + .io_out = debug_io_out, > +}; > + > +static bool dummy_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > +{ > + return true; > +} > + > +static bool dummy_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > +{ > + return true; > +} > + > +static struct ioport_operations dummy_read_write_ioport_ops = { > + .io_in = dummy_io_in, > + .io_out = dummy_io_out, > +}; > + > +static struct ioport_operations dummy_write_only_ioport_ops = { > + .io_out = dummy_io_out, > +}; > + > +void ioport__setup_arch(void) > +{ > + /* Legacy ioport setup */ > + > + /* 0x0020 - 
0x003F - 8259A PIC 1 */ > + ioport__register(0x0020, &dummy_read_write_ioport_ops, 2, NULL); > + > + /* PORT 0040-005F - PIT - PROGRAMMABLE INTERVAL TIMER (8253, 8254) */ > + ioport__register(0x0040, &dummy_read_write_ioport_ops, 4, NULL); > + > + /* 0x00A0 - 0x00AF - 8259A PIC 2 */ > + ioport__register(0x00A0, &dummy_read_write_ioport_ops, 2, NULL); > + > + /* PORT 00E0-00EF are 'motherboard specific' so we use them for our > + internal debugging purposes. */ > + ioport__register(IOPORT_DBG, &debug_ops, 1, NULL); > + > + /* PORT 00ED - DUMMY PORT FOR DELAY??? */ > + ioport__register(0x00ED, &dummy_write_only_ioport_ops, 1, NULL); > + > + /* 0x00F0 - 0x00FF - Math co-processor */ > + ioport__register(0x00F0, &dummy_write_only_ioport_ops, 2, NULL); > + > + /* PORT 03D4-03D5 - COLOR VIDEO - CRT CONTROL REGISTERS */ > + ioport__register(0x03D4, &dummy_read_write_ioport_ops, 1, NULL); > + ioport__register(0x03D5, &dummy_write_only_ioport_ops, 1, NULL); > +} > diff --git a/tools/kvm/irq.c b/tools/kvm/x86/irq.c > similarity index 100% > rename from tools/kvm/irq.c > rename to tools/kvm/x86/irq.c > diff --git a/tools/kvm/x86/kvm-cpu.c b/tools/kvm/x86/kvm-cpu.c > new file mode 100644 > index 0000000..b26b208 > --- /dev/null > +++ b/tools/kvm/x86/kvm-cpu.c > @@ -0,0 +1,383 @@ > +#include "kvm/kvm-cpu.h" > + > +#include "kvm/symbol.h" > +#include "kvm/util.h" > +#include "kvm/kvm.h" > + > +#include <asm/msr-index.h> > + > +#include <sys/ioctl.h> > +#include <sys/mman.h> > +#include <signal.h> > +#include <stdlib.h> > +#include <string.h> > +#include <errno.h> > +#include <stdio.h> > + > +static int debug_fd; > + > +void kvm_cpu__set_debug_fd(int fd) > +{ > + debug_fd = fd; > +} > + > +int kvm_cpu__get_debug_fd(void) > +{ > + return debug_fd; > +} > + > +static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) > +{ > + return vcpu->sregs.cr0 & 0x01; > +} > + > +static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) > +{ > + u64 cs; > + > + /* > + * NOTE! 
We should take code segment base address into account here. > + * Luckily it's usually zero because Linux uses flat memory model. > + */ > + if (is_in_protected_mode(vcpu)) > + return ip; > + > + cs = vcpu->sregs.cs.selector; > + > + return ip + (cs << 4); > +} > + > +static inline u32 selector_to_base(u16 selector) > +{ > + /* > + * KVM on Intel requires 'base' to be 'selector * 16' in real mode. > + */ > + return (u32)selector * 16; > +} > + > +static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) > +{ > + struct kvm_cpu *vcpu; > + > + vcpu = calloc(1, sizeof *vcpu); > + if (!vcpu) > + return NULL; > + > + vcpu->kvm = kvm; > + > + return vcpu; > +} > + > +void kvm_cpu__delete(struct kvm_cpu *vcpu) > +{ > + if (vcpu->msrs) > + free(vcpu->msrs); > + > + free(vcpu); > +} > + > +struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) > +{ > + struct kvm_cpu *vcpu; > + int mmap_size; > + int coalesced_offset; > + > + vcpu = kvm_cpu__new(kvm); > + if (!vcpu) > + return NULL; > + > + vcpu->cpu_id = cpu_id; > + > + vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); > + if (vcpu->vcpu_fd < 0) > + die_perror("KVM_CREATE_VCPU ioctl"); > + > + mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); > + if (mmap_size < 0) > + die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); > + > + vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); > + if (vcpu->kvm_run == MAP_FAILED) > + die("unable to mmap vcpu fd"); > + > + coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); > + if (coalesced_offset) > + vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); > + > + vcpu->is_running = true; > + > + return vcpu; > +} > + > +static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) > +{ > + struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); > + > + if (!vcpu) > + die("out of memory"); > + > + return vcpu; > +} > + > +#define KVM_MSR_ENTRY(_index, _data) \ 
> + (struct kvm_msr_entry) { .index = _index, .data = _data } > + > +static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) > +{ > + unsigned long ndx = 0; > + > + vcpu->msrs = kvm_msrs__new(100); > + > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); > +#ifdef CONFIG_X86_64 > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); > +#endif > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, > + MSR_IA32_MISC_ENABLE_FAST_STRING); > + > + vcpu->msrs->nmsrs = ndx; > + > + if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) > + die_perror("KVM_SET_MSRS failed"); > +} > + > +static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) > +{ > + vcpu->fpu = (struct kvm_fpu) { > + .fcw = 0x37f, > + .mxcsr = 0x1f80, > + }; > + > + if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) > + die_perror("KVM_SET_FPU failed"); > +} > + > +static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) > +{ > + vcpu->regs = (struct kvm_regs) { > + /* We start the guest in 16-bit real mode */ > + .rflags = 0x0000000000000002ULL, > + > + .rip = vcpu->kvm->boot_ip, > + .rsp = vcpu->kvm->boot_sp, > + .rbp = vcpu->kvm->boot_sp, > + }; > + > + if (vcpu->regs.rip > USHRT_MAX) > + die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); > + > + if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) > + die_perror("KVM_SET_REGS failed"); > +} > + > +static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) > +{ > + > + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, 
&vcpu->sregs) < 0) > + die_perror("KVM_GET_SREGS failed"); > + > + vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->boot_selector); > + vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); > + vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->boot_selector); > + vcpu->sregs.es.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); > + vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); > + vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); > + > + if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) > + die_perror("KVM_SET_SREGS failed"); > +} > + > +/** > + * kvm_cpu__reset_vcpu - reset virtual CPU to a known state > + */ > +void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) > +{ > + kvm_cpu__setup_cpuid(vcpu); > + kvm_cpu__setup_sregs(vcpu); > + kvm_cpu__setup_regs(vcpu); > + kvm_cpu__setup_fpu(vcpu); > + kvm_cpu__setup_msrs(vcpu); > +} > + > +static void print_dtable(const char *name, struct kvm_dtable *dtable) > +{ > + dprintf(debug_fd, " %s %016llx %08hx\n", > + name, (u64) dtable->base, (u16) dtable->limit); > +} > + > +static void print_segment(const char *name, struct kvm_segment *seg) > +{ > + dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", > + name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, > + (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); > +} > + > +void kvm_cpu__show_registers(struct kvm_cpu *vcpu) > +{ > + unsigned long cr0, cr2, cr3; > + unsigned long cr4, cr8; > + unsigned long rax, rbx, rcx; > + unsigned long rdx, rsi, rdi; > + unsigned long rbp, r8, r9; > + unsigned long r10, r11, r12; > + unsigned 
long r13, r14, r15; > + unsigned long rip, rsp; > + struct kvm_sregs sregs; > + unsigned long rflags; > + struct kvm_regs regs; > + int i; > + > + if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0) > + die("KVM_GET_REGS failed"); > + > + rflags = regs.rflags; > + > + rip = regs.rip; rsp = regs.rsp; > + rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; > + rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; > + rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; > + r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; > + r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; > + > + dprintf(debug_fd, "\n Registers:\n"); > + dprintf(debug_fd, " ----------\n"); > + dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); > + dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); > + dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); > + dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); > + dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); > + dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); > + > + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) > + die("KVM_GET_REGS failed"); > + > + cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; > + cr4 = sregs.cr4; cr8 = sregs.cr8; > + > + dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); > + dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8); > + dprintf(debug_fd, "\n Segment registers:\n"); > + dprintf(debug_fd, " ------------------\n"); > + dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n"); > + print_segment("cs ", &sregs.cs); > + print_segment("ss ", &sregs.ss); > + print_segment("ds ", &sregs.ds); > + print_segment("es ", &sregs.es); > + print_segment("fs ", &sregs.fs); > + print_segment("gs ", &sregs.gs); > + print_segment("tr ", &sregs.tr); > + print_segment("ldt", &sregs.ldt); > + print_dtable("gdt", &sregs.gdt); > + print_dtable("idt", 
&sregs.idt); > + > + dprintf(debug_fd, "\n APIC:\n"); > + dprintf(debug_fd, " -----\n"); > + dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n", > + (u64) sregs.efer, (u64) sregs.apic_base, > + (vcpu->kvm->nmi_disabled ? "disabled" : "enabled")); > + > + dprintf(debug_fd, "\n Interrupt bitmap:\n"); > + dprintf(debug_fd, " -----------------\n"); > + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) > + dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]); > + dprintf(debug_fd, "\n"); > +} > + > +#define MAX_SYM_LEN 128 > + > +void kvm_cpu__show_code(struct kvm_cpu *vcpu) > +{ > + unsigned int code_bytes = 64; > + unsigned int code_prologue = code_bytes * 43 / 64; > + unsigned int code_len = code_bytes; > + char sym[MAX_SYM_LEN]; > + unsigned char c; > + unsigned int i; > + u8 *ip; > + > + if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) > + die("KVM_GET_REGS failed"); > + > + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) > + die("KVM_GET_SREGS failed"); > + > + ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); > + > + dprintf(debug_fd, "\n Code:\n"); > + dprintf(debug_fd, " -----\n"); > + > + symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); > + > + dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); > + > + for (i = 0; i < code_len; i++, ip++) { > + if (!host_ptr_in_ram(vcpu->kvm, ip)) > + break; > + > + c = *ip; > + > + if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) > + dprintf(debug_fd, " <%02x>", c); > + else > + dprintf(debug_fd, " %02x", c); > + } > + > + dprintf(debug_fd, "\n"); > + > + dprintf(debug_fd, "\n Stack:\n"); > + dprintf(debug_fd, " ------\n"); > + kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); > +} > + > +void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) > +{ > + u64 *pte1; > + u64 *pte2; > + u64 *pte3; > + u64 *pte4; > + > + if (!is_in_protected_mode(vcpu)) > + return; > + > + if 
(ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) > + die("KVM_GET_SREGS failed"); > + > + pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); > + if (!host_ptr_in_ram(vcpu->kvm, pte4)) > + return; > + > + pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); > + if (!host_ptr_in_ram(vcpu->kvm, pte3)) > + return; > + > + pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); > + if (!host_ptr_in_ram(vcpu->kvm, pte2)) > + return; > + > + pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); > + if (!host_ptr_in_ram(vcpu->kvm, pte1)) > + return; > + > + dprintf(debug_fd, "Page Tables:\n"); > + if (*pte2 & (1 << 7)) > + dprintf(debug_fd, " pte4: %016llx pte3: %016llx" > + " pte2: %016llx\n", > + *pte4, *pte3, *pte2); > + else > + dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016" > + "llx pte1: %016llx\n", > + *pte4, *pte3, *pte2, *pte1); > +} > diff --git a/tools/kvm/x86/kvm.c b/tools/kvm/x86/kvm.c > new file mode 100644 > index 0000000..ac6c91e > --- /dev/null > +++ b/tools/kvm/x86/kvm.c > @@ -0,0 +1,330 @@ > +#include "kvm/kvm.h" > +#include "kvm/boot-protocol.h" > +#include "kvm/cpufeature.h" > +#include "kvm/interrupt.h" > +#include "kvm/mptable.h" > +#include "kvm/util.h" > + > +#include <asm/bootparam.h> > +#include <linux/kvm.h> > + > +#include <sys/types.h> > +#include <sys/ioctl.h> > +#include <sys/mman.h> > +#include <sys/stat.h> > +#include <stdbool.h> > +#include <assert.h> > +#include <stdlib.h> > +#include <string.h> > +#include <unistd.h> > +#include <stdio.h> > +#include <fcntl.h> > +#include <asm/unistd.h> > + > +struct kvm_ext kvm_req_ext[] = { > + { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, > + { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, > + { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, > + { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, > + { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, > + { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, > + { DEFINE_KVM_EXT(KVM_CAP_HLT) }, > + { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, > + { 
DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, > + { 0, 0 } > +}; > + > +bool kvm__arch_cpu_supports_vm(void) > +{ > + struct cpuid_regs regs; > + u32 eax_base; > + int feature; > + > + regs = (struct cpuid_regs) { > + .eax = 0x00, > + }; > + host_cpuid(&regs); > + > + switch (regs.ebx) { > + case CPUID_VENDOR_INTEL_1: > + eax_base = 0x00; > + feature = KVM__X86_FEATURE_VMX; > + break; > + > + case CPUID_VENDOR_AMD_1: > + eax_base = 0x80000000; > + feature = KVM__X86_FEATURE_SVM; > + break; > + > + default: > + return false; > + } > + > + regs = (struct cpuid_regs) { > + .eax = eax_base, > + }; > + host_cpuid(&regs); > + > + if (regs.eax < eax_base + 0x01) > + return false; > + > + regs = (struct cpuid_regs) { > + .eax = eax_base + 0x01 > + }; > + host_cpuid(&regs); > + > + return regs.ecx & (1 << feature); > +} > + > +/* > + * Allocating RAM size bigger than 4GB requires us to leave a gap > + * in the RAM which is used for PCI MMIO, hotplug, and unconfigured > + * devices (see documentation of e820_setup_gap() for details). > + * > + * If we're required to initialize RAM bigger than 4GB, we will create > + * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space. 
> + */ > + > +void kvm__init_ram(struct kvm *kvm) > +{ > + u64 phys_start, phys_size; > + void *host_mem; > + > + if (kvm->ram_size < KVM_32BIT_GAP_START) { > + /* Use a single block of RAM for 32bit RAM */ > + > + phys_start = 0; > + phys_size = kvm->ram_size; > + host_mem = kvm->ram_start; > + > + kvm__register_mem(kvm, phys_start, phys_size, host_mem); > + } else { > + /* First RAM range from zero to the PCI gap: */ > + > + phys_start = 0; > + phys_size = KVM_32BIT_GAP_START; > + host_mem = kvm->ram_start; > + > + kvm__register_mem(kvm, phys_start, phys_size, host_mem); > + > + /* Second RAM range from 4GB to the end of RAM: */ > + > + phys_start = 0x100000000ULL; > + phys_size = kvm->ram_size - phys_size; > + host_mem = kvm->ram_start + phys_start; > + > + kvm__register_mem(kvm, phys_start, phys_size, host_mem); > + } > +} > + > +/* Architecture-specific KVM init */ > +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name) > +{ > + struct kvm_pit_config pit_config = { .flags = 0, }; > + int ret; > + > + ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); > + if (ret < 0) > + die_perror("KVM_SET_TSS_ADDR ioctl"); > + > + ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); > + if (ret < 0) > + die_perror("KVM_CREATE_PIT2 ioctl"); > + > + kvm->ram_size = ram_size; > + > + if (kvm->ram_size < KVM_32BIT_GAP_START) { > + kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > + } else { > + kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > + if (kvm->ram_start != MAP_FAILED) { > + /* > + * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that > + * if we accidently write to it, we will know. 
> + */ > + mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); > + } > + } > + if (kvm->ram_start == MAP_FAILED) > + die("out of memory"); > + > + madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); > + > + ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); > + if (ret < 0) > + die_perror("KVM_CREATE_IRQCHIP ioctl"); > +} > + > +void kvm__irq_line(struct kvm *kvm, int irq, int level) > +{ > + struct kvm_irq_level irq_level; > + > + irq_level = (struct kvm_irq_level) { > + { > + .irq = irq, > + }, > + .level = level, > + }; > + > + if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) > + die_perror("KVM_IRQ_LINE failed"); > +} > + > +void kvm__irq_trigger(struct kvm *kvm, int irq) > +{ > + kvm__irq_line(kvm, irq, 1); > + kvm__irq_line(kvm, irq, 0); > +} > + > +#define BOOT_LOADER_SELECTOR 0x1000 > +#define BOOT_LOADER_IP 0x0000 > +#define BOOT_LOADER_SP 0x8000 > +#define BOOT_CMDLINE_OFFSET 0x20000 > + > +#define BOOT_PROTOCOL_REQUIRED 0x206 > +#define LOAD_HIGH 0x01 > + > +int load_flat_binary(struct kvm *kvm, int fd) > +{ > + void *p; > + int nr; > + > + if (lseek(fd, 0, SEEK_SET) < 0) > + die_perror("lseek"); > + > + p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); > + > + while ((nr = read(fd, p, 65536)) > 0) > + p += nr; > + > + kvm->boot_selector = BOOT_LOADER_SELECTOR; > + kvm->boot_ip = BOOT_LOADER_IP; > + kvm->boot_sp = BOOT_LOADER_SP; > + > + return true; > +} > + > +static const char *BZIMAGE_MAGIC = "HdrS"; > + > +bool load_bzimage(struct kvm *kvm, int fd_kernel, > + int fd_initrd, const char *kernel_cmdline, u16 vidmode) > +{ > + struct boot_params *kern_boot; > + unsigned long setup_sects; > + struct boot_params boot; > + size_t cmdline_size; > + ssize_t setup_size; > + void *p; > + int nr; > + > + /* > + * See Documentation/x86/boot.txt for details no bzImage on-disk and > + * memory layout. 
> + */ > + > + if (lseek(fd_kernel, 0, SEEK_SET) < 0) > + die_perror("lseek"); > + > + if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) > + return false; > + > + if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) > + return false; > + > + if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) > + die("Too old kernel"); > + > + if (lseek(fd_kernel, 0, SEEK_SET) < 0) > + die_perror("lseek"); > + > + if (!boot.hdr.setup_sects) > + boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; > + setup_sects = boot.hdr.setup_sects + 1; > + > + setup_size = setup_sects << 9; > + p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); > + > + /* copy setup.bin to mem*/ > + if (read(fd_kernel, p, setup_size) != setup_size) > + die_perror("read"); > + > + /* copy vmlinux.bin to BZ_KERNEL_START*/ > + p = guest_flat_to_host(kvm, BZ_KERNEL_START); > + > + while ((nr = read(fd_kernel, p, 65536)) > 0) > + p += nr; > + > + p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); > + if (kernel_cmdline) { > + cmdline_size = strlen(kernel_cmdline) + 1; > + if (cmdline_size > boot.hdr.cmdline_size) > + cmdline_size = boot.hdr.cmdline_size; > + > + memset(p, 0, boot.hdr.cmdline_size); > + memcpy(p, kernel_cmdline, cmdline_size - 1); > + } > + > + kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); > + > + kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; > + kern_boot->hdr.type_of_loader = 0xff; > + kern_boot->hdr.heap_end_ptr = 0xfe00; > + kern_boot->hdr.loadflags |= CAN_USE_HEAP; > + kern_boot->hdr.vid_mode = vidmode; > + > + /* > + * Read initrd image into guest memory > + */ > + if (fd_initrd >= 0) { > + struct stat initrd_stat; > + unsigned long addr; > + > + if (fstat(fd_initrd, &initrd_stat)) > + die_perror("fstat"); > + > + addr = boot.hdr.initrd_addr_max & ~0xfffff; > + for (;;) { > + if (addr < BZ_KERNEL_START) > + die("Not enough memory for initrd"); > + else if (addr < (kvm->ram_size - initrd_stat.st_size)) > + break; > + addr -= 0x100000; > + } > + 
> + p = guest_flat_to_host(kvm, addr); > + nr = read(fd_initrd, p, initrd_stat.st_size); > + if (nr != initrd_stat.st_size) > + die("Failed to read initrd"); > + > + kern_boot->hdr.ramdisk_image = addr; > + kern_boot->hdr.ramdisk_size = initrd_stat.st_size; > + } > + > + kvm->boot_selector = BOOT_LOADER_SELECTOR; > + /* > + * The real-mode setup code starts at offset 0x200 of a bzImage. See > + * Documentation/x86/boot.txt for details. > + */ > + kvm->boot_ip = BOOT_LOADER_IP + 0x200; > + kvm->boot_sp = BOOT_LOADER_SP; > + > + return true; > +} > + > +/** > + * kvm__arch_setup_firmware - inject BIOS into guest system memory > + * @kvm - guest system descriptor > + * > + * This function is a main routine where we poke guest memory > + * and install BIOS there. > + */ > +void kvm__arch_setup_firmware(struct kvm *kvm) > +{ > + /* standart minimal configuration */ > + setup_bios(kvm); > + > + /* FIXME: SMP, ACPI and friends here */ > + > + /* MP table */ > + mptable_setup(kvm, kvm->nrcpus); > +} > diff --git a/tools/kvm/mptable.c b/tools/kvm/x86/mptable.c > similarity index 100% > rename from tools/kvm/mptable.c > rename to tools/kvm/x86/mptable.c > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Dec 6, 2011 at 10:07 AM, Sasha Levin <levinsasha928@gmail.com> wrote: > The code doesn't build after this patch due to missing header issues > which you fixed in patches #10 & #11. Could you please move those two to > the beginning of the series for the sake of bisectability? I did that myself. Patches 10, 11, and 1 applied, thanks! -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile index bb5f6b0..243886e 100644 --- a/tools/kvm/Makefile +++ b/tools/kvm/Makefile @@ -33,13 +33,11 @@ OBJS += builtin-run.o OBJS += builtin-setup.o OBJS += builtin-stop.o OBJS += builtin-version.o -OBJS += cpuid.o OBJS += disk/core.o OBJS += framebuffer.o OBJS += guest_compat.o OBJS += hw/rtc.o OBJS += hw/serial.o -OBJS += interrupt.o OBJS += ioport.o OBJS += kvm-cpu.o OBJS += kvm.o @@ -61,7 +59,6 @@ OBJS += disk/blk.o OBJS += disk/qcow.o OBJS += disk/raw.o OBJS += ioeventfd.o -OBJS += irq.o OBJS += net/uip/core.o OBJS += net/uip/arp.o OBJS += net/uip/icmp.o @@ -72,7 +69,6 @@ OBJS += net/uip/buf.o OBJS += net/uip/csum.o OBJS += net/uip/dhcp.o OBJS += kvm-cmd.o -OBJS += mptable.o OBJS += rbtree.o OBJS += threadpool.o OBJS += util/parse-options.o @@ -123,12 +119,6 @@ ifeq ($(has_AIO),y) LIBS += -laio endif -DEPS := $(patsubst %.o,%.d,$(OBJS)) - -# Exclude BIOS object files from header dependencies. -OBJS += bios.o -OBJS += bios/bios-rom.o - LIBS += -lrt LIBS += -lpthread LIBS += -lutil @@ -150,12 +140,43 @@ ifeq ($(uname_M),x86_64) DEFINES += -DCONFIG_X86_64 endif + +### Arch-specific stuff + +#x86 +ifeq ($(ARCH),x86) + DEFINES += -DCONFIG_X86 + OBJS += x86/cpuid.o + OBJS += x86/interrupt.o + OBJS += x86/ioport.o + OBJS += x86/irq.o + OBJS += x86/kvm.o + OBJS += x86/kvm-cpu.o + OBJS += x86/mptable.o +# Exclude BIOS object files from header dependencies. + OTHEROBJS += x86/bios.o + OTHEROBJS += x86/bios/bios-rom.o + ARCH_INCLUDE := x86/include +endif + +### + +ifeq (,$(ARCH_INCLUDE)) + UNSUPP_ERR = @echo "This architecture is not supported in kvmtool." 
&& exit 1 +else + UNSUPP_ERR = +endif + +DEPS := $(patsubst %.o,%.d,$(OBJS)) +OBJS += $(OTHEROBJS) + DEFINES += -D_FILE_OFFSET_BITS=64 DEFINES += -D_GNU_SOURCE DEFINES += -DKVMTOOLS_VERSION='"$(KVMTOOLS_VERSION)"' +DEFINES += -DBUILD_ARCH='"$(ARCH)"' KVM_INCLUDE := include -CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I../../include -I../../arch/$(ARCH)/include/ -Os -g +CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) -I../../include -I../../arch/$(ARCH)/include/ -Os -g ifneq ($(WERROR),0) WARNINGS += -Werror @@ -179,7 +200,10 @@ WARNINGS += -Wwrite-strings CFLAGS += $(WARNINGS) -all: $(PROGRAM) $(GUEST_INIT) +all: arch_support_check $(PROGRAM) $(GUEST_INIT) + +arch_support_check: + $(UNSUPP_ERR) KVMTOOLS-VERSION-FILE: @$(SHELL_PATH) util/KVMTOOLS-VERSION-GEN $(OUTPUT) @@ -227,33 +251,33 @@ BIOS_CFLAGS += -mregparm=3 BIOS_CFLAGS += -fno-stack-protector BIOS_CFLAGS += -I../../arch/$(ARCH) -bios.o: bios/bios.bin bios/bios-rom.h - -bios/bios.bin.elf: bios/entry.S bios/e820.c bios/int10.c bios/int15.c bios/rom.ld.S - $(E) " CC bios/memcpy.o" - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/memcpy.c -o bios/memcpy.o - $(E) " CC bios/e820.o" - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/e820.c -o bios/e820.o - $(E) " CC bios/int10.o" - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/int10.c -o bios/int10.o - $(E) " CC bios/int15.o" - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/int15.c -o bios/int15.o - $(E) " CC bios/entry.o" - $(Q) $(CC) $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/entry.S -o bios/entry.o +x86/bios.o: x86/bios/bios.bin x86/bios/bios-rom.h + +x86/bios/bios.bin.elf: x86/bios/entry.S x86/bios/e820.c x86/bios/int10.c x86/bios/int15.c x86/bios/rom.ld.S + $(E) " CC x86/bios/memcpy.o" + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/memcpy.c -o x86/bios/memcpy.o + $(E) " CC x86/bios/e820.o" + $(Q) $(CC) -include code16gcc.h 
$(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/e820.c -o x86/bios/e820.o + $(E) " CC x86/bios/int10.o" + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/int10.c -o x86/bios/int10.o + $(E) " CC x86/bios/int15.o" + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/int15.c -o x86/bios/int15.o + $(E) " CC x86/bios/entry.o" + $(Q) $(CC) $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/entry.S -o x86/bios/entry.o $(E) " LD " $@ - $(Q) ld -T bios/rom.ld.S -o bios/bios.bin.elf bios/memcpy.o bios/entry.o bios/e820.o bios/int10.o bios/int15.o + $(Q) ld -T x86/bios/rom.ld.S -o x86/bios/bios.bin.elf x86/bios/memcpy.o x86/bios/entry.o x86/bios/e820.o x86/bios/int10.o x86/bios/int15.o -bios/bios.bin: bios/bios.bin.elf +x86/bios/bios.bin: x86/bios/bios.bin.elf $(E) " OBJCOPY " $@ - $(Q) objcopy -O binary -j .text bios/bios.bin.elf bios/bios.bin + $(Q) objcopy -O binary -j .text x86/bios/bios.bin.elf x86/bios/bios.bin -bios/bios-rom.o: bios/bios-rom.S bios/bios.bin bios/bios-rom.h +x86/bios/bios-rom.o: x86/bios/bios-rom.S x86/bios/bios.bin x86/bios/bios-rom.h $(E) " CC " $@ - $(Q) $(CC) -c $(CFLAGS) bios/bios-rom.S -o bios/bios-rom.o + $(Q) $(CC) -c $(CFLAGS) x86/bios/bios-rom.S -o x86/bios/bios-rom.o -bios/bios-rom.h: bios/bios.bin.elf +x86/bios/bios-rom.h: x86/bios/bios.bin.elf $(E) " NM " $@ - $(Q) cd bios && sh gen-offsets.sh > bios-rom.h && cd .. + $(Q) cd x86/bios && sh gen-offsets.sh > bios-rom.h && cd .. 
check: $(PROGRAM) $(MAKE) -C tests @@ -263,10 +287,10 @@ check: $(PROGRAM) clean: $(E) " CLEAN" - $(Q) rm -f bios/*.bin - $(Q) rm -f bios/*.elf - $(Q) rm -f bios/*.o - $(Q) rm -f bios/bios-rom.h + $(Q) rm -f x86/bios/*.bin + $(Q) rm -f x86/bios/*.elf + $(Q) rm -f x86/bios/*.o + $(Q) rm -f x86/bios/bios-rom.h $(Q) rm -f tests/boot/boot_test.iso $(Q) rm -rf tests/boot/rootfs/ $(Q) rm -f $(DEPS) $(OBJS) $(PROGRAM) $(GUEST_INIT) diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c index 33de4f6..9148d83 100644 --- a/tools/kvm/builtin-run.c +++ b/tools/kvm/builtin-run.c @@ -568,7 +568,7 @@ static const char *host_kernels[] = { static const char *default_kernels[] = { "./bzImage", - "../../arch/x86/boot/bzImage", + "../../arch/" BUILD_ARCH "/boot/bzImage", NULL }; @@ -886,7 +886,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) kvm->vmlinux = vmlinux_filename; - ioport__setup_legacy(); + ioport__setup_arch(); rtc__init(); @@ -931,7 +931,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) kvm__start_timer(kvm); - kvm__setup_bios(kvm); + kvm__arch_setup_firmware(kvm); for (i = 0; i < nrcpus; i++) { kvm_cpus[i] = kvm_cpu__init(kvm, i); diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 5b857dd..61a70ec 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -28,7 +28,7 @@ struct ioport_operations { bool (*io_out)(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size); }; -void ioport__setup_legacy(void); +void ioport__setup_arch(void); u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void *param); diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h index 01540ac..719e286 100644 --- a/tools/kvm/include/kvm/kvm-cpu.h +++ b/tools/kvm/include/kvm/kvm-cpu.h @@ -1,32 +1,7 @@ #ifndef KVM__KVM_CPU_H #define KVM__KVM_CPU_H -#include <linux/kvm.h> /* for struct kvm_regs */ - -#include <pthread.h> - -struct kvm; 
- -struct kvm_cpu { - pthread_t thread; /* VCPU thread */ - - unsigned long cpu_id; - - struct kvm *kvm; /* parent KVM */ - int vcpu_fd; /* For VCPU ioctls() */ - struct kvm_run *kvm_run; - - struct kvm_regs regs; - struct kvm_sregs sregs; - struct kvm_fpu fpu; - - struct kvm_msrs *msrs; /* dynamically allocated */ - - u8 is_running; - u8 paused; - - struct kvm_coalesced_mmio_ring *ring; -}; +#include "kvm/kvm-cpu-arch.h" struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id); void kvm_cpu__delete(struct kvm_cpu *vcpu); diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h index 2b3024a..ca1acc0 100644 --- a/tools/kvm/include/kvm/kvm.h +++ b/tools/kvm/include/kvm/kvm.h @@ -1,22 +1,13 @@ #ifndef KVM__KVM_H #define KVM__KVM_H -#include "kvm/interrupt.h" -#include "kvm/segment.h" +#include "kvm/kvm-arch.h" #include <stdbool.h> #include <linux/types.h> #include <time.h> #include <signal.h> -#define KVM_NR_CPUS (255) - -/* - * The hole includes VESA framebuffer and PCI memory. - */ -#define KVM_32BIT_GAP_SIZE (768 << 20) -#define KVM_32BIT_GAP_START ((1ULL << 32) - KVM_32BIT_GAP_SIZE) - #define SIGKVMEXIT (SIGRTMIN + 0) #define SIGKVMPAUSE (SIGRTMIN + 1) #define SIGKVMSTOP (SIGRTMIN + 4) @@ -25,33 +16,15 @@ #define KVM_PID_FILE_PATH "/.kvm-tools/" #define HOME_DIR getenv("HOME") -struct kvm { - int sys_fd; /* For system ioctls(), i.e. 
/dev/kvm */ - int vm_fd; /* For VM ioctls() */ - timer_t timerid; /* Posix timer for interrupts */ - - int nrcpus; /* Number of cpus to run */ - - u32 mem_slots; /* for KVM_SET_USER_MEMORY_REGION */ - - u64 ram_size; - void *ram_start; - - bool nmi_disabled; - - bool single_step; +#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) - u16 boot_selector; - u16 boot_ip; - u16 boot_sp; +#define DEFINE_KVM_EXT(ext) \ + .name = #ext, \ + .code = ext - struct interrupt_table interrupt_table; - - const char *vmlinux; - struct disk_image **disks; - int nr_disks; - - const char *name; +struct kvm_ext { + const char *name; + int code; }; void kvm__set_dir(const char *fmt, ...); @@ -64,7 +37,6 @@ void kvm__init_ram(struct kvm *kvm); void kvm__delete(struct kvm *kvm); bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename, const char *initrd_filename, const char *kernel_cmdline, u16 vidmode); -void kvm__setup_bios(struct kvm *kvm); void kvm__start_timer(struct kvm *kvm); void kvm__stop_timer(struct kvm *kvm); void kvm__irq_line(struct kvm *kvm, int irq, int level); @@ -81,6 +53,13 @@ int kvm__get_sock_by_instance(const char *name); int kvm__enumerate_instances(int (*callback)(const char *name, int pid)); void kvm__remove_socket(const char *name); +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name); +void kvm__arch_setup_firmware(struct kvm *kvm); +bool kvm__arch_cpu_supports_vm(void); + +int load_flat_binary(struct kvm *kvm, int fd); +bool load_bzimage(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline, u16 vidmode); + /* * Debugging */ @@ -98,11 +77,4 @@ static inline void *guest_flat_to_host(struct kvm *kvm, unsigned long offset) return kvm->ram_start + offset; } -static inline void *guest_real_to_host(struct kvm *kvm, u16 selector, u16 offset) -{ - unsigned long flat = segment_to_flat(selector, offset); - - return guest_flat_to_host(kvm, flat); -} - #endif /* KVM__KVM_H */ diff --git a/tools/kvm/ioport.c 
b/tools/kvm/ioport.c index 7cbc44e..965cfc2 100644 --- a/tools/kvm/ioport.c +++ b/tools/kvm/ioport.c @@ -52,34 +52,6 @@ static int ioport_insert(struct rb_root *root, struct ioport *data) return rb_int_insert(root, &data->node); } -static bool debug_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) -{ - exit(EXIT_SUCCESS); -} - -static struct ioport_operations debug_ops = { - .io_out = debug_io_out, -}; - -static bool dummy_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) -{ - return true; -} - -static bool dummy_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) -{ - return true; -} - -static struct ioport_operations dummy_read_write_ioport_ops = { - .io_in = dummy_io_in, - .io_out = dummy_io_out, -}; - -static struct ioport_operations dummy_write_only_ioport_ops = { - .io_out = dummy_io_out, -}; - u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void *param) { struct ioport *entry; @@ -164,29 +136,3 @@ error: return !ioport_debug; } - -void ioport__setup_legacy(void) -{ - /* 0x0020 - 0x003F - 8259A PIC 1 */ - ioport__register(0x0020, &dummy_read_write_ioport_ops, 2, NULL); - - /* PORT 0040-005F - PIT - PROGRAMMABLE INTERVAL TIMER (8253, 8254) */ - ioport__register(0x0040, &dummy_read_write_ioport_ops, 4, NULL); - - /* 0x00A0 - 0x00AF - 8259A PIC 2 */ - ioport__register(0x00A0, &dummy_read_write_ioport_ops, 2, NULL); - - /* PORT 00E0-00EF are 'motherboard specific' so we use them for our - internal debugging purposes. */ - ioport__register(IOPORT_DBG, &debug_ops, 1, NULL); - - /* PORT 00ED - DUMMY PORT FOR DELAY??? 
*/ - ioport__register(0x00ED, &dummy_write_only_ioport_ops, 1, NULL); - - /* 0x00F0 - 0x00FF - Math co-processor */ - ioport__register(0x00F0, &dummy_write_only_ioport_ops, 2, NULL); - - /* PORT 03D4-03D5 - COLOR VIDEO - CRT CONTROL REGISTERS */ - ioport__register(0x03D4, &dummy_read_write_ioport_ops, 1, NULL); - ioport__register(0x03D5, &dummy_write_only_ioport_ops, 1, NULL); -} diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c index 0ad6f3b..5aba3bb 100644 --- a/tools/kvm/kvm-cpu.c +++ b/tools/kvm/kvm-cpu.c @@ -4,8 +4,6 @@ #include "kvm/util.h" #include "kvm/kvm.h" -#include <asm/msr-index.h> - #include <sys/ioctl.h> #include <sys/mman.h> #include <signal.h> @@ -14,106 +12,9 @@ #include <errno.h> #include <stdio.h> -#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) - extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS]; extern __thread struct kvm_cpu *current_kvm_cpu; -static int debug_fd; - -void kvm_cpu__set_debug_fd(int fd) -{ - debug_fd = fd; -} - -int kvm_cpu__get_debug_fd(void) -{ - return debug_fd; -} - -static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) -{ - return vcpu->sregs.cr0 & 0x01; -} - -static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) -{ - u64 cs; - - /* - * NOTE! We should take code segment base address into account here. - * Luckily it's usually zero because Linux uses flat memory model. - */ - if (is_in_protected_mode(vcpu)) - return ip; - - cs = vcpu->sregs.cs.selector; - - return ip + (cs << 4); -} - -static inline u32 selector_to_base(u16 selector) -{ - /* - * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 
- */ - return (u32)selector * 16; -} - -static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) -{ - struct kvm_cpu *vcpu; - - vcpu = calloc(1, sizeof *vcpu); - if (!vcpu) - return NULL; - - vcpu->kvm = kvm; - - return vcpu; -} - -void kvm_cpu__delete(struct kvm_cpu *vcpu) -{ - if (vcpu->msrs) - free(vcpu->msrs); - - free(vcpu); -} - -struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) -{ - struct kvm_cpu *vcpu; - int mmap_size; - int coalesced_offset; - - vcpu = kvm_cpu__new(kvm); - if (!vcpu) - return NULL; - - vcpu->cpu_id = cpu_id; - - vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); - if (vcpu->vcpu_fd < 0) - die_perror("KVM_CREATE_VCPU ioctl"); - - mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); - if (mmap_size < 0) - die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); - - vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); - if (vcpu->kvm_run == MAP_FAILED) - die("unable to mmap vcpu fd"); - - coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); - if (coalesced_offset) - vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); - - vcpu->is_running = true; - - return vcpu; -} - void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu) { struct kvm_guest_debug debug = { @@ -124,278 +25,6 @@ void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu) pr_warning("KVM_SET_GUEST_DEBUG failed"); } -static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) -{ - struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); - - if (!vcpu) - die("out of memory"); - - return vcpu; -} - -#define KVM_MSR_ENTRY(_index, _data) \ - (struct kvm_msr_entry) { .index = _index, .data = _data } - -static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) -{ - unsigned long ndx = 0; - - vcpu->msrs = kvm_msrs__new(100); - - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); 
- vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); -#ifdef CONFIG_X86_64 - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); -#endif - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_FAST_STRING); - - vcpu->msrs->nmsrs = ndx; - - if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) - die_perror("KVM_SET_MSRS failed"); -} - -static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) -{ - vcpu->fpu = (struct kvm_fpu) { - .fcw = 0x37f, - .mxcsr = 0x1f80, - }; - - if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) - die_perror("KVM_SET_FPU failed"); -} - -static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) -{ - vcpu->regs = (struct kvm_regs) { - /* We start the guest in 16-bit real mode */ - .rflags = 0x0000000000000002ULL, - - .rip = vcpu->kvm->boot_ip, - .rsp = vcpu->kvm->boot_sp, - .rbp = vcpu->kvm->boot_sp, - }; - - if (vcpu->regs.rip > USHRT_MAX) - die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); - - if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) - die_perror("KVM_SET_REGS failed"); -} - -static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) -{ - - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) - die_perror("KVM_GET_SREGS failed"); - - vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; - vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->boot_selector); - vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; - vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); - vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; - vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->boot_selector); - vcpu->sregs.es.selector = 
vcpu->kvm->boot_selector; - vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); - vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; - vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); - vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; - vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); - - if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) - die_perror("KVM_SET_SREGS failed"); -} - -/** - * kvm_cpu__reset_vcpu - reset virtual CPU to a known state - */ -void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) -{ - kvm_cpu__setup_sregs(vcpu); - kvm_cpu__setup_regs(vcpu); - kvm_cpu__setup_fpu(vcpu); - kvm_cpu__setup_msrs(vcpu); -} - -static void print_dtable(const char *name, struct kvm_dtable *dtable) -{ - dprintf(debug_fd, " %s %016llx %08hx\n", - name, (u64) dtable->base, (u16) dtable->limit); -} - -static void print_segment(const char *name, struct kvm_segment *seg) -{ - dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", - name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, - (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); -} - -void kvm_cpu__show_registers(struct kvm_cpu *vcpu) -{ - unsigned long cr0, cr2, cr3; - unsigned long cr4, cr8; - unsigned long rax, rbx, rcx; - unsigned long rdx, rsi, rdi; - unsigned long rbp, r8, r9; - unsigned long r10, r11, r12; - unsigned long r13, r14, r15; - unsigned long rip, rsp; - struct kvm_sregs sregs; - unsigned long rflags; - struct kvm_regs regs; - int i; - - if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0) - die("KVM_GET_REGS failed"); - - rflags = regs.rflags; - - rip = regs.rip; rsp = regs.rsp; - rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; - rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; - rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; - r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; - r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; - - dprintf(debug_fd, "\n Registers:\n"); - dprintf(debug_fd, " 
----------\n"); - dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); - dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); - dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); - dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); - dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); - dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); - - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) - die("KVM_GET_REGS failed"); - - cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; - cr4 = sregs.cr4; cr8 = sregs.cr8; - - dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); - dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8); - dprintf(debug_fd, "\n Segment registers:\n"); - dprintf(debug_fd, " ------------------\n"); - dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n"); - print_segment("cs ", &sregs.cs); - print_segment("ss ", &sregs.ss); - print_segment("ds ", &sregs.ds); - print_segment("es ", &sregs.es); - print_segment("fs ", &sregs.fs); - print_segment("gs ", &sregs.gs); - print_segment("tr ", &sregs.tr); - print_segment("ldt", &sregs.ldt); - print_dtable("gdt", &sregs.gdt); - print_dtable("idt", &sregs.idt); - - dprintf(debug_fd, "\n APIC:\n"); - dprintf(debug_fd, " -----\n"); - dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n", - (u64) sregs.efer, (u64) sregs.apic_base, - (vcpu->kvm->nmi_disabled ? 
"disabled" : "enabled")); - - dprintf(debug_fd, "\n Interrupt bitmap:\n"); - dprintf(debug_fd, " -----------------\n"); - for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) - dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]); - dprintf(debug_fd, "\n"); -} - -#define MAX_SYM_LEN 128 - -void kvm_cpu__show_code(struct kvm_cpu *vcpu) -{ - unsigned int code_bytes = 64; - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - char sym[MAX_SYM_LEN]; - unsigned char c; - unsigned int i; - u8 *ip; - - if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) - die("KVM_GET_REGS failed"); - - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) - die("KVM_GET_SREGS failed"); - - ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); - - dprintf(debug_fd, "\n Code:\n"); - dprintf(debug_fd, " -----\n"); - - symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); - - dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); - - for (i = 0; i < code_len; i++, ip++) { - if (!host_ptr_in_ram(vcpu->kvm, ip)) - break; - - c = *ip; - - if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) - dprintf(debug_fd, " <%02x>", c); - else - dprintf(debug_fd, " %02x", c); - } - - dprintf(debug_fd, "\n"); - - dprintf(debug_fd, "\n Stack:\n"); - dprintf(debug_fd, " ------\n"); - kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); -} - -void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) -{ - u64 *pte1; - u64 *pte2; - u64 *pte3; - u64 *pte4; - - if (!is_in_protected_mode(vcpu)) - return; - - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) - die("KVM_GET_SREGS failed"); - - pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); - if (!host_ptr_in_ram(vcpu->kvm, pte4)) - return; - - pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); - if (!host_ptr_in_ram(vcpu->kvm, pte3)) - return; - - pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); - if 
(!host_ptr_in_ram(vcpu->kvm, pte2)) - return; - - pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); - if (!host_ptr_in_ram(vcpu->kvm, pte1)) - return; - - dprintf(debug_fd, "Page Tables:\n"); - if (*pte2 & (1 << 7)) - dprintf(debug_fd, " pte4: %016llx pte3: %016llx" - " pte2: %016llx\n", - *pte4, *pte3, *pte2); - else - dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016" - "llx pte1: %016llx\n", - *pte4, *pte3, *pte2, *pte1); -} - void kvm_cpu__run(struct kvm_cpu *vcpu) { int err; @@ -454,7 +83,6 @@ int kvm_cpu__start(struct kvm_cpu *cpu) signal(SIGKVMEXIT, kvm_cpu_signal_handler); signal(SIGKVMPAUSE, kvm_cpu_signal_handler); - kvm_cpu__setup_cpuid(cpu); kvm_cpu__reset_vcpu(cpu); if (cpu->kvm->single_step) diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c index 252bd18..7ce1640 100644 --- a/tools/kvm/kvm.c +++ b/tools/kvm/kvm.c @@ -1,10 +1,5 @@ #include "kvm/kvm.h" - -#include "kvm/boot-protocol.h" -#include "kvm/cpufeature.h" #include "kvm/read-write.h" -#include "kvm/interrupt.h" -#include "kvm/mptable.h" #include "kvm/util.h" #include "kvm/mutex.h" #include "kvm/kvm-cpu.h" @@ -12,14 +7,11 @@ #include <linux/kvm.h> -#include <asm/bootparam.h> - #include <sys/un.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/ioctl.h> #include <sys/mman.h> -#include <sys/stat.h> #include <stdbool.h> #include <assert.h> #include <limits.h> @@ -58,29 +50,11 @@ const char *kvm_exit_reasons[] = { DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR), }; -#define DEFINE_KVM_EXT(ext) \ - .name = #ext, \ - .code = ext - -struct { - const char *name; - int code; -} kvm_req_ext[] = { - { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, - { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, - { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, - { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, - { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, - { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, - { DEFINE_KVM_EXT(KVM_CAP_HLT) }, - { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, - { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, -}; - extern 
struct kvm *kvm; extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS]; static int pause_event; static DEFINE_MUTEX(pause_lock); +extern struct kvm_ext kvm_req_ext[]; static char kvm_dir[PATH_MAX]; @@ -127,7 +101,9 @@ static int kvm__check_extensions(struct kvm *kvm) { unsigned int i; - for (i = 0; i < ARRAY_SIZE(kvm_req_ext); i++) { + for (i = 0; ; i++) { + if (!kvm_req_ext[i].name) + break; if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) { pr_error("Unsuppored KVM extension detected: %s", kvm_req_ext[i].name); @@ -261,48 +237,6 @@ void kvm__delete(struct kvm *kvm) free(kvm); } -static bool kvm__cpu_supports_vm(void) -{ - struct cpuid_regs regs; - u32 eax_base; - int feature; - - regs = (struct cpuid_regs) { - .eax = 0x00, - }; - host_cpuid(&regs); - - switch (regs.ebx) { - case CPUID_VENDOR_INTEL_1: - eax_base = 0x00; - feature = KVM__X86_FEATURE_VMX; - break; - - case CPUID_VENDOR_AMD_1: - eax_base = 0x80000000; - feature = KVM__X86_FEATURE_SVM; - break; - - default: - return false; - } - - regs = (struct cpuid_regs) { - .eax = eax_base, - }; - host_cpuid(&regs); - - if (regs.eax < eax_base + 0x01) - return false; - - regs = (struct cpuid_regs) { - .eax = eax_base + 0x01 - }; - host_cpuid(&regs); - - return regs.ecx & (1 << feature); -} - /* * Note: KVM_SET_USER_MEMORY_REGION assumes that we don't pass overlapping * memory regions to it. Therefore, be careful if you use this function for @@ -325,47 +259,6 @@ void kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size, void *userspac die_perror("KVM_SET_USER_MEMORY_REGION ioctl"); } -/* - * Allocating RAM size bigger than 4GB requires us to leave a gap - * in the RAM which is used for PCI MMIO, hotplug, and unconfigured - * devices (see documentation of e820_setup_gap() for details). - * - * If we're required to initialize RAM bigger than 4GB, we will create - * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space. 
- */ - -void kvm__init_ram(struct kvm *kvm) -{ - u64 phys_start, phys_size; - void *host_mem; - - if (kvm->ram_size < KVM_32BIT_GAP_START) { - /* Use a single block of RAM for 32bit RAM */ - - phys_start = 0; - phys_size = kvm->ram_size; - host_mem = kvm->ram_start; - - kvm__register_mem(kvm, phys_start, phys_size, host_mem); - } else { - /* First RAM range from zero to the PCI gap: */ - - phys_start = 0; - phys_size = KVM_32BIT_GAP_START; - host_mem = kvm->ram_start; - - kvm__register_mem(kvm, phys_start, phys_size, host_mem); - - /* Second RAM range from 4GB to the end of RAM: */ - - phys_start = 0x100000000ULL; - phys_size = kvm->ram_size - phys_size; - host_mem = kvm->ram_start + phys_start; - - kvm__register_mem(kvm, phys_start, phys_size, host_mem); - } -} - int kvm__recommended_cpus(struct kvm *kvm) { int ret; @@ -410,11 +303,10 @@ int kvm__max_cpus(struct kvm *kvm) struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) { - struct kvm_pit_config pit_config = { .flags = 0, }; struct kvm *kvm; int ret; - if (!kvm__cpu_supports_vm()) + if (!kvm__arch_cpu_supports_vm()) die("Your CPU does not support hardware virtualization"); kvm = kvm__new(); @@ -442,36 +334,7 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) if (kvm__check_extensions(kvm)) die("A required KVM extention is not supported by OS"); - ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); - if (ret < 0) - die_perror("KVM_SET_TSS_ADDR ioctl"); - - ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); - if (ret < 0) - die_perror("KVM_CREATE_PIT2 ioctl"); - - kvm->ram_size = ram_size; - - if (kvm->ram_size < KVM_32BIT_GAP_START) { - kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); - } else { - kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0); - if (kvm->ram_start != MAP_FAILED) { - /* - * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that - * if we accidently 
write to it, we will know. - */ - mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); - } - } - if (kvm->ram_start == MAP_FAILED) - die("out of memory"); - - madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); - - ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); - if (ret < 0) - die_perror("KVM_CREATE_IRQCHIP ioctl"); + kvm__arch_init(kvm, kvm_dev, ram_size, name); kvm->name = name; @@ -480,141 +343,6 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) return kvm; } -#define BOOT_LOADER_SELECTOR 0x1000 -#define BOOT_LOADER_IP 0x0000 -#define BOOT_LOADER_SP 0x8000 -#define BOOT_CMDLINE_OFFSET 0x20000 - -#define BOOT_PROTOCOL_REQUIRED 0x206 -#define LOAD_HIGH 0x01 - -static int load_flat_binary(struct kvm *kvm, int fd) -{ - void *p; - int nr; - - if (lseek(fd, 0, SEEK_SET) < 0) - die_perror("lseek"); - - p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); - - while ((nr = read(fd, p, 65536)) > 0) - p += nr; - - kvm->boot_selector = BOOT_LOADER_SELECTOR; - kvm->boot_ip = BOOT_LOADER_IP; - kvm->boot_sp = BOOT_LOADER_SP; - - return true; -} - -static const char *BZIMAGE_MAGIC = "HdrS"; - -static bool load_bzimage(struct kvm *kvm, int fd_kernel, - int fd_initrd, const char *kernel_cmdline, u16 vidmode) -{ - struct boot_params *kern_boot; - unsigned long setup_sects; - struct boot_params boot; - size_t cmdline_size; - ssize_t setup_size; - void *p; - int nr; - - /* - * See Documentation/x86/boot.txt for details no bzImage on-disk and - * memory layout. 
- */ - - if (lseek(fd_kernel, 0, SEEK_SET) < 0) - die_perror("lseek"); - - if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) - return false; - - if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) - return false; - - if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) - die("Too old kernel"); - - if (lseek(fd_kernel, 0, SEEK_SET) < 0) - die_perror("lseek"); - - if (!boot.hdr.setup_sects) - boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; - setup_sects = boot.hdr.setup_sects + 1; - - setup_size = setup_sects << 9; - p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); - - /* copy setup.bin to mem*/ - if (read(fd_kernel, p, setup_size) != setup_size) - die_perror("read"); - - /* copy vmlinux.bin to BZ_KERNEL_START*/ - p = guest_flat_to_host(kvm, BZ_KERNEL_START); - - while ((nr = read(fd_kernel, p, 65536)) > 0) - p += nr; - - p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); - if (kernel_cmdline) { - cmdline_size = strlen(kernel_cmdline) + 1; - if (cmdline_size > boot.hdr.cmdline_size) - cmdline_size = boot.hdr.cmdline_size; - - memset(p, 0, boot.hdr.cmdline_size); - memcpy(p, kernel_cmdline, cmdline_size - 1); - } - - kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); - - kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; - kern_boot->hdr.type_of_loader = 0xff; - kern_boot->hdr.heap_end_ptr = 0xfe00; - kern_boot->hdr.loadflags |= CAN_USE_HEAP; - kern_boot->hdr.vid_mode = vidmode; - - /* - * Read initrd image into guest memory - */ - if (fd_initrd >= 0) { - struct stat initrd_stat; - unsigned long addr; - - if (fstat(fd_initrd, &initrd_stat)) - die_perror("fstat"); - - addr = boot.hdr.initrd_addr_max & ~0xfffff; - for (;;) { - if (addr < BZ_KERNEL_START) - die("Not enough memory for initrd"); - else if (addr < (kvm->ram_size - initrd_stat.st_size)) - break; - addr -= 0x100000; - } - - p = guest_flat_to_host(kvm, addr); - nr = read(fd_initrd, p, initrd_stat.st_size); - if (nr != initrd_stat.st_size) - die("Failed to read 
initrd"); - - kern_boot->hdr.ramdisk_image = addr; - kern_boot->hdr.ramdisk_size = initrd_stat.st_size; - } - - kvm->boot_selector = BOOT_LOADER_SELECTOR; - /* - * The real-mode setup code starts at offset 0x200 of a bzImage. See - * Documentation/x86/boot.txt for details. - */ - kvm->boot_ip = BOOT_LOADER_IP + 0x200; - kvm->boot_sp = BOOT_LOADER_SP; - - return true; -} - /* RFC 1952 */ #define GZIP_ID1 0x1f #define GZIP_ID2 0x8b @@ -675,24 +403,6 @@ found_kernel: return ret; } -/** - * kvm__setup_bios - inject BIOS into guest system memory - * @kvm - guest system descriptor - * - * This function is a main routine where we poke guest memory - * and install BIOS there. - */ -void kvm__setup_bios(struct kvm *kvm) -{ - /* standart minimal configuration */ - setup_bios(kvm); - - /* FIXME: SMP, ACPI and friends here */ - - /* MP table */ - mptable_setup(kvm, kvm->nrcpus); -} - #define TIMER_INTERVAL_NS 1000000 /* 1 msec */ /* @@ -732,27 +442,6 @@ void kvm__stop_timer(struct kvm *kvm) kvm->timerid = 0; } -void kvm__irq_line(struct kvm *kvm, int irq, int level) -{ - struct kvm_irq_level irq_level; - - irq_level = (struct kvm_irq_level) { - { - .irq = irq, - }, - .level = level, - }; - - if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) - die_perror("KVM_IRQ_LINE failed"); -} - -void kvm__irq_trigger(struct kvm *kvm, int irq) -{ - kvm__irq_line(kvm, irq, 1); - kvm__irq_line(kvm, irq, 0); -} - void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size) { unsigned char *p; diff --git a/tools/kvm/bios.c b/tools/kvm/x86/bios.c similarity index 100% rename from tools/kvm/bios.c rename to tools/kvm/x86/bios.c diff --git a/tools/kvm/bios/.gitignore b/tools/kvm/x86/bios/.gitignore similarity index 100% rename from tools/kvm/bios/.gitignore rename to tools/kvm/x86/bios/.gitignore diff --git a/tools/kvm/bios/bios-rom.S b/tools/kvm/x86/bios/bios-rom.S similarity index 80% rename from tools/kvm/bios/bios-rom.S rename to tools/kvm/x86/bios/bios-rom.S index 
dc52b1e..3269ce9 100644 --- a/tools/kvm/bios/bios-rom.S +++ b/tools/kvm/x86/bios/bios-rom.S @@ -8,5 +8,5 @@ #endif GLOBAL(bios_rom) - .incbin "bios/bios.bin" + .incbin "x86/bios/bios.bin" END(bios_rom) diff --git a/tools/kvm/bios/e820.c b/tools/kvm/x86/bios/e820.c similarity index 100% rename from tools/kvm/bios/e820.c rename to tools/kvm/x86/bios/e820.c diff --git a/tools/kvm/bios/entry.S b/tools/kvm/x86/bios/entry.S similarity index 100% rename from tools/kvm/bios/entry.S rename to tools/kvm/x86/bios/entry.S diff --git a/tools/kvm/bios/gen-offsets.sh b/tools/kvm/x86/bios/gen-offsets.sh similarity index 100% rename from tools/kvm/bios/gen-offsets.sh rename to tools/kvm/x86/bios/gen-offsets.sh diff --git a/tools/kvm/bios/int10.c b/tools/kvm/x86/bios/int10.c similarity index 100% rename from tools/kvm/bios/int10.c rename to tools/kvm/x86/bios/int10.c diff --git a/tools/kvm/bios/int15.c b/tools/kvm/x86/bios/int15.c similarity index 100% rename from tools/kvm/bios/int15.c rename to tools/kvm/x86/bios/int15.c diff --git a/tools/kvm/bios/local.S b/tools/kvm/x86/bios/local.S similarity index 100% rename from tools/kvm/bios/local.S rename to tools/kvm/x86/bios/local.S diff --git a/tools/kvm/bios/macro.S b/tools/kvm/x86/bios/macro.S similarity index 100% rename from tools/kvm/bios/macro.S rename to tools/kvm/x86/bios/macro.S diff --git a/tools/kvm/bios/memcpy.c b/tools/kvm/x86/bios/memcpy.c similarity index 100% rename from tools/kvm/bios/memcpy.c rename to tools/kvm/x86/bios/memcpy.c diff --git a/tools/kvm/bios/rom.ld.S b/tools/kvm/x86/bios/rom.ld.S similarity index 100% rename from tools/kvm/bios/rom.ld.S rename to tools/kvm/x86/bios/rom.ld.S diff --git a/tools/kvm/cpuid.c b/tools/kvm/x86/cpuid.c similarity index 100% rename from tools/kvm/cpuid.c rename to tools/kvm/x86/cpuid.c diff --git a/tools/kvm/include/kvm/assembly.h b/tools/kvm/x86/include/kvm/assembly.h similarity index 100% rename from tools/kvm/include/kvm/assembly.h rename to 
tools/kvm/x86/include/kvm/assembly.h diff --git a/tools/kvm/include/kvm/barrier.h b/tools/kvm/x86/include/kvm/barrier.h similarity index 100% rename from tools/kvm/include/kvm/barrier.h rename to tools/kvm/x86/include/kvm/barrier.h diff --git a/tools/kvm/include/kvm/bios-export.h b/tools/kvm/x86/include/kvm/bios-export.h similarity index 100% rename from tools/kvm/include/kvm/bios-export.h rename to tools/kvm/x86/include/kvm/bios-export.h diff --git a/tools/kvm/include/kvm/bios.h b/tools/kvm/x86/include/kvm/bios.h similarity index 100% rename from tools/kvm/include/kvm/bios.h rename to tools/kvm/x86/include/kvm/bios.h diff --git a/tools/kvm/include/kvm/boot-protocol.h b/tools/kvm/x86/include/kvm/boot-protocol.h similarity index 100% rename from tools/kvm/include/kvm/boot-protocol.h rename to tools/kvm/x86/include/kvm/boot-protocol.h diff --git a/tools/kvm/include/kvm/cpufeature.h b/tools/kvm/x86/include/kvm/cpufeature.h similarity index 100% rename from tools/kvm/include/kvm/cpufeature.h rename to tools/kvm/x86/include/kvm/cpufeature.h diff --git a/tools/kvm/include/kvm/interrupt.h b/tools/kvm/x86/include/kvm/interrupt.h similarity index 100% rename from tools/kvm/include/kvm/interrupt.h rename to tools/kvm/x86/include/kvm/interrupt.h diff --git a/tools/kvm/x86/include/kvm/kvm-arch.h b/tools/kvm/x86/include/kvm/kvm-arch.h new file mode 100644 index 0000000..02aa8b9 --- /dev/null +++ b/tools/kvm/x86/include/kvm/kvm-arch.h @@ -0,0 +1,59 @@ +#ifndef KVM__KVM_ARCH_H +#define KVM__KVM_ARCH_H + +#include "kvm/interrupt.h" +#include "kvm/segment.h" + +#include <stdbool.h> +#include <linux/types.h> +#include <time.h> + +#define KVM_NR_CPUS (255) + +/* + * The hole includes VESA framebuffer and PCI memory. + */ +#define KVM_32BIT_GAP_SIZE (768 << 20) +#define KVM_32BIT_GAP_START ((1ULL << 32) - KVM_32BIT_GAP_SIZE) + +#define KVM_MMIO_START KVM_32BIT_GAP_START + +struct kvm { + int sys_fd; /* For system ioctls(), i.e. 
/dev/kvm */ + int vm_fd; /* For VM ioctls() */ + timer_t timerid; /* Posix timer for interrupts */ + + int nrcpus; /* Number of cpus to run */ + + u32 mem_slots; /* for KVM_SET_USER_MEMORY_REGION */ + + u64 ram_size; + void *ram_start; + + bool nmi_disabled; + + bool single_step; + + u16 boot_selector; + u16 boot_ip; + u16 boot_sp; + + struct interrupt_table interrupt_table; + + const char *vmlinux; + struct disk_image **disks; + int nr_disks; + + const char *name; +}; + +static inline void *guest_flat_to_host(struct kvm *kvm, unsigned long offset); /* In kvm.h */ + +static inline void *guest_real_to_host(struct kvm *kvm, u16 selector, u16 offset) +{ + unsigned long flat = segment_to_flat(selector, offset); + + return guest_flat_to_host(kvm, flat); +} + +#endif /* KVM__KVM_ARCH_H */ diff --git a/tools/kvm/x86/include/kvm/kvm-cpu-arch.h b/tools/kvm/x86/include/kvm/kvm-cpu-arch.h new file mode 100644 index 0000000..ed1c727 --- /dev/null +++ b/tools/kvm/x86/include/kvm/kvm-cpu-arch.h @@ -0,0 +1,33 @@ +#ifndef KVM__KVM_CPU_ARCH_H +#define KVM__KVM_CPU_ARCH_H + +/* Architecture-specific kvm_cpu definitions. 
*/ + +#include <linux/kvm.h> /* for struct kvm_regs */ + +#include <pthread.h> + +struct kvm; + +struct kvm_cpu { + pthread_t thread; /* VCPU thread */ + + unsigned long cpu_id; + + struct kvm *kvm; /* parent KVM */ + int vcpu_fd; /* For VCPU ioctls() */ + struct kvm_run *kvm_run; + + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_fpu fpu; + + struct kvm_msrs *msrs; /* dynamically allocated */ + + u8 is_running; + u8 paused; + + struct kvm_coalesced_mmio_ring *ring; +}; + +#endif /* KVM__KVM_CPU_ARCH_H */ diff --git a/tools/kvm/include/kvm/mptable.h b/tools/kvm/x86/include/kvm/mptable.h similarity index 100% rename from tools/kvm/include/kvm/mptable.h rename to tools/kvm/x86/include/kvm/mptable.h diff --git a/tools/kvm/interrupt.c b/tools/kvm/x86/interrupt.c similarity index 100% rename from tools/kvm/interrupt.c rename to tools/kvm/x86/interrupt.c diff --git a/tools/kvm/x86/ioport.c b/tools/kvm/x86/ioport.c new file mode 100644 index 0000000..8a91bf2 --- /dev/null +++ b/tools/kvm/x86/ioport.c @@ -0,0 +1,59 @@ +#include "kvm/ioport.h" + +#include <stdlib.h> + +static bool debug_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) +{ + exit(EXIT_SUCCESS); +} + +static struct ioport_operations debug_ops = { + .io_out = debug_io_out, +}; + +static bool dummy_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) +{ + return true; +} + +static bool dummy_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) +{ + return true; +} + +static struct ioport_operations dummy_read_write_ioport_ops = { + .io_in = dummy_io_in, + .io_out = dummy_io_out, +}; + +static struct ioport_operations dummy_write_only_ioport_ops = { + .io_out = dummy_io_out, +}; + +void ioport__setup_arch(void) +{ + /* Legacy ioport setup */ + + /* 0x0020 - 0x003F - 8259A PIC 1 */ + ioport__register(0x0020, &dummy_read_write_ioport_ops, 2, NULL); + + /* PORT 0040-005F - PIT - PROGRAMMABLE INTERVAL TIMER (8253, 8254) */ 
+ ioport__register(0x0040, &dummy_read_write_ioport_ops, 4, NULL); + + /* 0x00A0 - 0x00AF - 8259A PIC 2 */ + ioport__register(0x00A0, &dummy_read_write_ioport_ops, 2, NULL); + + /* PORT 00E0-00EF are 'motherboard specific' so we use them for our + internal debugging purposes. */ + ioport__register(IOPORT_DBG, &debug_ops, 1, NULL); + + /* PORT 00ED - DUMMY PORT FOR DELAY??? */ + ioport__register(0x00ED, &dummy_write_only_ioport_ops, 1, NULL); + + /* 0x00F0 - 0x00FF - Math co-processor */ + ioport__register(0x00F0, &dummy_write_only_ioport_ops, 2, NULL); + + /* PORT 03D4-03D5 - COLOR VIDEO - CRT CONTROL REGISTERS */ + ioport__register(0x03D4, &dummy_read_write_ioport_ops, 1, NULL); + ioport__register(0x03D5, &dummy_write_only_ioport_ops, 1, NULL); +} diff --git a/tools/kvm/irq.c b/tools/kvm/x86/irq.c similarity index 100% rename from tools/kvm/irq.c rename to tools/kvm/x86/irq.c diff --git a/tools/kvm/x86/kvm-cpu.c b/tools/kvm/x86/kvm-cpu.c new file mode 100644 index 0000000..b26b208 --- /dev/null +++ b/tools/kvm/x86/kvm-cpu.c @@ -0,0 +1,383 @@ +#include "kvm/kvm-cpu.h" + +#include "kvm/symbol.h" +#include "kvm/util.h" +#include "kvm/kvm.h" + +#include <asm/msr-index.h> + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> + +static int debug_fd; + +void kvm_cpu__set_debug_fd(int fd) +{ + debug_fd = fd; +} + +int kvm_cpu__get_debug_fd(void) +{ + return debug_fd; +} + +static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) +{ + return vcpu->sregs.cr0 & 0x01; +} + +static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) +{ + u64 cs; + + /* + * NOTE! We should take code segment base address into account here. + * Luckily it's usually zero because Linux uses flat memory model. 
+ */ + if (is_in_protected_mode(vcpu)) + return ip; + + cs = vcpu->sregs.cs.selector; + + return ip + (cs << 4); +} + +static inline u32 selector_to_base(u16 selector) +{ + /* + * KVM on Intel requires 'base' to be 'selector * 16' in real mode. + */ + return (u32)selector * 16; +} + +static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) +{ + struct kvm_cpu *vcpu; + + vcpu = calloc(1, sizeof *vcpu); + if (!vcpu) + return NULL; + + vcpu->kvm = kvm; + + return vcpu; +} + +void kvm_cpu__delete(struct kvm_cpu *vcpu) +{ + if (vcpu->msrs) + free(vcpu->msrs); + + free(vcpu); +} + +struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) +{ + struct kvm_cpu *vcpu; + int mmap_size; + int coalesced_offset; + + vcpu = kvm_cpu__new(kvm); + if (!vcpu) + return NULL; + + vcpu->cpu_id = cpu_id; + + vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); + if (vcpu->vcpu_fd < 0) + die_perror("KVM_CREATE_VCPU ioctl"); + + mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); + if (mmap_size < 0) + die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); + + vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); + if (vcpu->kvm_run == MAP_FAILED) + die("unable to mmap vcpu fd"); + + coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); + if (coalesced_offset) + vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); + + vcpu->is_running = true; + + return vcpu; +} + +static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) +{ + struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); + + if (!vcpu) + die("out of memory"); + + return vcpu; +} + +#define KVM_MSR_ENTRY(_index, _data) \ + (struct kvm_msr_entry) { .index = _index, .data = _data } + +static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) +{ + unsigned long ndx = 0; + + vcpu->msrs = kvm_msrs__new(100); + + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); + vcpu->msrs->entries[ndx++] = 
KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); +#ifdef CONFIG_X86_64 + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); +#endif + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, + MSR_IA32_MISC_ENABLE_FAST_STRING); + + vcpu->msrs->nmsrs = ndx; + + if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) + die_perror("KVM_SET_MSRS failed"); +} + +static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) +{ + vcpu->fpu = (struct kvm_fpu) { + .fcw = 0x37f, + .mxcsr = 0x1f80, + }; + + if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) + die_perror("KVM_SET_FPU failed"); +} + +static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) +{ + vcpu->regs = (struct kvm_regs) { + /* We start the guest in 16-bit real mode */ + .rflags = 0x0000000000000002ULL, + + .rip = vcpu->kvm->boot_ip, + .rsp = vcpu->kvm->boot_sp, + .rbp = vcpu->kvm->boot_sp, + }; + + if (vcpu->regs.rip > USHRT_MAX) + die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); + + if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) + die_perror("KVM_SET_REGS failed"); +} + +static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) +{ + + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) + die_perror("KVM_GET_SREGS failed"); + + vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; + vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->boot_selector); + vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; + vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); + vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; + vcpu->sregs.ds.base = 
selector_to_base(vcpu->kvm->boot_selector); + vcpu->sregs.es.selector = vcpu->kvm->boot_selector; + vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); + vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; + vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); + vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; + vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); + + if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) + die_perror("KVM_SET_SREGS failed"); +} + +/** + * kvm_cpu__reset_vcpu - reset virtual CPU to a known state + */ +void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) +{ + kvm_cpu__setup_cpuid(vcpu); + kvm_cpu__setup_sregs(vcpu); + kvm_cpu__setup_regs(vcpu); + kvm_cpu__setup_fpu(vcpu); + kvm_cpu__setup_msrs(vcpu); +} + +static void print_dtable(const char *name, struct kvm_dtable *dtable) +{ + dprintf(debug_fd, " %s %016llx %08hx\n", + name, (u64) dtable->base, (u16) dtable->limit); +} + +static void print_segment(const char *name, struct kvm_segment *seg) +{ + dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", + name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, + (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); +} + +void kvm_cpu__show_registers(struct kvm_cpu *vcpu) +{ + unsigned long cr0, cr2, cr3; + unsigned long cr4, cr8; + unsigned long rax, rbx, rcx; + unsigned long rdx, rsi, rdi; + unsigned long rbp, r8, r9; + unsigned long r10, r11, r12; + unsigned long r13, r14, r15; + unsigned long rip, rsp; + struct kvm_sregs sregs; + unsigned long rflags; + struct kvm_regs regs; + int i; + + if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0) + die("KVM_GET_REGS failed"); + + rflags = regs.rflags; + + rip = regs.rip; rsp = regs.rsp; + rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; + rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; + rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; + r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; + r13 = regs.r13; 
r14 = regs.r14; r15 = regs.r15; + + dprintf(debug_fd, "\n Registers:\n"); + dprintf(debug_fd, " ----------\n"); + dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); + dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); + dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); + dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); + dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); + dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); + + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) + die("KVM_GET_REGS failed"); + + cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; + cr4 = sregs.cr4; cr8 = sregs.cr8; + + dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); + dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8); + dprintf(debug_fd, "\n Segment registers:\n"); + dprintf(debug_fd, " ------------------\n"); + dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n"); + print_segment("cs ", &sregs.cs); + print_segment("ss ", &sregs.ss); + print_segment("ds ", &sregs.ds); + print_segment("es ", &sregs.es); + print_segment("fs ", &sregs.fs); + print_segment("gs ", &sregs.gs); + print_segment("tr ", &sregs.tr); + print_segment("ldt", &sregs.ldt); + print_dtable("gdt", &sregs.gdt); + print_dtable("idt", &sregs.idt); + + dprintf(debug_fd, "\n APIC:\n"); + dprintf(debug_fd, " -----\n"); + dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n", + (u64) sregs.efer, (u64) sregs.apic_base, + (vcpu->kvm->nmi_disabled ? 
"disabled" : "enabled")); + + dprintf(debug_fd, "\n Interrupt bitmap:\n"); + dprintf(debug_fd, " -----------------\n"); + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) + dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]); + dprintf(debug_fd, "\n"); +} + +#define MAX_SYM_LEN 128 + +void kvm_cpu__show_code(struct kvm_cpu *vcpu) +{ + unsigned int code_bytes = 64; + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; + char sym[MAX_SYM_LEN]; + unsigned char c; + unsigned int i; + u8 *ip; + + if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) + die("KVM_GET_REGS failed"); + + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) + die("KVM_GET_SREGS failed"); + + ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); + + dprintf(debug_fd, "\n Code:\n"); + dprintf(debug_fd, " -----\n"); + + symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); + + dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); + + for (i = 0; i < code_len; i++, ip++) { + if (!host_ptr_in_ram(vcpu->kvm, ip)) + break; + + c = *ip; + + if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) + dprintf(debug_fd, " <%02x>", c); + else + dprintf(debug_fd, " %02x", c); + } + + dprintf(debug_fd, "\n"); + + dprintf(debug_fd, "\n Stack:\n"); + dprintf(debug_fd, " ------\n"); + kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); +} + +void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) +{ + u64 *pte1; + u64 *pte2; + u64 *pte3; + u64 *pte4; + + if (!is_in_protected_mode(vcpu)) + return; + + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) + die("KVM_GET_SREGS failed"); + + pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); + if (!host_ptr_in_ram(vcpu->kvm, pte4)) + return; + + pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); + if (!host_ptr_in_ram(vcpu->kvm, pte3)) + return; + + pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); + if 
(!host_ptr_in_ram(vcpu->kvm, pte2)) + return; + + pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); + if (!host_ptr_in_ram(vcpu->kvm, pte1)) + return; + + dprintf(debug_fd, "Page Tables:\n"); + if (*pte2 & (1 << 7)) + dprintf(debug_fd, " pte4: %016llx pte3: %016llx" + " pte2: %016llx\n", + *pte4, *pte3, *pte2); + else + dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016" + "llx pte1: %016llx\n", + *pte4, *pte3, *pte2, *pte1); +} diff --git a/tools/kvm/x86/kvm.c b/tools/kvm/x86/kvm.c new file mode 100644 index 0000000..ac6c91e --- /dev/null +++ b/tools/kvm/x86/kvm.c @@ -0,0 +1,330 @@ +#include "kvm/kvm.h" +#include "kvm/boot-protocol.h" +#include "kvm/cpufeature.h" +#include "kvm/interrupt.h" +#include "kvm/mptable.h" +#include "kvm/util.h" + +#include <asm/bootparam.h> +#include <linux/kvm.h> + +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <stdbool.h> +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <fcntl.h> +#include <asm/unistd.h> + +struct kvm_ext kvm_req_ext[] = { + { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, + { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, + { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, + { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, + { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, + { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, + { DEFINE_KVM_EXT(KVM_CAP_HLT) }, + { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, + { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, + { 0, 0 } +}; + +bool kvm__arch_cpu_supports_vm(void) +{ + struct cpuid_regs regs; + u32 eax_base; + int feature; + + regs = (struct cpuid_regs) { + .eax = 0x00, + }; + host_cpuid(&regs); + + switch (regs.ebx) { + case CPUID_VENDOR_INTEL_1: + eax_base = 0x00; + feature = KVM__X86_FEATURE_VMX; + break; + + case CPUID_VENDOR_AMD_1: + eax_base = 0x80000000; + feature = KVM__X86_FEATURE_SVM; + break; + + default: + return false; + } + + regs = (struct cpuid_regs) { + .eax = eax_base, + }; + 
host_cpuid(&regs); + + if (regs.eax < eax_base + 0x01) + return false; + + regs = (struct cpuid_regs) { + .eax = eax_base + 0x01 + }; + host_cpuid(&regs); + + return regs.ecx & (1 << feature); +} + +/* + * Allocating RAM size bigger than 4GB requires us to leave a gap + * in the RAM which is used for PCI MMIO, hotplug, and unconfigured + * devices (see documentation of e820_setup_gap() for details). + * + * If we're required to initialize RAM bigger than 4GB, we will create + * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space. + */ + +void kvm__init_ram(struct kvm *kvm) +{ + u64 phys_start, phys_size; + void *host_mem; + + if (kvm->ram_size < KVM_32BIT_GAP_START) { + /* Use a single block of RAM for 32bit RAM */ + + phys_start = 0; + phys_size = kvm->ram_size; + host_mem = kvm->ram_start; + + kvm__register_mem(kvm, phys_start, phys_size, host_mem); + } else { + /* First RAM range from zero to the PCI gap: */ + + phys_start = 0; + phys_size = KVM_32BIT_GAP_START; + host_mem = kvm->ram_start; + + kvm__register_mem(kvm, phys_start, phys_size, host_mem); + + /* Second RAM range from 4GB to the end of RAM: */ + + phys_start = 0x100000000ULL; + phys_size = kvm->ram_size - phys_size; + host_mem = kvm->ram_start + phys_start; + + kvm__register_mem(kvm, phys_start, phys_size, host_mem); + } +} + +/* Architecture-specific KVM init */ +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name) +{ + struct kvm_pit_config pit_config = { .flags = 0, }; + int ret; + + ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); + if (ret < 0) + die_perror("KVM_SET_TSS_ADDR ioctl"); + + ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); + if (ret < 0) + die_perror("KVM_CREATE_PIT2 ioctl"); + + kvm->ram_size = ram_size; + + if (kvm->ram_size < KVM_32BIT_GAP_START) { + kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); + } else { + kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, 
MAP_ANON_NORESERVE, -1, 0); + if (kvm->ram_start != MAP_FAILED) { + /* + * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that + * if we accidently write to it, we will know. + */ + mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); + } + } + if (kvm->ram_start == MAP_FAILED) + die("out of memory"); + + madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); + + ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); + if (ret < 0) + die_perror("KVM_CREATE_IRQCHIP ioctl"); +} + +void kvm__irq_line(struct kvm *kvm, int irq, int level) +{ + struct kvm_irq_level irq_level; + + irq_level = (struct kvm_irq_level) { + { + .irq = irq, + }, + .level = level, + }; + + if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) + die_perror("KVM_IRQ_LINE failed"); +} + +void kvm__irq_trigger(struct kvm *kvm, int irq) +{ + kvm__irq_line(kvm, irq, 1); + kvm__irq_line(kvm, irq, 0); +} + +#define BOOT_LOADER_SELECTOR 0x1000 +#define BOOT_LOADER_IP 0x0000 +#define BOOT_LOADER_SP 0x8000 +#define BOOT_CMDLINE_OFFSET 0x20000 + +#define BOOT_PROTOCOL_REQUIRED 0x206 +#define LOAD_HIGH 0x01 + +int load_flat_binary(struct kvm *kvm, int fd) +{ + void *p; + int nr; + + if (lseek(fd, 0, SEEK_SET) < 0) + die_perror("lseek"); + + p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); + + while ((nr = read(fd, p, 65536)) > 0) + p += nr; + + kvm->boot_selector = BOOT_LOADER_SELECTOR; + kvm->boot_ip = BOOT_LOADER_IP; + kvm->boot_sp = BOOT_LOADER_SP; + + return true; +} + +static const char *BZIMAGE_MAGIC = "HdrS"; + +bool load_bzimage(struct kvm *kvm, int fd_kernel, + int fd_initrd, const char *kernel_cmdline, u16 vidmode) +{ + struct boot_params *kern_boot; + unsigned long setup_sects; + struct boot_params boot; + size_t cmdline_size; + ssize_t setup_size; + void *p; + int nr; + + /* + * See Documentation/x86/boot.txt for details no bzImage on-disk and + * memory layout. 
+ */ + + if (lseek(fd_kernel, 0, SEEK_SET) < 0) + die_perror("lseek"); + + if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) + return false; + + if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) + return false; + + if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) + die("Too old kernel"); + + if (lseek(fd_kernel, 0, SEEK_SET) < 0) + die_perror("lseek"); + + if (!boot.hdr.setup_sects) + boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; + setup_sects = boot.hdr.setup_sects + 1; + + setup_size = setup_sects << 9; + p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); + + /* copy setup.bin to mem*/ + if (read(fd_kernel, p, setup_size) != setup_size) + die_perror("read"); + + /* copy vmlinux.bin to BZ_KERNEL_START*/ + p = guest_flat_to_host(kvm, BZ_KERNEL_START); + + while ((nr = read(fd_kernel, p, 65536)) > 0) + p += nr; + + p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); + if (kernel_cmdline) { + cmdline_size = strlen(kernel_cmdline) + 1; + if (cmdline_size > boot.hdr.cmdline_size) + cmdline_size = boot.hdr.cmdline_size; + + memset(p, 0, boot.hdr.cmdline_size); + memcpy(p, kernel_cmdline, cmdline_size - 1); + } + + kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); + + kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; + kern_boot->hdr.type_of_loader = 0xff; + kern_boot->hdr.heap_end_ptr = 0xfe00; + kern_boot->hdr.loadflags |= CAN_USE_HEAP; + kern_boot->hdr.vid_mode = vidmode; + + /* + * Read initrd image into guest memory + */ + if (fd_initrd >= 0) { + struct stat initrd_stat; + unsigned long addr; + + if (fstat(fd_initrd, &initrd_stat)) + die_perror("fstat"); + + addr = boot.hdr.initrd_addr_max & ~0xfffff; + for (;;) { + if (addr < BZ_KERNEL_START) + die("Not enough memory for initrd"); + else if (addr < (kvm->ram_size - initrd_stat.st_size)) + break; + addr -= 0x100000; + } + + p = guest_flat_to_host(kvm, addr); + nr = read(fd_initrd, p, initrd_stat.st_size); + if (nr != initrd_stat.st_size) + die("Failed to read
initrd"); + + kern_boot->hdr.ramdisk_image = addr; + kern_boot->hdr.ramdisk_size = initrd_stat.st_size; + } + + kvm->boot_selector = BOOT_LOADER_SELECTOR; + /* + * The real-mode setup code starts at offset 0x200 of a bzImage. See + * Documentation/x86/boot.txt for details. + */ + kvm->boot_ip = BOOT_LOADER_IP + 0x200; + kvm->boot_sp = BOOT_LOADER_SP; + + return true; +} + +/** + * kvm__arch_setup_firmware - inject BIOS into guest system memory + * @kvm - guest system descriptor + * + * This function is a main routine where we poke guest memory + * and install BIOS there. + */ +void kvm__arch_setup_firmware(struct kvm *kvm) +{ + /* standard minimal configuration */ + setup_bios(kvm); + + /* FIXME: SMP, ACPI and friends here */ + + /* MP table */ + mptable_setup(kvm, kvm->nrcpus); +}
Create a new arch-specific subdirectory to contain architecture-specific code and includes. The Makefile now adds various arch-specific objects based on detected architecture. That aside, this patch should only contain code moves. These include: - x86-specific kvm_cpu setup, kernel loading, memory setup etc. now in x86/kvm{-cpu}.c - BIOS now lives in x86/bios/ - ioport setup - KVM extensions are asserted in arch-specific kvm.c now, so each architecture can manage its own dependencies. - Various architecture-specific #defines are moved into $(ARCH)/include/kvm{-cpu}.h such as struct kvm_cpu, KVM_NR_CPUS, KVM_32BIT_GAP_SIZE. Signed-off-by: Matt Evans <matt@ozlabs.org> --- tools/kvm/Makefile | 96 ++++--- tools/kvm/builtin-run.c | 6 +- tools/kvm/include/kvm/ioport.h | 2 +- tools/kvm/include/kvm/kvm-cpu.h | 27 +-- tools/kvm/include/kvm/kvm.h | 58 +--- tools/kvm/ioport.c | 54 ---- tools/kvm/kvm-cpu.c | 372 ---------------------- tools/kvm/kvm.c | 323 +------------------- tools/kvm/{ => x86}/bios.c | 0 tools/kvm/{ => x86}/bios/.gitignore | 0 tools/kvm/{ => x86}/bios/bios-rom.S | 2 +- tools/kvm/{ => x86}/bios/e820.c | 0 tools/kvm/{ => x86}/bios/entry.S | 0 tools/kvm/{ => x86}/bios/gen-offsets.sh | 0 tools/kvm/{ => x86}/bios/int10.c | 0 tools/kvm/{ => x86}/bios/int15.c | 0 tools/kvm/{ => x86}/bios/local.S | 0 tools/kvm/{ => x86}/bios/macro.S | 0 tools/kvm/{ => x86}/bios/memcpy.c | 0 tools/kvm/{ => x86}/bios/rom.ld.S | 0 tools/kvm/{ => x86}/cpuid.c | 0 tools/kvm/{ => x86}/include/kvm/assembly.h | 0 tools/kvm/{ => x86}/include/kvm/barrier.h | 0 tools/kvm/{ => x86}/include/kvm/bios-export.h | 0 tools/kvm/{ => x86}/include/kvm/bios.h | 0 tools/kvm/{ => x86}/include/kvm/boot-protocol.h | 0 tools/kvm/{ => x86}/include/kvm/cpufeature.h | 0 tools/kvm/{ => x86}/include/kvm/interrupt.h | 0 tools/kvm/x86/include/kvm/kvm-arch.h | 59 ++++ tools/kvm/x86/include/kvm/kvm-cpu-arch.h | 33 ++ tools/kvm/{ => x86}/include/kvm/mptable.h | 0 tools/kvm/{ => x86}/interrupt.c | 0 
tools/kvm/x86/ioport.c | 59 ++++ tools/kvm/{ => x86}/irq.c | 0 tools/kvm/x86/kvm-cpu.c | 383 +++++++++++++++++++++++ tools/kvm/x86/kvm.c | 330 +++++++++++++++++++ tools/kvm/{ => x86}/mptable.c | 0 37 files changed, 951 insertions(+), 853 deletions(-) rename tools/kvm/{ => x86}/bios.c (100%) rename tools/kvm/{ => x86}/bios/.gitignore (100%) rename tools/kvm/{ => x86}/bios/bios-rom.S (80%) rename tools/kvm/{ => x86}/bios/e820.c (100%) rename tools/kvm/{ => x86}/bios/entry.S (100%) rename tools/kvm/{ => x86}/bios/gen-offsets.sh (100%) rename tools/kvm/{ => x86}/bios/int10.c (100%) rename tools/kvm/{ => x86}/bios/int15.c (100%) rename tools/kvm/{ => x86}/bios/local.S (100%) rename tools/kvm/{ => x86}/bios/macro.S (100%) rename tools/kvm/{ => x86}/bios/memcpy.c (100%) rename tools/kvm/{ => x86}/bios/rom.ld.S (100%) rename tools/kvm/{ => x86}/cpuid.c (100%) rename tools/kvm/{ => x86}/include/kvm/assembly.h (100%) rename tools/kvm/{ => x86}/include/kvm/barrier.h (100%) rename tools/kvm/{ => x86}/include/kvm/bios-export.h (100%) rename tools/kvm/{ => x86}/include/kvm/bios.h (100%) rename tools/kvm/{ => x86}/include/kvm/boot-protocol.h (100%) rename tools/kvm/{ => x86}/include/kvm/cpufeature.h (100%) rename tools/kvm/{ => x86}/include/kvm/interrupt.h (100%) create mode 100644 tools/kvm/x86/include/kvm/kvm-arch.h create mode 100644 tools/kvm/x86/include/kvm/kvm-cpu-arch.h rename tools/kvm/{ => x86}/include/kvm/mptable.h (100%) rename tools/kvm/{ => x86}/interrupt.c (100%) create mode 100644 tools/kvm/x86/ioport.c rename tools/kvm/{ => x86}/irq.c (100%) create mode 100644 tools/kvm/x86/kvm-cpu.c create mode 100644 tools/kvm/x86/kvm.c rename tools/kvm/{ => x86}/mptable.c (100%) diff --git a/tools/kvm/mptable.c b/tools/kvm/x86/mptable.c similarity index 100% rename from tools/kvm/mptable.c rename to tools/kvm/x86/mptable.c -- To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body of a message to majordomo@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html