Message ID | 1369911913-10934-3-git-send-email-mst@redhat.com |
---|---|
State | New |
Headers | show |
Hi, > + } else { > + guest_info->pci_info.w64.begin = 0x100000000ULL + above_4g_mem_size; > + guest_info->pci_info.w64.end = guest_info->pci_info.w64.begin + > + (0x1ULL << 62); Doesn't this give unaligned windows? > + /* Set PCI window size the way seabios has always done it. */ > + /* TODO: consider just starting at below_4g_mem_size */ Used to be that way. Was changed for alignment reasons (i.e. 1G window starts at 1G border etc). cheers, Gerd
On Thu, May 30, 2013 at 02:16:13PM +0200, Gerd Hoffmann wrote: > Hi, > > > + } else { > > + guest_info->pci_info.w64.begin = 0x100000000ULL + above_4g_mem_size; > > + guest_info->pci_info.w64.end = guest_info->pci_info.w64.begin + > > + (0x1ULL << 62); > > Doesn't this give unaligned windows? PCI Bridge windows do not need to be size aligned. In any case, the windows are *exactly* as calculated by seabios - apparently it does not size-align windows either. > > + /* Set PCI window size the way seabios has always done it. */ > > + /* TODO: consider just starting at below_4g_mem_size */ > > Used to be that way. Was changed for alignment reasons (i.e. 1G window > starts at 1G border etc). Where's the alignment requirement coming from? > > cheers, > Gerd
I can't offer any opinion about the values you put into w32 and w64, but I have some remarks. First, please consider passing -O/path/to/some/order_file to git-format-patch, so that .h files show up at the top of each patch. On 05/30/13 13:07, Michael S. Tsirkin wrote: > Will be used to pass hole ranges to guests. > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > --- > hw/i386/pc.c | 39 ++++++++++++++++++++++++++++++++++++++- > hw/i386/pc_piix.c | 14 +++++++++++++- > hw/i386/pc_q35.c | 6 +++++- > hw/pci-host/q35.c | 4 ++++ > include/hw/i386/pc.h | 19 ++++++++++++++++++- > include/hw/pci-host/q35.h | 2 ++ > include/qemu/typedefs.h | 1 + > 7 files changed, 81 insertions(+), 4 deletions(-) > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index 4844a6b..c233161 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -978,6 +978,41 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge) > } > } > > +typedef struct PcGuestInfoState { > + PcGuestInfo info; > + Notifier machine_done; > +} PcGuestInfoState; > + > +static > +void pc_guest_info_machine_done(Notifier *notifier, void *data) > +{ > + PcGuestInfoState *guest_info_state = container_of(notifier, > + PcGuestInfoState, > + machine_done); > +} > + > +PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, > + ram_addr_t above_4g_mem_size) > +{ > + PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state); > + PcGuestInfo *guest_info = &guest_info_state->info; > + > + guest_info->pci_info.w32.end = IO_APIC_DEFAULT_ADDRESS; > + if (sizeof(hwaddr) == 4) { > + guest_info->pci_info.w64.begin = 0; > + guest_info->pci_info.w64.end = 0; > + } else { > + guest_info->pci_info.w64.begin = 0x100000000ULL + above_4g_mem_size; > + guest_info->pci_info.w64.end = guest_info->pci_info.w64.begin + > + (0x1ULL << 62); > + assert(guest_info->pci_info.w64.begin <= guest_info->pci_info.w64.end); > + } > + > + guest_info_state->machine_done.notify = pc_guest_info_machine_done; > + qemu_add_machine_init_done_notifier(&guest_info_state->machine_done); > + return guest_info; > +} > + > void pc_acpi_init(const char *default_dsdt) > { > char *filename; > @@ -1019,7 +1054,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > ram_addr_t below_4g_mem_size, > ram_addr_t above_4g_mem_size, > MemoryRegion *rom_memory, > - MemoryRegion **ram_memory) > + MemoryRegion **ram_memory, > + PcGuestInfo *guest_info) > { > int linux_boot, i; > MemoryRegion *ram, *option_rom_mr; > @@ -1071,6 +1107,7 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > for (i = 0; i < nb_option_roms; i++) { > rom_add_option(option_rom[i].name, option_rom[i].bootindex); > } > + guest_info->fw_cfg = fw_cfg; > return fw_cfg; > } I find this suboptimal style, ie. passing "guest_info" to pc_memory_init() just so that pc_memory_init() can set guest_info->fw_cfg to fw_cfg, when pc_memory_init() returns fw_cfg anyway. More "philosophically": > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index b4c8a74..1bf5219 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -9,8 +9,20 @@ > #include "net/net.h" > #include "hw/i386/ioapic.h" > > +#include "qemu/range.h" > + > /* PC-style peripherals (also used by other machines). */ > > +typedef struct PcPciInfo { > + Range w32; > + Range w64; > +} PcPciInfo; > + > +struct PcGuestInfo { > + PcPciInfo pci_info; > + FWCfgState *fw_cfg; > +}; Are you sure you need a private link to the global fw_cfg object? The pvpanic series added a global lookup possibility in commit 10a584b2 (object_resolve_path()). Anyway these are just subjective style notes. > + > /* parallel.c */ > static inline bool parallel_init(ISABus *bus, int index, CharDriverState *chr) > { > @@ -80,6 +92,10 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); > void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge); > void pc_hot_add_cpu(const int64_t id, Error **errp); > void pc_acpi_init(const char *default_dsdt); > + > +PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, > + ram_addr_t above_4g_mem_size); > + > FWCfgState *pc_memory_init(MemoryRegion *system_memory, > const char *kernel_filename, > const char *kernel_cmdline, > @@ -87,7 +103,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > ram_addr_t below_4g_mem_size, > ram_addr_t above_4g_mem_size, > MemoryRegion *rom_memory, > - MemoryRegion **ram_memory); > + MemoryRegion **ram_memory, > + PcGuestInfo *guest_info); > qemu_irq *pc_allocate_cpu_irq(void); > DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); > void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, Going forward: > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > index d547548..eaff0b6 100644 > --- a/hw/i386/pc_piix.c > +++ b/hw/i386/pc_piix.c > @@ -90,6 +90,7 @@ static void pc_init1(MemoryRegion *system_memory, > MemoryRegion *rom_memory; > DeviceState *icc_bridge; > FWCfgState *fw_cfg = NULL; > + PcGuestInfo *guest_info; > > icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); > object_property_add_child(qdev_get_machine(), "icc-bridge", > @@ -119,12 +120,23 @@ static void pc_init1(MemoryRegion *system_memory, > rom_memory = system_memory; > } > > + guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size); > + > + /* Set PCI window size the way seabios has always done it. */ > + /* TODO: consider just starting at below_4g_mem_size */ > + if (ram_size <= 0x80000000) > + guest_info->pci_info.w32.begin = 0x80000000; > + else if (ram_size <= 0xc0000000) > + guest_info->pci_info.w32.begin = 0xc0000000; > + else > + guest_info->pci_info.w32.begin = 0xe0000000; > + > /* allocate ram and load rom/bios */ > if (!xen_enabled()) { > fw_cfg = pc_memory_init(system_memory, > kernel_filename, kernel_cmdline, initrd_filename, > below_4g_mem_size, above_4g_mem_size, > - rom_memory, &ram_memory); > + rom_memory, &ram_memory, guest_info); > } > > gsi_state = g_malloc0(sizeof(*gsi_state)); On PIIX you *almost* leak the guest_info structure at once; the only link to it is the machine-done-notifier registered in pc_guest_info_init(). Is this intended? (I guess a later patch in the series will change this.) > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c > index 7888dfe..32d6357 100644 > --- a/hw/i386/pc_q35.c > +++ b/hw/i386/pc_q35.c > @@ -77,6 +77,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > ICH9LPCState *ich9_lpc; > PCIDevice *ahci; > DeviceState *icc_bridge; > + PcGuestInfo *guest_info; > > icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); > object_property_add_child(qdev_get_machine(), "icc-bridge", > @@ -105,11 +106,13 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > rom_memory = get_system_memory(); > } > > + guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size); > + > /* allocate ram and load rom/bios */ > if (!xen_enabled()) { > pc_memory_init(get_system_memory(), kernel_filename, kernel_cmdline, > initrd_filename, below_4g_mem_size, above_4g_mem_size, > - rom_memory, &ram_memory); > + rom_memory, &ram_memory, guest_info); > } > > /* irq lines */ > @@ -131,6 +134,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > q35_host->mch.address_space_io = get_system_io(); > q35_host->mch.below_4g_mem_size = below_4g_mem_size; > q35_host->mch.above_4g_mem_size = above_4g_mem_size; > + q35_host->mch.guest_info = guest_info; > /* pci */ > qdev_init_nofail(DEVICE(q35_host)); > host_bus = q35_host->host.pci.bus; OK, a direct (owner) link is established here; which gives birth to the next question: > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c > index 24df6b5..63c64dd 100644 > --- a/hw/pci-host/q35.c > +++ b/hw/pci-host/q35.c > @@ -244,6 +244,10 @@ static int mch_init(PCIDevice *d) > hwaddr pci_hole64_size; > MCHPCIState *mch = MCH_PCI_DEVICE(d); > > + /* Leave enough space for the biggest MCFG BAR */ > + mch->guest_info->pci_info.w32.begin = MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + > + MCH_HOST_BRIDGE_PCIEXBAR_MAX; > + > /* setup pci memory regions */ > memory_region_init_alias(&mch->pci_hole, "pci-hole", > mch->pci_address_space, > diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h > index e182c82..b083831 100644 > --- a/include/hw/pci-host/q35.h > +++ b/include/hw/pci-host/q35.h > @@ -55,6 +55,7 @@ typedef struct MCHPCIState { > uint8_t smm_enabled; > ram_addr_t below_4g_mem_size; > ram_addr_t above_4g_mem_size; > + PcGuestInfo *guest_info; > } MCHPCIState; ... how does this relate to migration? (I'm not suggesting anything, I'm just curious.) Thanks, Laszlo
On 05/30/13 14:19, Michael S. Tsirkin wrote: > On Thu, May 30, 2013 at 02:16:13PM +0200, Gerd Hoffmann wrote: >> Hi, >> >>> + } else { >>> + guest_info->pci_info.w64.begin = 0x100000000ULL + above_4g_mem_size; >>> + guest_info->pci_info.w64.end = guest_info->pci_info.w64.begin + >>> + (0x1ULL << 62); >> >> Doesn't this give unaligned windows? > > PCI Bridge windows do not need to be size aligned. > > In any case, the windows are *exactly* as calculated > by seabios - apparently it does not size-align windows either. Surely not. SeaBIOS sizes the 64bit window according to the space needed by the 64bit bars it wants to map there. >>> + /* Set PCI window size the way seabios has always done it. */ >>> + /* TODO: consider just starting at below_4g_mem_size */ >> >> Used to be that way. Was changed for alignment reasons (i.e. 1G window >> starts at 1G border etc). > > Where's the alignment requirement coming from? seabios creates a mtrr entry for the window, which doesn't work in case it isn't aligned (at least not with a single entry). Also real hardware tends to do it this way. cheers, Gerd
On Thu, May 30, 2013 at 02:25:41PM +0200, Laszlo Ersek wrote: > I can't offer any opinion about the values you put into w32 and w64, but I have some remarks. > > First, please consider passing -O/path/to/some/order_file to git-format-patch, so that .h files show up at the top of each patch. > > On 05/30/13 13:07, Michael S. Tsirkin wrote: > > Will be used to pass hole ranges to guests. > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > --- > > hw/i386/pc.c | 39 ++++++++++++++++++++++++++++++++++++++- > > hw/i386/pc_piix.c | 14 +++++++++++++- > > hw/i386/pc_q35.c | 6 +++++- > > hw/pci-host/q35.c | 4 ++++ > > include/hw/i386/pc.h | 19 ++++++++++++++++++- > > include/hw/pci-host/q35.h | 2 ++ > > include/qemu/typedefs.h | 1 + > > 7 files changed, 81 insertions(+), 4 deletions(-) > > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > > index 4844a6b..c233161 100644 > > --- a/hw/i386/pc.c > > +++ b/hw/i386/pc.c > > @@ -978,6 +978,41 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge) > > } > > } > > > > +typedef struct PcGuestInfoState { > > + PcGuestInfo info; > > + Notifier machine_done; > > +} PcGuestInfoState; > > + > > +static > > +void pc_guest_info_machine_done(Notifier *notifier, void *data) > > +{ > > + PcGuestInfoState *guest_info_state = container_of(notifier, > > + PcGuestInfoState, > > + machine_done); > > +} > > + > > +PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, > > + ram_addr_t above_4g_mem_size) > > +{ > > + PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state); > > + PcGuestInfo *guest_info = &guest_info_state->info; > > + > > + guest_info->pci_info.w32.end = IO_APIC_DEFAULT_ADDRESS; > > + if (sizeof(hwaddr) == 4) { > > + guest_info->pci_info.w64.begin = 0; > > + guest_info->pci_info.w64.end = 0; > > + } else { > > + guest_info->pci_info.w64.begin = 0x100000000ULL + above_4g_mem_size; > > + guest_info->pci_info.w64.end = guest_info->pci_info.w64.begin + > > + (0x1ULL << 62); > > + assert(guest_info->pci_info.w64.begin <= guest_info->pci_info.w64.end); > > + } > > + > > + guest_info_state->machine_done.notify = pc_guest_info_machine_done; > > + qemu_add_machine_init_done_notifier(&guest_info_state->machine_done); > > + return guest_info; > > +} > > + > > void pc_acpi_init(const char *default_dsdt) > > { > > char *filename; > > @@ -1019,7 +1054,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > > ram_addr_t below_4g_mem_size, > > ram_addr_t above_4g_mem_size, > > MemoryRegion *rom_memory, > > - MemoryRegion **ram_memory) > > + MemoryRegion **ram_memory, > > + PcGuestInfo *guest_info) > > { > > int linux_boot, i; > > MemoryRegion *ram, *option_rom_mr; > > @@ -1071,6 +1107,7 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > > for (i = 0; i < nb_option_roms; i++) { > > rom_add_option(option_rom[i].name, option_rom[i].bootindex); > > } > > + guest_info->fw_cfg = fw_cfg; > > return fw_cfg; > > } > > I find this suboptimal style, ie. passing "guest_info" to pc_memory_init() just so that pc_memory_init() can set guest_info->fw_cfg to fw_cfg, when pc_memory_init() returns fw_cfg anyway. Well otherwise all callers have to remember to set it. > More "philosophically": > > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > > index b4c8a74..1bf5219 100644 > > --- a/include/hw/i386/pc.h > > +++ b/include/hw/i386/pc.h > > @@ -9,8 +9,20 @@ > > #include "net/net.h" > > #include "hw/i386/ioapic.h" > > > > +#include "qemu/range.h" > > + > > /* PC-style peripherals (also used by other machines). */ > > > > +typedef struct PcPciInfo { > > + Range w32; > > + Range w64; > > +} PcPciInfo; > > + > > +struct PcGuestInfo { > > + PcPciInfo pci_info; > > + FWCfgState *fw_cfg; > > +}; > > Are you sure you need a private link to the global fw_cfg object? The pvpanic series added a global lookup possibility in commit 10a584b2 (object_resolve_path()). > > Anyway these are just subjective style notes. Yes. I personally prefer not using global lookups: passing a pointer makes dependencies clearer IMO. If we do switch to that, I think it's cleaner to have a wrapper so that everyone does not need to hard-code strings like /machine/fw_cfg. > > + > > /* parallel.c */ > > static inline bool parallel_init(ISABus *bus, int index, CharDriverState *chr) > > { > > @@ -80,6 +92,10 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); > > void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge); > > void pc_hot_add_cpu(const int64_t id, Error **errp); > > void pc_acpi_init(const char *default_dsdt); > > + > > +PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, > > + ram_addr_t above_4g_mem_size); > > + > > FWCfgState *pc_memory_init(MemoryRegion *system_memory, > > const char *kernel_filename, > > const char *kernel_cmdline, > > @@ -87,7 +103,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > > ram_addr_t below_4g_mem_size, > > ram_addr_t above_4g_mem_size, > > MemoryRegion *rom_memory, > > - MemoryRegion **ram_memory); > > + MemoryRegion **ram_memory, > > + PcGuestInfo *guest_info); > > qemu_irq *pc_allocate_cpu_irq(void); > > DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); > > void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, > > Going forward: > > > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > > index d547548..eaff0b6 100644 > > --- a/hw/i386/pc_piix.c > > +++ b/hw/i386/pc_piix.c > > @@ -90,6 +90,7 @@ static void pc_init1(MemoryRegion *system_memory, > > MemoryRegion *rom_memory; > > DeviceState *icc_bridge; > > FWCfgState *fw_cfg = NULL; > > + PcGuestInfo *guest_info; > > > > icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); > > object_property_add_child(qdev_get_machine(), "icc-bridge", > > @@ -119,12 +120,23 @@ static void pc_init1(MemoryRegion *system_memory, > > rom_memory = system_memory; > > } > > > > + guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size); > > + > > + /* Set PCI window size the way seabios has always done it. */ > > + /* TODO: consider just starting at below_4g_mem_size */ > > + if (ram_size <= 0x80000000) > > + guest_info->pci_info.w32.begin = 0x80000000; > > + else if (ram_size <= 0xc0000000) > > + guest_info->pci_info.w32.begin = 0xc0000000; > > + else > > + guest_info->pci_info.w32.begin = 0xe0000000; > > + > > /* allocate ram and load rom/bios */ > > if (!xen_enabled()) { > > fw_cfg = pc_memory_init(system_memory, > > kernel_filename, kernel_cmdline, initrd_filename, > > below_4g_mem_size, above_4g_mem_size, > > - rom_memory, &ram_memory); > > + rom_memory, &ram_memory, guest_info); > > } > > > > gsi_state = g_malloc0(sizeof(*gsi_state)); > > On PIIX you *almost* leak the guest_info structure at once; the only link to it is the machine-done-notifier registered in pc_guest_info_init(). Is this intended? (I guess a later patch in the series will change this.) Yes. Machine done notifiers generally aren't cleaned up: there is qemu_add_machine_init_done_notifier but not qemu_del_machine_init_done_notifier, so there's no way to free it. Same applies to all other such notifiers. > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c > > index 7888dfe..32d6357 100644 > > --- a/hw/i386/pc_q35.c > > +++ b/hw/i386/pc_q35.c > > @@ -77,6 +77,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > > ICH9LPCState *ich9_lpc; > > PCIDevice *ahci; > > DeviceState *icc_bridge; > > + PcGuestInfo *guest_info; > > > > icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); > > object_property_add_child(qdev_get_machine(), "icc-bridge", > > @@ -105,11 +106,13 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > > rom_memory = get_system_memory(); > > } > > > > + guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size); > > + > > /* allocate ram and load rom/bios */ > > if (!xen_enabled()) { > > pc_memory_init(get_system_memory(), kernel_filename, kernel_cmdline, > > initrd_filename, below_4g_mem_size, above_4g_mem_size, > > - rom_memory, &ram_memory); > > + rom_memory, &ram_memory, guest_info); > > } > > > > /* irq lines */ > > @@ -131,6 +134,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > > q35_host->mch.address_space_io = get_system_io(); > > q35_host->mch.below_4g_mem_size = below_4g_mem_size; > > q35_host->mch.above_4g_mem_size = above_4g_mem_size; > > + q35_host->mch.guest_info = guest_info; > > /* pci */ > > qdev_init_nofail(DEVICE(q35_host)); > > host_bus = q35_host->host.pci.bus; > > OK, a direct (owner) link is established here; which gives birth to the next question: > > > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c > > index 24df6b5..63c64dd 100644 > > --- a/hw/pci-host/q35.c > > +++ b/hw/pci-host/q35.c > > @@ -244,6 +244,10 @@ static int mch_init(PCIDevice *d) > > hwaddr pci_hole64_size; > > MCHPCIState *mch = MCH_PCI_DEVICE(d); > > > > + /* Leave enough space for the biggest MCFG BAR */ > > + mch->guest_info->pci_info.w32.begin = MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + > > + MCH_HOST_BRIDGE_PCIEXBAR_MAX; > > + > > /* setup pci memory regions */ > > memory_region_init_alias(&mch->pci_hole, "pci-hole", > > mch->pci_address_space, > > > > diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h > > index e182c82..b083831 100644 > > --- a/include/hw/pci-host/q35.h > > +++ b/include/hw/pci-host/q35.h > > @@ -55,6 +55,7 @@ typedef struct MCHPCIState { > > uint8_t smm_enabled; > > ram_addr_t below_4g_mem_size; > > ram_addr_t above_4g_mem_size; > > + PcGuestInfo *guest_info; > > } MCHPCIState; > > ... how does this relate to migration? (I'm not suggesting anything, I'm just curious.) > > Thanks, > Laszlo As far as I can tell it doesn't relate to migration in any way.
On Thu, May 30, 2013 at 02:32:01PM +0200, Gerd Hoffmann wrote: > On 05/30/13 14:19, Michael S. Tsirkin wrote: > > On Thu, May 30, 2013 at 02:16:13PM +0200, Gerd Hoffmann wrote: > >> Hi, > >> > >>> + } else { > >>> + guest_info->pci_info.w64.begin = 0x100000000ULL + above_4g_mem_size; > >>> + guest_info->pci_info.w64.end = guest_info->pci_info.w64.begin + > >>> + (0x1ULL << 62); > >> > >> Doesn't this give unaligned windows? > > > > PCI Bridge windows do not need to be size aligned. > > > > In any case, the windows are *exactly* as calculated > > by seabios - apparently it does not size-align windows either. > > Surely not. SeaBIOS sizes the 64bit window according to the space > needed by the 64bit bars it wants to map there. Ah, it's 64 bit. True. That's a seabios bug by the way: if we add more devices by hotplug later, we want more pci memory. > >>> + /* Set PCI window size the way seabios has always done it. */ > >>> + /* TODO: consider just starting at below_4g_mem_size */ > >> > >> Used to be that way. Was changed for alignment reasons (i.e. 1G window > >> starts at 1G border etc). > > > > Where's the alignment requirement coming from? > > seabios creates a mtrr entry for the window, which doesn't work in case > it isn't aligned (at least not with a single entry). > > Also real hardware tends to do it this way. > > cheers, > Gerd I see. I'll figure out the details and add a comment to this end. But that's for the 32 bit window - I don't see it playing with mtrrs for the 64 bit ranges. So I'm guessing alignment isn't needed there, right?
On Thu, May 30, 2013 at 02:07:19PM +0300, Michael S. Tsirkin wrote: > Will be used to pass hole ranges to guests. > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > --- > hw/i386/pc.c | 39 ++++++++++++++++++++++++++++++++++++++- > hw/i386/pc_piix.c | 14 +++++++++++++- > hw/i386/pc_q35.c | 6 +++++- > hw/pci-host/q35.c | 4 ++++ > include/hw/i386/pc.h | 19 ++++++++++++++++++- > include/hw/pci-host/q35.h | 2 ++ > include/qemu/typedefs.h | 1 + > 7 files changed, 81 insertions(+), 4 deletions(-) > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index 4844a6b..c233161 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -978,6 +978,41 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge) > } > } > > +typedef struct PcGuestInfoState { > + PcGuestInfo info; > + Notifier machine_done; > +} PcGuestInfoState; > + > +static > +void pc_guest_info_machine_done(Notifier *notifier, void *data) > +{ > + PcGuestInfoState *guest_info_state = container_of(notifier, > + PcGuestInfoState, > + machine_done); > +} > + > +PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, > + ram_addr_t above_4g_mem_size) > +{ > + PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state); > + PcGuestInfo *guest_info = &guest_info_state->info; > + > + guest_info->pci_info.w32.end = IO_APIC_DEFAULT_ADDRESS; > + if (sizeof(hwaddr) == 4) { > + guest_info->pci_info.w64.begin = 0; > + guest_info->pci_info.w64.end = 0; > + } else { > + guest_info->pci_info.w64.begin = 0x100000000ULL + above_4g_mem_size; > + guest_info->pci_info.w64.end = guest_info->pci_info.w64.begin + > + (0x1ULL << 62); > + assert(guest_info->pci_info.w64.begin <= guest_info->pci_info.w64.end); It would be better to do this in range_set(), thus users don't need to do it themselves everywhere. > + } > + > + guest_info_state->machine_done.notify = pc_guest_info_machine_done; > + qemu_add_machine_init_done_notifier(&guest_info_state->machine_done); > + return guest_info; > +} > + > void pc_acpi_init(const char *default_dsdt) > { > char *filename; > @@ -1019,7 +1054,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > ram_addr_t below_4g_mem_size, > ram_addr_t above_4g_mem_size, > MemoryRegion *rom_memory, > - MemoryRegion **ram_memory) > + MemoryRegion **ram_memory, > + PcGuestInfo *guest_info) > { > int linux_boot, i; > MemoryRegion *ram, *option_rom_mr; > @@ -1071,6 +1107,7 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > for (i = 0; i < nb_option_roms; i++) { > rom_add_option(option_rom[i].name, option_rom[i].bootindex); > } > + guest_info->fw_cfg = fw_cfg; > return fw_cfg; > } > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > index d547548..eaff0b6 100644 > --- a/hw/i386/pc_piix.c > +++ b/hw/i386/pc_piix.c > @@ -90,6 +90,7 @@ static void pc_init1(MemoryRegion *system_memory, > MemoryRegion *rom_memory; > DeviceState *icc_bridge; > FWCfgState *fw_cfg = NULL; > + PcGuestInfo *guest_info; > > icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); > object_property_add_child(qdev_get_machine(), "icc-bridge", > @@ -119,12 +120,23 @@ static void pc_init1(MemoryRegion *system_memory, > rom_memory = system_memory; > } > > + guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size); > + > + /* Set PCI window size the way seabios has always done it. */ > + /* TODO: consider just starting at below_4g_mem_size */ > + if (ram_size <= 0x80000000) > + guest_info->pci_info.w32.begin = 0x80000000; > + else if (ram_size <= 0xc0000000) > + guest_info->pci_info.w32.begin = 0xc0000000; > + else > + guest_info->pci_info.w32.begin = 0xe0000000; > + > /* allocate ram and load rom/bios */ > if (!xen_enabled()) { > fw_cfg = pc_memory_init(system_memory, > kernel_filename, kernel_cmdline, initrd_filename, > below_4g_mem_size, above_4g_mem_size, > - rom_memory, &ram_memory); > + rom_memory, &ram_memory, guest_info); > } > > gsi_state = g_malloc0(sizeof(*gsi_state)); > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c > index 7888dfe..32d6357 100644 > --- a/hw/i386/pc_q35.c > +++ b/hw/i386/pc_q35.c > @@ -77,6 +77,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > ICH9LPCState *ich9_lpc; > PCIDevice *ahci; > DeviceState *icc_bridge; > + PcGuestInfo *guest_info; > > icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); > object_property_add_child(qdev_get_machine(), "icc-bridge", > @@ -105,11 +106,13 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > rom_memory = get_system_memory(); > } > > + guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size); > + > /* allocate ram and load rom/bios */ > if (!xen_enabled()) { > pc_memory_init(get_system_memory(), kernel_filename, kernel_cmdline, > initrd_filename, below_4g_mem_size, above_4g_mem_size, > - rom_memory, &ram_memory); > + rom_memory, &ram_memory, guest_info); > } > > /* irq lines */ > @@ -131,6 +134,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > q35_host->mch.address_space_io = get_system_io(); > q35_host->mch.below_4g_mem_size = below_4g_mem_size; > q35_host->mch.above_4g_mem_size = above_4g_mem_size; > + q35_host->mch.guest_info = guest_info; > /* pci */ > qdev_init_nofail(DEVICE(q35_host)); > host_bus = q35_host->host.pci.bus; > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c > index 24df6b5..63c64dd 100644 > --- a/hw/pci-host/q35.c > +++ b/hw/pci-host/q35.c > @@ -244,6 +244,10 @@ static int mch_init(PCIDevice *d) > hwaddr pci_hole64_size; > MCHPCIState *mch = MCH_PCI_DEVICE(d); > > + /* Leave enough space for the biggest MCFG BAR */ > + mch->guest_info->pci_info.w32.begin = MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + > + MCH_HOST_BRIDGE_PCIEXBAR_MAX; > + I'm not sure if this is correct, but mch_update_pciexbar() will update the length according to the length field of PCIEXBAR. And, is PCI Express configuration space not considered pci hole? > /* setup pci memory regions */ > memory_region_init_alias(&mch->pci_hole, "pci-hole", > mch->pci_address_space, > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index b4c8a74..1bf5219 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -9,8 +9,20 @@ > #include "net/net.h" > #include "hw/i386/ioapic.h" > > +#include "qemu/range.h" > + > /* PC-style peripherals (also used by other machines). */ > > +typedef struct PcPciInfo { > + Range w32; > + Range w64; > +} PcPciInfo; PcPciHole as the Subject suggests? > + > +struct PcGuestInfo { > + PcPciInfo pci_info; > + FWCfgState *fw_cfg; > +}; > + > /* parallel.c */ > static inline bool parallel_init(ISABus *bus, int index, CharDriverState *chr) > { > @@ -80,6 +92,10 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); > void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge); > void pc_hot_add_cpu(const int64_t id, Error **errp); > void pc_acpi_init(const char *default_dsdt); > + > +PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, > + ram_addr_t above_4g_mem_size); > + > FWCfgState *pc_memory_init(MemoryRegion *system_memory, > const char *kernel_filename, > const char *kernel_cmdline, > @@ -87,7 +103,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > ram_addr_t below_4g_mem_size, > ram_addr_t above_4g_mem_size, > MemoryRegion *rom_memory, > - MemoryRegion **ram_memory); > + MemoryRegion **ram_memory, > + PcGuestInfo *guest_info); > qemu_irq *pc_allocate_cpu_irq(void); > DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); > void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, > diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h > index e182c82..b083831 100644 > --- a/include/hw/pci-host/q35.h > +++ b/include/hw/pci-host/q35.h > @@ -55,6 +55,7 @@ typedef struct MCHPCIState { > uint8_t smm_enabled; > ram_addr_t below_4g_mem_size; > ram_addr_t above_4g_mem_size; > + PcGuestInfo *guest_info; > } MCHPCIState; > > typedef struct Q35PCIHost { > @@ -81,6 +82,7 @@ typedef struct Q35PCIHost { > #define MCH_HOST_BRIDGE_PCIEXBAR 0x60 /* 64bit register */ > #define MCH_HOST_BRIDGE_PCIEXBAR_SIZE 8 /* 64bit register */ > #define MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT 0xb0000000 > +#define MCH_HOST_BRIDGE_PCIEXBAR_MAX (0x10000000) /* 256M */ > #define MCH_HOST_BRIDGE_PCIEXBAR_ADMSK Q35_MASK(64, 35, 28) > #define MCH_HOST_BRIDGE_PCIEXBAR_128ADMSK ((uint64_t)(1 << 26)) > #define MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK ((uint64_t)(1 << 25)) > diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h > index afe4ec7..ec0d0d1 100644 > --- a/include/qemu/typedefs.h > +++ b/include/qemu/typedefs.h > @@ -62,5 +62,6 @@ typedef struct VirtIODevice VirtIODevice; > typedef struct QEMUSGList QEMUSGList; > typedef struct SHPCDevice SHPCDevice; > typedef struct FWCfgState FWCfgState; > +typedef struct PcGuestInfo PcGuestInfo; > > #endif /* QEMU_TYPEDEFS_H */ > -- > MST >
Hi, > I see. I'll figure out the details and add a comment to this end. > > But that's for the 32 bit window - I don't see it playing > with mtrrs for the 64 bit ranges. > So I'm guessing alignment isn't needed there, right? mtrr's are a 32bit thing anyway IIRC. I still would place the 64bit window aligned. Given that 36 physical address lines (-> 64G address space) used to be quite common until recently I think reserving 16G from 64G downwards would be a good pick. For guests with *lots* of memory we could move the hole to be just below 1T and maybe also make it larger. cheers, Gerd
diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 4844a6b..c233161 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -978,6 +978,41 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge) } } +typedef struct PcGuestInfoState { + PcGuestInfo info; + Notifier machine_done; +} PcGuestInfoState; + +static +void pc_guest_info_machine_done(Notifier *notifier, void *data) +{ + PcGuestInfoState *guest_info_state = container_of(notifier, + PcGuestInfoState, + machine_done); +} + +PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, + ram_addr_t above_4g_mem_size) +{ + PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state); + PcGuestInfo *guest_info = &guest_info_state->info; + + guest_info->pci_info.w32.end = IO_APIC_DEFAULT_ADDRESS; + if (sizeof(hwaddr) == 4) { + guest_info->pci_info.w64.begin = 0; + guest_info->pci_info.w64.end = 0; + } else { + guest_info->pci_info.w64.begin = 0x100000000ULL + above_4g_mem_size; + guest_info->pci_info.w64.end = guest_info->pci_info.w64.begin + + (0x1ULL << 62); + assert(guest_info->pci_info.w64.begin <= guest_info->pci_info.w64.end); + } + + guest_info_state->machine_done.notify = pc_guest_info_machine_done; + qemu_add_machine_init_done_notifier(&guest_info_state->machine_done); + return guest_info; +} + void pc_acpi_init(const char *default_dsdt) { char *filename; @@ -1019,7 +1054,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *rom_memory, - MemoryRegion **ram_memory) + MemoryRegion **ram_memory, + PcGuestInfo *guest_info) { int linux_boot, i; MemoryRegion *ram, *option_rom_mr; @@ -1071,6 +1107,7 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, for (i = 0; i < nb_option_roms; i++) { rom_add_option(option_rom[i].name, option_rom[i].bootindex); } + guest_info->fw_cfg = fw_cfg; return fw_cfg; } diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index d547548..eaff0b6 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -90,6 +90,7 @@ static void pc_init1(MemoryRegion *system_memory, MemoryRegion *rom_memory; DeviceState *icc_bridge; FWCfgState *fw_cfg = NULL; + PcGuestInfo *guest_info; icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); object_property_add_child(qdev_get_machine(), "icc-bridge", @@ -119,12 +120,23 @@ static void pc_init1(MemoryRegion *system_memory, rom_memory = system_memory; } + guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size); + + /* Set PCI window size the way seabios has always done it. */ + /* TODO: consider just starting at below_4g_mem_size */ + if (ram_size <= 0x80000000) + guest_info->pci_info.w32.begin = 0x80000000; + else if (ram_size <= 0xc0000000) + guest_info->pci_info.w32.begin = 0xc0000000; + else + guest_info->pci_info.w32.begin = 0xe0000000; + /* allocate ram and load rom/bios */ if (!xen_enabled()) { fw_cfg = pc_memory_init(system_memory, kernel_filename, kernel_cmdline, initrd_filename, below_4g_mem_size, above_4g_mem_size, - rom_memory, &ram_memory); + rom_memory, &ram_memory, guest_info); } gsi_state = g_malloc0(sizeof(*gsi_state)); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 7888dfe..32d6357 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -77,6 +77,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) ICH9LPCState *ich9_lpc; PCIDevice *ahci; DeviceState *icc_bridge; + PcGuestInfo *guest_info; icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); object_property_add_child(qdev_get_machine(), "icc-bridge", @@ -105,11 +106,13 @@ static void pc_q35_init(QEMUMachineInitArgs *args) rom_memory = get_system_memory(); } + guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size); + /* allocate ram and load rom/bios */ if (!xen_enabled()) { pc_memory_init(get_system_memory(), kernel_filename, kernel_cmdline, initrd_filename, below_4g_mem_size, above_4g_mem_size, - rom_memory, &ram_memory); + rom_memory, &ram_memory, guest_info); } /* irq lines */ @@ -131,6 +134,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) q35_host->mch.address_space_io = get_system_io(); q35_host->mch.below_4g_mem_size = below_4g_mem_size; q35_host->mch.above_4g_mem_size = above_4g_mem_size; + q35_host->mch.guest_info = guest_info; /* pci */ qdev_init_nofail(DEVICE(q35_host)); host_bus = q35_host->host.pci.bus; diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 24df6b5..63c64dd 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -244,6 +244,10 @@ static int mch_init(PCIDevice *d) hwaddr pci_hole64_size; MCHPCIState *mch = MCH_PCI_DEVICE(d); + /* Leave enough space for the biggest MCFG BAR */ + mch->guest_info->pci_info.w32.begin = MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + + MCH_HOST_BRIDGE_PCIEXBAR_MAX; + /* setup pci memory regions */ memory_region_init_alias(&mch->pci_hole, "pci-hole", mch->pci_address_space, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index b4c8a74..1bf5219 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -9,8 +9,20 @@ #include "net/net.h" #include "hw/i386/ioapic.h" +#include "qemu/range.h" + /* PC-style peripherals (also used by other machines). */ +typedef struct PcPciInfo { + Range w32; + Range w64; +} PcPciInfo; + +struct PcGuestInfo { + PcPciInfo pci_info; + FWCfgState *fw_cfg; +}; + /* parallel.c */ static inline bool parallel_init(ISABus *bus, int index, CharDriverState *chr) { @@ -80,6 +92,10 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge); void pc_hot_add_cpu(const int64_t id, Error **errp); void pc_acpi_init(const char *default_dsdt); + +PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, + ram_addr_t above_4g_mem_size); + FWCfgState *pc_memory_init(MemoryRegion *system_memory, const char *kernel_filename, const char *kernel_cmdline, @@ -87,7 +103,8 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *rom_memory, - MemoryRegion **ram_memory); + MemoryRegion **ram_memory, + PcGuestInfo *guest_info); qemu_irq *pc_allocate_cpu_irq(void); DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index e182c82..b083831 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -55,6 +55,7 @@ typedef struct MCHPCIState { uint8_t smm_enabled; ram_addr_t below_4g_mem_size; ram_addr_t above_4g_mem_size; + PcGuestInfo *guest_info; } MCHPCIState; typedef struct Q35PCIHost { @@ -81,6 +82,7 @@ typedef struct Q35PCIHost { #define MCH_HOST_BRIDGE_PCIEXBAR 0x60 /* 64bit register */ #define MCH_HOST_BRIDGE_PCIEXBAR_SIZE 8 /* 64bit register */ #define MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT 0xb0000000 +#define MCH_HOST_BRIDGE_PCIEXBAR_MAX (0x10000000) /* 256M */ #define MCH_HOST_BRIDGE_PCIEXBAR_ADMSK Q35_MASK(64, 35, 28) #define MCH_HOST_BRIDGE_PCIEXBAR_128ADMSK ((uint64_t)(1 << 26)) #define MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK ((uint64_t)(1 << 25)) diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index afe4ec7..ec0d0d1 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -62,5 +62,6 @@ typedef struct VirtIODevice VirtIODevice; typedef struct QEMUSGList QEMUSGList; typedef struct SHPCDevice SHPCDevice; typedef struct FWCfgState FWCfgState; +typedef struct PcGuestInfo PcGuestInfo; #endif /* QEMU_TYPEDEFS_H */
Will be used to pass hole ranges to guests. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> --- hw/i386/pc.c | 39 ++++++++++++++++++++++++++++++++++++++- hw/i386/pc_piix.c | 14 +++++++++++++- hw/i386/pc_q35.c | 6 +++++- hw/pci-host/q35.c | 4 ++++ include/hw/i386/pc.h | 19 ++++++++++++++++++- include/hw/pci-host/q35.h | 2 ++ include/qemu/typedefs.h | 1 + 7 files changed, 81 insertions(+), 4 deletions(-)