Message ID | 1331889732-25240-1-git-send-email-chenhui.zhao@freescale.com (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Kumar Gala |
Headers | show |
On 03/16/2012 04:22 AM, Zhao Chenhui wrote: > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 4eecaaa..3d4c497 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -219,7 +219,8 @@ config ARCH_HIBERNATION_POSSIBLE > config ARCH_SUSPEND_POSSIBLE > def_bool y > depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \ > - (PPC_85xx && !SMP) || PPC_86xx || PPC_PSERIES || 44x || 40x > + (PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \ > + || 44x || 40x > > config PPC_DCR_NATIVE > bool > @@ -330,7 +331,8 @@ config SWIOTLB > > config HOTPLUG_CPU > bool "Support for enabling/disabling CPUs" > - depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC || PPC_POWERNV) > + depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || \ > + PPC_PMAC || PPC_POWERNV || PPC_85xx) No e500mc exclusion on HOTPLUG_CPU? I don't see where this depends on ARCH_SUSPEND_POSSIBLE. > ---help--- > Say Y here to be able to disable and re-enable individual > CPUs at runtime on SMP machines. > diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h > index ab9e402..57b5dd7 100644 > --- a/arch/powerpc/include/asm/cacheflush.h > +++ b/arch/powerpc/include/asm/cacheflush.h > @@ -30,6 +30,12 @@ extern void flush_dcache_page(struct page *page); > #define flush_dcache_mmap_lock(mapping) do { } while (0) > #define flush_dcache_mmap_unlock(mapping) do { } while (0) > > +#ifdef CONFIG_FSL_BOOKE > +extern void flush_disable_L1(void); > +#else > +#define flush_disable_L1() do { } while (0) > +#endif When would we want this to be a no-op? Shouldn't you get an error if you try to do this without an implementation, rather than silently corrupt your cache? There's an existing __flush_disable_L1() for 6xx. Let's not introduce a different spelling of the same thing for no good reason -- even if those leading underscores are annoying and pointless. 
:-) > +#ifdef CONFIG_HOTPLUG_CPU > + /* Corresponding to generic_set_cpu_dead() */ > + generic_set_cpu_up(nr); > + > + if (system_state == SYSTEM_RUNNING) { > + out_be32(&spin_table->addr_l, 0); > + > + /* > + * We don't set the BPTR register here upon it points > + * to the boot page properly. > + */ > + mpic_reset_core(hw_cpu); What if we don't have an MPIC? What if MPIC support isn't present in the kernel, even if we never run this code? Also, can you limit the hard core reset to cases that really need it? > struct smp_ops_t smp_85xx_ops = { > .kick_cpu = smp_85xx_kick_cpu, > -#ifdef CONFIG_KEXEC > +#ifdef CONFIG_HOTPLUG_CPU > + .cpu_disable = generic_cpu_disable, > + .cpu_die = generic_cpu_die, > +#endif > .give_timebase = smp_generic_give_timebase, > .take_timebase = smp_generic_take_timebase, > -#endif > }; We need to stop using smp_generic_give/take_timebase, not expand its use. This stuff breaks under hypervisors where timebase can't be written. It wasn't too bad before since we generally didn't enable CONFIG_KEXEC, but we're more likely to want CONFIG_HOTPLUG_CPU. Do the timebase sync the way U-Boot does -- if you find the appropriate guts node in the device tree. > > #ifdef CONFIG_KEXEC > @@ -218,8 +283,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) > } > #endif /* CONFIG_KEXEC */ > > -static void __init > -smp_85xx_setup_cpu(int cpu_nr) > +static void __cpuinit smp_85xx_setup_cpu(int cpu_nr) > { > if (smp_85xx_ops.probe == smp_mpic_probe) > mpic_setup_this_cpu(); > @@ -249,6 +313,10 @@ void __init mpc85xx_smp_init(void) > smp_85xx_ops.cause_ipi = doorbell_cause_ipi; > } > > +#ifdef CONFIG_HOTPLUG_CPU > + ppc_md.cpu_die = smp_85xx_mach_cpu_die; > +#endif Do not set this unconditionally without checking that you're on e500v1/e500v2 (or at least that you have the NAP cputable flag, and an MPIC if you're going to rely on that). The kconfig exclusion is at best a temporary hack. -Scott
> > struct smp_ops_t smp_85xx_ops = { > > .kick_cpu = smp_85xx_kick_cpu, > > -#ifdef CONFIG_KEXEC > > +#ifdef CONFIG_HOTPLUG_CPU > > + .cpu_disable = generic_cpu_disable, > > + .cpu_die = generic_cpu_die, > > +#endif > > .give_timebase = smp_generic_give_timebase, > > .take_timebase = smp_generic_take_timebase, > > -#endif > > }; > > We need to stop using smp_generic_give/take_timebase, not expand its use. > This stuff breaks under hypervisors where timebase can't be written. It > wasn't too bad before since we generally didn't enable CONFIG_KEXEC, but > we're more likely to want CONFIG_HOTPLUG_CPU. I understand that the guest OS shouldn't change the real timebase. But no matter what timebase syncing method we are using, the timebase needs to be changed anyway for certain features. I think the better way should be trapping timebase modification in the hypervisor. > > Do the timebase sync the way U-Boot does -- if you find the appropriate > guts node in the device tree. That involves stopping timebase for a short time on all cores including the cores that are still online. Won't this be a potential issue? - Leo
On 04/17/2012 04:51 AM, Li Yang-R58472 wrote: >>> struct smp_ops_t smp_85xx_ops = { >>> .kick_cpu = smp_85xx_kick_cpu, >>> -#ifdef CONFIG_KEXEC >>> +#ifdef CONFIG_HOTPLUG_CPU >>> + .cpu_disable = generic_cpu_disable, >>> + .cpu_die = generic_cpu_die, >>> +#endif >>> .give_timebase = smp_generic_give_timebase, >>> .take_timebase = smp_generic_take_timebase, >>> -#endif >>> }; >> >> We need to stop using smp_generic_give/take_timebase, not expand its use. >> This stuff breaks under hypervisors where timebase can't be written. It >> wasn't too bad before since we generally didn't enable CONFIG_KEXEC, but >> we're more likely to want CONFIG_HOTPLUG_CPU. > > I understand that the guest OS shouldn't change the real timebase. Cannot change it, and we've seen the tbsync code loop forever when it tries (since the changes aren't taking effect). > But no matter what timebase syncing method we are using, the timebase need to be changed anyway for certain features. That's why I said to do it the way U-Boot does it. > I think the better way should be trapping timebase modification in the hypervisor. It does trap. Currently we treat it as a no-op. The only reasonable alternative is to give the guest an exception. It is simply not allowed for a guest to modify the timebase -- we are not going to break the host's timebase sync. See the virtualized implementation note in section 9.2.1 of book III-E of Power ISA 2.06B: "In virtualized implementations, TBU and TBL are read-only." >> Do the timebase sync the way U-Boot does -- if you find the appropriate >> guts node in the device tree. > > That involves stopping timebase for a short time on all cores including the cores that are still online. Won't this be a potential issue? I don't think it's a big deal in the contexts where you'd be doing this -- at least not worse than the current situation. Just make sure that you don't reset the timebase to zero or otherwise make a core see the timebase go backward. -Scott
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4eecaaa..3d4c497 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -219,7 +219,8 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \ - (PPC_85xx && !SMP) || PPC_86xx || PPC_PSERIES || 44x || 40x + (PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \ + || 44x || 40x config PPC_DCR_NATIVE bool @@ -330,7 +331,8 @@ config SWIOTLB config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" - depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC || PPC_POWERNV) + depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || \ + PPC_PMAC || PPC_POWERNV || PPC_85xx) ---help--- Say Y here to be able to disable and re-enable individual CPUs at runtime on SMP machines. diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index ab9e402..57b5dd7 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -30,6 +30,12 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) +#ifdef CONFIG_FSL_BOOKE +extern void flush_disable_L1(void); +#else +#define flush_disable_L1() do { } while (0) +#endif + extern void __flush_icache_range(unsigned long, unsigned long); static inline void flush_icache_range(unsigned long start, unsigned long stop) { diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index adba970..7517863 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -65,6 +65,7 @@ int generic_cpu_disable(void); void generic_cpu_die(unsigned int cpu); void generic_mach_cpu_die(void); void generic_set_cpu_dead(unsigned int cpu); +void generic_set_cpu_up(unsigned int cpu); int generic_check_cpu_restart(unsigned int cpu); #endif @@ -191,6 +192,7 @@ extern unsigned 
long __secondary_hold_spinloop; extern unsigned long __secondary_hold_acknowledge; extern char __secondary_hold; +extern void __early_start(void); #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 28e6259..e654f97 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1004,6 +1004,34 @@ _GLOBAL(flush_dcache_L1) blr +/* Flush L1 d-cache, invalidate and disable d-cache and i-cache */ +_GLOBAL(flush_disable_L1) + mflr r10 + bl flush_dcache_L1 /* Flush L1 d-cache */ + mtlr r10 + + mfspr r4, SPRN_L1CSR0 /* Invalidate and disable d-cache */ + li r5, 2 + rlwimi r4, r5, 0, 3 + + msync + isync + mtspr SPRN_L1CSR0, r4 + isync + +1: mfspr r4, SPRN_L1CSR0 /* Wait for the invalidate to finish */ + andi. r4, r4, 2 + bne 1b + + mfspr r4, SPRN_L1CSR1 /* Invalidate and disable i-cache */ + li r5, 2 + rlwimi r4, r5, 0, 3 + + mtspr SPRN_L1CSR1, r4 + isync + + blr + #ifdef CONFIG_SMP /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 46695fe..cc18a70 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -423,6 +423,16 @@ void generic_set_cpu_dead(unsigned int cpu) per_cpu(cpu_state, cpu) = CPU_DEAD; } +/* + * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise + * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(), + * which makes the delay in generic_cpu_die() not happen. 
+ */ +void generic_set_cpu_up(unsigned int cpu) +{ + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; +} + int generic_check_cpu_restart(unsigned int cpu) { return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE; diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index ff42490..f24b2b4 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -2,7 +2,7 @@ * Author: Andy Fleming <afleming@freescale.com> * Kumar Gala <galak@kernel.crashing.org> * - * Copyright 2006-2008, 2011 Freescale Semiconductor Inc. + * Copyright 2006-2008, 2011-2012 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -17,6 +17,7 @@ #include <linux/of.h> #include <linux/kexec.h> #include <linux/highmem.h> +#include <linux/cpu.h> #include <asm/machdep.h> #include <asm/pgtable.h> @@ -29,28 +30,59 @@ #include <sysdev/mpic.h> #include "smp.h" -extern void __early_start(void); - -#define BOOT_ENTRY_ADDR_UPPER 0 -#define BOOT_ENTRY_ADDR_LOWER 1 -#define BOOT_ENTRY_R3_UPPER 2 -#define BOOT_ENTRY_R3_LOWER 3 -#define BOOT_ENTRY_RESV 4 -#define BOOT_ENTRY_PIR 5 -#define BOOT_ENTRY_R6_UPPER 6 -#define BOOT_ENTRY_R6_LOWER 7 -#define NUM_BOOT_ENTRY 8 -#define SIZE_BOOT_ENTRY (NUM_BOOT_ENTRY * sizeof(u32)) - -static int __init -smp_85xx_kick_cpu(int nr) +struct epapr_spin_table { + u32 addr_h; + u32 addr_l; + u32 r3_h; + u32 r3_l; + u32 reserved; + u32 pir; +}; + +static void __cpuinit smp_85xx_setup_cpu(int cpu_nr); + +#ifdef CONFIG_HOTPLUG_CPU +static void __cpuinit smp_85xx_mach_cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + u32 tmp; + + local_irq_disable(); + idle_task_exit(); + generic_set_cpu_dead(cpu); + mb(); + + mtspr(SPRN_TCR, 0); + + flush_disable_L1(); + tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP; + mb(); + isync(); + mtspr(SPRN_HID0, tmp); + isync(); + + /* Enter NAP mode. 
*/ + tmp = mfmsr(); + tmp |= MSR_WE; + mb(); + mtmsr(tmp); + isync(); + + while (1) + ; +} +#endif + +static int __cpuinit smp_85xx_kick_cpu(int nr) + { unsigned long flags; const u64 *cpu_rel_addr; - __iomem u32 *bptr_vaddr; + __iomem struct epapr_spin_table *spin_table; struct device_node *np; - int n = 0, hw_cpu = get_hard_smp_processor_id(nr); + int hw_cpu = get_hard_smp_processor_id(nr); int ioremappable; + int ret = 0; WARN_ON(nr < 0 || nr >= NR_CPUS); WARN_ON(hw_cpu < 0 || hw_cpu >= NR_CPUS); @@ -75,50 +107,83 @@ smp_85xx_kick_cpu(int nr) /* Map the spin table */ if (ioremappable) - bptr_vaddr = ioremap(*cpu_rel_addr, SIZE_BOOT_ENTRY); + spin_table = ioremap(*cpu_rel_addr, + sizeof(struct epapr_spin_table)); else - bptr_vaddr = phys_to_virt(*cpu_rel_addr); + spin_table = phys_to_virt(*cpu_rel_addr); local_irq_save(flags); - - out_be32(bptr_vaddr + BOOT_ENTRY_PIR, hw_cpu); #ifdef CONFIG_PPC32 - out_be32(bptr_vaddr + BOOT_ENTRY_ADDR_LOWER, __pa(__early_start)); +#ifdef CONFIG_HOTPLUG_CPU + /* Corresponding to generic_set_cpu_dead() */ + generic_set_cpu_up(nr); + + if (system_state == SYSTEM_RUNNING) { + out_be32(&spin_table->addr_l, 0); + + /* + * We don't set the BPTR register here upon it points + * to the boot page properly. + */ + mpic_reset_core(hw_cpu); + + /* wait until core is ready... */ + if (!spin_event_timeout(in_be32(&spin_table->addr_l) == 1, + 10000, 100)) { + pr_err("%s: timeout waiting for core %d to reset\n", + __func__, hw_cpu); + ret = -ENOENT; + goto out; + } + + /* clear the acknowledge status */ + __secondary_hold_acknowledge = -1; + } +#endif + out_be32(&spin_table->pir, hw_cpu); + out_be32(&spin_table->addr_l, __pa(__early_start)); if (!ioremappable) - flush_dcache_range((ulong)bptr_vaddr, - (ulong)(bptr_vaddr + SIZE_BOOT_ENTRY)); + flush_dcache_range((ulong)spin_table, + (ulong)spin_table + sizeof(struct epapr_spin_table)); /* Wait a bit for the CPU to ack. 
*/ - while ((__secondary_hold_acknowledge != hw_cpu) && (++n < 1000)) - mdelay(1); + if (!spin_event_timeout(__secondary_hold_acknowledge == hw_cpu, + 10000, 100)) { + pr_err("%s: timeout waiting for core %d to ack\n", + __func__, hw_cpu); + ret = -ENOENT; + goto out; + } +out: #else smp_generic_kick_cpu(nr); - out_be64((u64 *)(bptr_vaddr + BOOT_ENTRY_ADDR_UPPER), - __pa((u64)*((unsigned long long *) generic_secondary_smp_init))); + out_be32(&spin_table->pir, hw_cpu); + out_be64((u64 *)(&spin_table->addr_h), + __pa((u64)*((unsigned long long *)generic_secondary_smp_init))); if (!ioremappable) - flush_dcache_range((ulong)bptr_vaddr, - (ulong)(bptr_vaddr + SIZE_BOOT_ENTRY)); + flush_dcache_range((ulong)spin_table, + (ulong)spin_table + sizeof(struct epapr_spin_table)); #endif local_irq_restore(flags); if (ioremappable) - iounmap(bptr_vaddr); + iounmap(spin_table); - pr_debug("waited %d msecs for CPU #%d.\n", n, nr); - - return 0; + return ret; } struct smp_ops_t smp_85xx_ops = { .kick_cpu = smp_85xx_kick_cpu, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = generic_cpu_disable, + .cpu_die = generic_cpu_die, +#endif .give_timebase = smp_generic_give_timebase, .take_timebase = smp_generic_take_timebase, -#endif }; #ifdef CONFIG_KEXEC @@ -218,8 +283,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) } #endif /* CONFIG_KEXEC */ -static void __init -smp_85xx_setup_cpu(int cpu_nr) +static void __cpuinit smp_85xx_setup_cpu(int cpu_nr) { if (smp_85xx_ops.probe == smp_mpic_probe) mpic_setup_this_cpu(); @@ -249,6 +313,10 @@ void __init mpc85xx_smp_init(void) smp_85xx_ops.cause_ipi = doorbell_cause_ipi; } +#ifdef CONFIG_HOTPLUG_CPU + ppc_md.cpu_die = smp_85xx_mach_cpu_die; +#endif + smp_ops = &smp_85xx_ops; #ifdef CONFIG_KEXEC