Message ID | 20230816173420.83232-2-CFSworks@gmail.com |
---|---|
State | Superseded |
Delegated to: | Andre Przywara |
Headers | show |
Series | Allwinner R528/T113s PSCI | expand |
On Wed, 16 Aug 2023 10:34:16 -0700 Sam Edwards <cfsworks@gmail.com> wrote: Hi Sam, thanks for the update. > This patch restructures psci.c to get away from the "many different > function definitions switched by #ifdef" paradigm to the preferred style > of having a single function definition with `if (IS_ENABLED(...))` to > make the optimizer include only the appropriate function bodies instead. > > There are no functional changes here. So this diff is hard to read - not your fault, this seems to be common for those kind of refactorings - but I convinced myself by comparing old and new side-by-side and test-compiling that this doesn't introduce any functional change. The resulting object file is different (8 byte larger, even), so it's hard to prove, and I still have to actually test it on some boards, but the idea is a good one and it's the right direction, so to move things forward: > Signed-off-by: Sam Edwards <CFSworks@gmail.com> Reviewed-by: Andre Przywara <andre.przywara@arm.com> Cheers, Andre > --- > arch/arm/cpu/armv7/sunxi/psci.c | 102 +++++++++++++------------------- > 1 file changed, 42 insertions(+), 60 deletions(-) > > diff --git a/arch/arm/cpu/armv7/sunxi/psci.c b/arch/arm/cpu/armv7/sunxi/psci.c > index e1d3638b5c..7804e0933b 100644 > --- a/arch/arm/cpu/armv7/sunxi/psci.c > +++ b/arch/arm/cpu/armv7/sunxi/psci.c > @@ -76,11 +76,8 @@ static void __secure __mdelay(u32 ms) > isb(); > } > > -static void __secure clamp_release(u32 __maybe_unused *clamp) > +static void __secure clamp_release(u32 *clamp) > { > -#if defined(CONFIG_MACH_SUN6I) || defined(CONFIG_MACH_SUN7I) || \ > - defined(CONFIG_MACH_SUN8I_H3) || \ > - defined(CONFIG_MACH_SUN8I_R40) > u32 tmp = 0x1ff; > do { > tmp >>= 1; > @@ -88,83 +85,68 @@ static void __secure clamp_release(u32 __maybe_unused *clamp) > } while (tmp); > > __mdelay(10); > -#endif > } > > -static void __secure clamp_set(u32 __maybe_unused *clamp) > +static void __secure clamp_set(u32 *clamp) > { > -#if defined(CONFIG_MACH_SUN6I) || defined(CONFIG_MACH_SUN7I) || \ > - defined(CONFIG_MACH_SUN8I_H3) || \ > - defined(CONFIG_MACH_SUN8I_R40) > writel(0xff, clamp); > -#endif > } > > -static void __secure sunxi_power_switch(u32 *clamp, u32 *pwroff, bool on, > - int cpu) > +static void __secure sunxi_set_entry_address(void *entry) > { > - if (on) { > - /* Release power clamp */ > - clamp_release(clamp); > - > - /* Clear power gating */ > - clrbits_le32(pwroff, BIT(cpu)); > + /* secondary core entry address is programmed differently on R40 */ > + if (IS_ENABLED(CONFIG_MACH_SUN8I_R40)) { > + writel((u32)entry, > + SUNXI_SRAMC_BASE + SUN8I_R40_SRAMC_SOFT_ENTRY_REG0); > } else { > - /* Set power gating */ > - setbits_le32(pwroff, BIT(cpu)); > + struct sunxi_cpucfg_reg *cpucfg = > + (struct sunxi_cpucfg_reg *)SUNXI_CPUCFG_BASE; > > - /* Activate power clamp */ > - clamp_set(clamp); > + writel((u32)entry, &cpucfg->priv0); > } > } > > -#ifdef CONFIG_MACH_SUN8I_R40 > -/* secondary core entry address is programmed differently on R40 */ > -static void __secure sunxi_set_entry_address(void *entry) > -{ > - writel((u32)entry, > - SUNXI_SRAMC_BASE + SUN8I_R40_SRAMC_SOFT_ENTRY_REG0); > -} > -#else > -static void __secure sunxi_set_entry_address(void *entry) > +static void __secure sunxi_cpu_set_power(int cpu, bool on) > { > + u32 *clamp = NULL; > + u32 *pwroff; > struct sunxi_cpucfg_reg *cpucfg = > (struct sunxi_cpucfg_reg *)SUNXI_CPUCFG_BASE; > > - writel((u32)entry, &cpucfg->priv0); > -} > -#endif > + /* sun7i (A20) is different from other single cluster SoCs */ > + if (IS_ENABLED(CONFIG_MACH_SUN7I)) { > + clamp = &cpucfg->cpu1_pwr_clamp; > + pwroff = &cpucfg->cpu1_pwroff; > + } else if (IS_ENABLED(CONFIG_MACH_SUN8I_R40)) { > + clamp = (void *)cpucfg + SUN8I_R40_PWR_CLAMP(cpu); > + pwroff = (void *)cpucfg + SUN8I_R40_PWROFF; > + } else { > + struct sunxi_prcm_reg *prcm = > + (struct sunxi_prcm_reg *)SUNXI_PRCM_BASE; > > -#ifdef CONFIG_MACH_SUN7I > -/* sun7i (A20) is different from other single cluster SoCs */ > -static void __secure sunxi_cpu_set_power(int __always_unused cpu, bool on) > -{ > - struct sunxi_cpucfg_reg *cpucfg = > - (struct sunxi_cpucfg_reg *)SUNXI_CPUCFG_BASE; > + if (IS_ENABLED(CONFIG_MACH_SUN6I) || > + IS_ENABLED(CONFIG_MACH_SUN8I_H3)) > + clamp = &prcm->cpu_pwr_clamp[cpu]; > > - sunxi_power_switch(&cpucfg->cpu1_pwr_clamp, &cpucfg->cpu1_pwroff, > - on, 0); > -} > -#elif defined CONFIG_MACH_SUN8I_R40 > -static void __secure sunxi_cpu_set_power(int cpu, bool on) > -{ > - struct sunxi_cpucfg_reg *cpucfg = > - (struct sunxi_cpucfg_reg *)SUNXI_CPUCFG_BASE; > + pwroff = &prcm->cpu_pwroff; > + } > > - sunxi_power_switch((void *)cpucfg + SUN8I_R40_PWR_CLAMP(cpu), > - (void *)cpucfg + SUN8I_R40_PWROFF, > - on, cpu); > -} > -#else /* ! CONFIG_MACH_SUN7I && ! CONFIG_MACH_SUN8I_R40 */ > -static void __secure sunxi_cpu_set_power(int cpu, bool on) > -{ > - struct sunxi_prcm_reg *prcm = > - (struct sunxi_prcm_reg *)SUNXI_PRCM_BASE; > + if (on) { > + /* Release power clamp */ > + if (clamp) > + clamp_release(clamp); > > - sunxi_power_switch(&prcm->cpu_pwr_clamp[cpu], &prcm->cpu_pwroff, > - on, cpu); > + /* Clear power gating */ > + clrbits_le32(pwroff, BIT(cpu)); > + } else { > + /* Set power gating */ > + setbits_le32(pwroff, BIT(cpu)); > + > + /* Activate power clamp */ > + if (clamp) > + clamp_set(clamp); > + } > } > -#endif /* CONFIG_MACH_SUN7I */ > > void __secure sunxi_cpu_power_off(u32 cpuid) > {
On 8/18/23 07:11, Andre Przywara wrote: Hi Andre, > The resulting object file is different (8 byte larger, > even), so it's hard to prove I'm no stranger to reading object code. Since the output should be identical in principle, I'll spend a little bit of time today seeing if I can identify what's changing. If it's easy enough, I'd like to adjust my patch so that the optimizer does produce the same output. (Keep in mind I'm on Clang, though. If Clang already gives the same output for both, I'll just report back to use that when comparing.) >> Signed-off-by: Sam Edwards <CFSworks@gmail.com> > > Reviewed-by: Andre Przywara <andre.przywara@arm.com> Thank you! > > Cheers, > Andre Likewise, Sam
On 8/18/23 10:40, Sam Edwards wrote: > On 8/18/23 07:11, Andre Przywara wrote: > > Hi Andre, > >> The resulting object file is different (8 byte larger, >> even), so it's hard to prove > > I'm no stranger to reading object code. Since the output should be > identical in principle, I'll spend a little bit of time today seeing if > I can identify what's changing. If it's easy enough, I'd like to adjust > my patch so that the optimizer does produce the same output. (Keep in > mind I'm on Clang, though. If Clang already gives the same output for > both, I'll just report back to use that when comparing.) I built only psci.o from every ARM32 sunxi for which we have a defconfig (and for which PSCI is supported), for 81 targets total (though there are only 4 variations: R40, sun7i, H3/sun6i, and "everything else"). I am working with Clang version 16.0.6. I compared only the secure text section. The command to extract this looks like: llvm-objcopy -O binary --only-section=._secure.text psci.o text.bin This is important because there are debug sections that will change when the source file line numbers change, so we must ignore those when comparing. In the majority of cases, there are no changes to the text section introduced by this patch. In the R40 case, there's a small change where the compiler adds a NULL check onto the result of the `(void *)cpucfg + SUN8I_R40_PWR_CLAMP(cpu)` computation, which we can ignore as it won't affect anything in practice. In the sun7i case, the only changes are because I am NOT hardcoding the CPU to 0, which does look like I broke it (since that means it will use cpu=1). So I'm going to need to fix that in v3. For good measure, I also applied the same methodology to patch 2 in this series, and that introduces no text section changes whatsoever in any of the tested cases. So patch 2 (theoretically, anyway) needs no bugfixes or hardware testing. Patch 3 does cause a text section change for all targets. I will have to investigate why, in case I messed up any of the offsets when migrating off of structs. Regards, Sam
On Fri, 18 Aug 2023 14:17:07 -0700 Sam Edwards <cfsworks@gmail.com> wrote: > On 8/18/23 10:40, Sam Edwards wrote: > > On 8/18/23 07:11, Andre Przywara wrote: > > > > Hi Andre, > > > >> The resulting object file is different (8 byte larger, > >> even), so it's hard to prove > > > > I'm no stranger to reading object code. Since the output should be > > identical in principle, I'll spend a little bit of time today seeing if > > I can identify what's changing. If it's easy enough, I'd like to adjust > > my patch so that the optimizer does produce the same output. (Keep in > > mind I'm on Clang, though. If Clang already gives the same output for > > both, I'll just report back to use that when comparing.) > > I built only psci.o from every ARM32 sunxi for which we have a defconfig > (and for which PSCI is supported), for 81 targets total (though there > are only 4 variations: R40, sun7i, H3/sun6i, and "everything else"). I > am working with Clang version 16.0.6. > > I compared only the secure text section. The command to extract this > looks like: > llvm-objcopy -O binary --only-section=._secure.text psci.o text.bin > This is important because there are debug sections that will change when > the source file line numbers change, so we must ignore those when comparing. > > In the majority of cases, there are no changes to the text section > introduced by this patch. In the R40 case, there's a small change where > the compiler adds a NULL check onto the result of the `(void *)cpucfg + > SUN8I_R40_PWR_CLAMP(cpu)` computation, which we can ignore as it won't > affect anything in practice. In the sun7i case, the only changes are > because I am NOT hardcoding the CPU to 0, which does look like I broke > it (since that means it will use cpu=1). So I'm going to need to fix > that in v3. ^^^^^^^ Do you have an update on this? I will try to test it on an R40 ASAP. Cheers, Andre > For good measure, I also applied the same methodology to patch 2 in this > series, and that introduces no text section changes whatsoever in any of > the tested cases. So patch 2 (theoretically, anyway) needs no bugfixes > or hardware testing. > > Patch 3 does cause a text section change for all targets. I will have to > investigate why, in case I messed up any of the offsets when migrating > off of structs. > > Regards, > Sam
On 9/27/23 10:34, Andre Przywara wrote: >> In the majority of cases, there are no changes to the text section >> introduced by this patch. In the R40 case, there's a small change where >> the compiler adds a NULL check onto the result of the `(void *)cpucfg + >> SUN8I_R40_PWR_CLAMP(cpu)` computation, which we can ignore as it won't >> affect anything in practice. In the sun7i case, the only changes are >> because I am NOT hardcoding the CPU to 0, which does look like I broke >> it (since that means it will use cpu=1). So I'm going to need to fix >> that in v3. > > ^^^^^^^ > Do you have an update on this? I will try to test it on an R40 ASAP. In my (not yet submitted) v3, I have the following change done: if (IS_ENABLED(CONFIG_MACH_SUN7I)) { clamp = &cpucfg->cpu1_pwr_clamp; pwroff = &cpucfg->cpu1_pwroff; + cpu = 0; } else if (IS_ENABLED(CONFIG_MACH_SUN8I_R40)) { That does negate any binary change in the SUN7I case. R40 is the only one that needs testing still. If you feel like testing on any SUN7Is just for good measure, you'll want to add in that missing line. > Cheers, > Andre Likewise, Sam
diff --git a/arch/arm/cpu/armv7/sunxi/psci.c b/arch/arm/cpu/armv7/sunxi/psci.c index e1d3638b5c..7804e0933b 100644 --- a/arch/arm/cpu/armv7/sunxi/psci.c +++ b/arch/arm/cpu/armv7/sunxi/psci.c @@ -76,11 +76,8 @@ static void __secure __mdelay(u32 ms) isb(); } -static void __secure clamp_release(u32 __maybe_unused *clamp) +static void __secure clamp_release(u32 *clamp) { -#if defined(CONFIG_MACH_SUN6I) || defined(CONFIG_MACH_SUN7I) || \ - defined(CONFIG_MACH_SUN8I_H3) || \ - defined(CONFIG_MACH_SUN8I_R40) u32 tmp = 0x1ff; do { tmp >>= 1; @@ -88,83 +85,68 @@ static void __secure clamp_release(u32 __maybe_unused *clamp) } while (tmp); __mdelay(10); -#endif } -static void __secure clamp_set(u32 __maybe_unused *clamp) +static void __secure clamp_set(u32 *clamp) { -#if defined(CONFIG_MACH_SUN6I) || defined(CONFIG_MACH_SUN7I) || \ - defined(CONFIG_MACH_SUN8I_H3) || \ - defined(CONFIG_MACH_SUN8I_R40) writel(0xff, clamp); -#endif } -static void __secure sunxi_power_switch(u32 *clamp, u32 *pwroff, bool on, - int cpu) +static void __secure sunxi_set_entry_address(void *entry) { - if (on) { - /* Release power clamp */ - clamp_release(clamp); - - /* Clear power gating */ - clrbits_le32(pwroff, BIT(cpu)); + /* secondary core entry address is programmed differently on R40 */ + if (IS_ENABLED(CONFIG_MACH_SUN8I_R40)) { + writel((u32)entry, + SUNXI_SRAMC_BASE + SUN8I_R40_SRAMC_SOFT_ENTRY_REG0); } else { - /* Set power gating */ - setbits_le32(pwroff, BIT(cpu)); + struct sunxi_cpucfg_reg *cpucfg = + (struct sunxi_cpucfg_reg *)SUNXI_CPUCFG_BASE; - /* Activate power clamp */ - clamp_set(clamp); + writel((u32)entry, &cpucfg->priv0); } } -#ifdef CONFIG_MACH_SUN8I_R40 -/* secondary core entry address is programmed differently on R40 */ -static void __secure sunxi_set_entry_address(void *entry) -{ - writel((u32)entry, - SUNXI_SRAMC_BASE + SUN8I_R40_SRAMC_SOFT_ENTRY_REG0); -} -#else -static void __secure sunxi_set_entry_address(void *entry) +static void __secure sunxi_cpu_set_power(int cpu, bool on) { + u32 *clamp = NULL; + u32 *pwroff; struct sunxi_cpucfg_reg *cpucfg = (struct sunxi_cpucfg_reg *)SUNXI_CPUCFG_BASE; - writel((u32)entry, &cpucfg->priv0); -} -#endif + /* sun7i (A20) is different from other single cluster SoCs */ + if (IS_ENABLED(CONFIG_MACH_SUN7I)) { + clamp = &cpucfg->cpu1_pwr_clamp; + pwroff = &cpucfg->cpu1_pwroff; + } else if (IS_ENABLED(CONFIG_MACH_SUN8I_R40)) { + clamp = (void *)cpucfg + SUN8I_R40_PWR_CLAMP(cpu); + pwroff = (void *)cpucfg + SUN8I_R40_PWROFF; + } else { + struct sunxi_prcm_reg *prcm = + (struct sunxi_prcm_reg *)SUNXI_PRCM_BASE; -#ifdef CONFIG_MACH_SUN7I -/* sun7i (A20) is different from other single cluster SoCs */ -static void __secure sunxi_cpu_set_power(int __always_unused cpu, bool on) -{ - struct sunxi_cpucfg_reg *cpucfg = - (struct sunxi_cpucfg_reg *)SUNXI_CPUCFG_BASE; + if (IS_ENABLED(CONFIG_MACH_SUN6I) || + IS_ENABLED(CONFIG_MACH_SUN8I_H3)) + clamp = &prcm->cpu_pwr_clamp[cpu]; - sunxi_power_switch(&cpucfg->cpu1_pwr_clamp, &cpucfg->cpu1_pwroff, - on, 0); -} -#elif defined CONFIG_MACH_SUN8I_R40 -static void __secure sunxi_cpu_set_power(int cpu, bool on) -{ - struct sunxi_cpucfg_reg *cpucfg = - (struct sunxi_cpucfg_reg *)SUNXI_CPUCFG_BASE; + pwroff = &prcm->cpu_pwroff; + } - sunxi_power_switch((void *)cpucfg + SUN8I_R40_PWR_CLAMP(cpu), - (void *)cpucfg + SUN8I_R40_PWROFF, - on, cpu); -} -#else /* ! CONFIG_MACH_SUN7I && ! CONFIG_MACH_SUN8I_R40 */ -static void __secure sunxi_cpu_set_power(int cpu, bool on) -{ - struct sunxi_prcm_reg *prcm = - (struct sunxi_prcm_reg *)SUNXI_PRCM_BASE; + if (on) { + /* Release power clamp */ + if (clamp) + clamp_release(clamp); - sunxi_power_switch(&prcm->cpu_pwr_clamp[cpu], &prcm->cpu_pwroff, - on, cpu); + /* Clear power gating */ + clrbits_le32(pwroff, BIT(cpu)); + } else { + /* Set power gating */ + setbits_le32(pwroff, BIT(cpu)); + + /* Activate power clamp */ + if (clamp) + clamp_set(clamp); + } } -#endif /* CONFIG_MACH_SUN7I */ void __secure sunxi_cpu_power_off(u32 cpuid) {
This patch restructures psci.c to get away from the "many different function definitions switched by #ifdef" paradigm to the preferred style of having a single function definition with `if (IS_ENABLED(...))` to make the optimizer include only the appropriate function bodies instead. There are no functional changes here. Signed-off-by: Sam Edwards <CFSworks@gmail.com> --- arch/arm/cpu/armv7/sunxi/psci.c | 102 +++++++++++++------------------- 1 file changed, 42 insertions(+), 60 deletions(-)