@@ -38,6 +38,7 @@ static struct lock reinit_lock = LOCK_UNLOCKED;
static bool radix_supported;
static unsigned long hid0_hile;
static unsigned long hid0_attn;
+static bool reconfigure_idle = false;
static bool sreset_enabled;
static bool ipi_enabled;
static bool pm_enabled;
@@ -397,7 +398,7 @@ static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
sync();
/* Check for jobs again */
- if (cpu_check_jobs(cpu) || !pm_enabled)
+ if (cpu_check_jobs(cpu) || reconfigure_idle)
goto skip_sleep;
	/* Setup wakeup cause in LPCR: EE (for IPI) */
@@ -412,7 +413,7 @@ static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
sync();
/* Check if PM got disabled */
- if (!pm_enabled)
+ if (reconfigure_idle)
goto skip_sleep;
/* EE and DEC */
@@ -453,7 +454,7 @@ static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
sync();
/* Check for jobs again */
- if (cpu_check_jobs(cpu) || !pm_enabled)
+ if (cpu_check_jobs(cpu) || reconfigure_idle)
goto skip_sleep;
/* HV DBELL for IPI */
@@ -466,7 +467,7 @@ static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
sync();
/* Check if PM got disabled */
- if (!pm_enabled)
+ if (reconfigure_idle)
goto skip_sleep;
/* HV DBELL and DEC */
@@ -540,69 +541,70 @@ static void cpu_idle_pm(enum cpu_wake_cause wake_on)
}
}
-void cpu_idle_job(void)
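+/*
+ * Idle entry/exit protocol: CPUs count themselves into nr_cpus_idle
+ * under idle_lock before idling, and count themselves out afterwards.
+ * A CPU that wants to change idle settings first sets reconfigure_idle
+ * (blocking new entries), kicks idlers awake, then waits for the count
+ * to drain to zero.
+ */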
+static struct lock idle_lock = LOCK_UNLOCKED;
+static int nr_cpus_idle = 0;
+
+static void enter_idle(void)
{
- if (pm_enabled) {
- cpu_idle_pm(cpu_wake_on_job);
- } else {
- struct cpu_thread *cpu = this_cpu();
+ for (;;) {
+ lock(&idle_lock);
+ if (!reconfigure_idle) {
+ nr_cpus_idle++;
+ break;
+ }
+ unlock(&idle_lock);
+ /* Another CPU is reconfiguring idle */
smt_lowest();
- /* Check for jobs again */
- while (!cpu_check_jobs(cpu)) {
- if (pm_enabled)
- break;
+ while (reconfigure_idle)
barrier();
- }
smt_medium();
}
+
+ unlock(&idle_lock);
}
-void cpu_idle_delay(unsigned long delay)
+static void exit_idle(void)
{
- unsigned long now = mftb();
- unsigned long end = now + delay;
- unsigned long min_pm = usecs_to_tb(10);
-
- if (pm_enabled && delay > min_pm) {
-pm:
- for (;;) {
- if (delay >= 0x7fffffff)
- delay = 0x7fffffff;
- mtspr(SPR_DEC, delay);
+ lock(&idle_lock);
+ assert(nr_cpus_idle > 0);
+ nr_cpus_idle--;
+ unlock(&idle_lock);
+}
- cpu_idle_pm(cpu_wake_on_dec);
+static void reconfigure_idle_start(void)
+{
+ struct cpu_thread *cpu;
- now = mftb();
- if (tb_compare(now, end) == TB_AAFTERB)
- break;
- delay = end - now;
- if (!(pm_enabled && delay > min_pm))
- goto no_pm;
+ /*
+ * First, make sure we are exclusive in reconfiguring by taking
+ * reconfigure_idle from false to true.
+ */
+ for (;;) {
+ lock(&idle_lock);
+ if (!reconfigure_idle) {
+ reconfigure_idle = true;
+ break;
}
- } else {
-no_pm:
+ unlock(&idle_lock);
+
+ /* Someone else is reconfiguring */
smt_lowest();
- for (;;) {
- now = mftb();
- if (tb_compare(now, end) == TB_AAFTERB)
- break;
- delay = end - now;
- if (pm_enabled && delay > min_pm) {
- smt_medium();
- goto pm;
- }
- }
+ while (reconfigure_idle)
+ barrier();
smt_medium();
}
-}
-static void cpu_pm_disable(void)
-{
- struct cpu_thread *cpu;
- unsigned int timeout;
+ unlock(&idle_lock);
- pm_enabled = false;
+ /*
+ * Then kick everyone out of idle.
+ */
+
+ /*
+ * Order earlier store to reconfigure_idle=true vs load from
+ * cpu->in_sleep and cpu->in_idle.
+ */
sync();
if (proc_gen == proc_gen_p8) {
@@ -617,29 +619,107 @@ static void cpu_pm_disable(void)
if (cpu->in_sleep || cpu->in_idle)
p9_dbell_send(cpu->pir);
}
+ }
- /* This code is racy with cpus entering idle, late ones miss the dbell */
+ /*
+ * Then wait for all other CPUs to leave idle. Now they will see
+ * reconfigure_idle==true and not re-enter idle.
+ */
+ smt_lowest();
+ while (nr_cpus_idle != 0)
+ barrier();
+ smt_medium();
- smt_lowest();
- for_each_available_cpu(cpu) {
- timeout = 0x08000000;
- while ((cpu->in_sleep || cpu->in_idle) && --timeout)
+	/*
+	 * Order the load of nr_cpus_idle against later loads of data that
+	 * other CPUs stored before coming out of idle.
+	 */
+ lwsync();
+}
+
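+/*
+ * Allow CPUs to enter idle again. The flag is cleared under idle_lock,
+ * so idlers that re-acquire the lock see the updated settings.
+ */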
+static void reconfigure_idle_end(void)
+{
+ assert(reconfigure_idle);
+ lock(&idle_lock);
+ reconfigure_idle = false;
+ unlock(&idle_lock);
+}
+
+void cpu_idle_job(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
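+	/* Idle until a job arrives; a wakeup may be only a reconfigure kick */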
+ do {
+ enter_idle();
+
+ if (pm_enabled) {
+ cpu_idle_pm(cpu_wake_on_job);
+ } else {
+ smt_lowest();
+ for (;;) {
+ if (cpu_check_jobs(cpu))
+ break;
+ if (reconfigure_idle)
+ break;
barrier();
- if (!timeout) {
- prlog(PR_DEBUG, "cpu_pm_disable TIMEOUT on cpu 0x%04x to exit idle\n",
- cpu->pir);
- p9_dbell_send(cpu->pir);
- }
+ }
+ smt_medium();
}
- smt_medium();
- }
+
+ exit_idle();
+
+ } while (!cpu_check_jobs(cpu));
}
-void cpu_set_sreset_enable(bool enabled)
+void cpu_idle_delay(unsigned long delay)
+{
+ unsigned long now = mftb();
+ unsigned long end = now + delay;
+ unsigned long min_pm = usecs_to_tb(10);
+
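+	/* As with cpu_idle_job, a reconfigure can end idle early, so loop */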
+ do {
+ enter_idle();
+
+ delay = end - now;
+
+ if (pm_enabled && delay > min_pm) {
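+			/* Clamp to the 32-bit decrementer maximum */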
+ if (delay >= 0x7fffffff)
+ delay = 0x7fffffff;
+ mtspr(SPR_DEC, delay);
+
+ cpu_idle_pm(cpu_wake_on_dec);
+ } else {
+ smt_lowest();
+ for (;;) {
+ if (tb_compare(mftb(), end) == TB_AAFTERB)
+ break;
+ if (reconfigure_idle)
+ break;
+ barrier();
+ }
+ smt_medium();
+ }
+
+ exit_idle();
+
+ now = mftb();
+
+ } while (tb_compare(now, end) != TB_AAFTERB);
+}
+
+static void recalc_pm_enabled(void)
{
- if (proc_chip_quirks & QUIRK_AWAN)
+ if (chip_quirk(QUIRK_AWAN))
return;
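+
+	/*
+	 * P8 nap wakeups are delivered as system reset interrupts, so PM
+	 * there also requires sreset handling. P9/P10 can stop with EC=0
+	 * and resume inline, needing only the doorbell IPI.
+	 */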
+ if (proc_gen == proc_gen_p8)
+ pm_enabled = ipi_enabled && sreset_enabled;
+ else
+ pm_enabled = ipi_enabled;
+}
+
+void cpu_set_sreset_enable(bool enabled)
+{
if (sreset_enabled == enabled)
return;
@@ -647,28 +727,15 @@ void cpu_set_sreset_enable(bool enabled)
/* Public P8 Mambo has broken NAP */
if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
return;
+ }
- sreset_enabled = enabled;
- sync();
+ reconfigure_idle_start();
- if (!enabled) {
- cpu_pm_disable();
- } else {
- if (ipi_enabled)
- pm_enabled = true;
- }
+ sreset_enabled = enabled;
- } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) {
- sreset_enabled = enabled;
- sync();
- /*
- * Kick everybody out of PM so they can adjust the PM
- * mode they are using (EC=0/1).
- */
- cpu_pm_disable();
- if (ipi_enabled)
- pm_enabled = true;
- }
+ recalc_pm_enabled();
+
+ reconfigure_idle_end();
}
void cpu_set_ipi_enable(bool enabled)
@@ -676,24 +743,13 @@ void cpu_set_ipi_enable(bool enabled)
if (ipi_enabled == enabled)
return;
- if (proc_gen == proc_gen_p8) {
- ipi_enabled = enabled;
- sync();
- if (!enabled) {
- cpu_pm_disable();
- } else {
- if (sreset_enabled)
- pm_enabled = true;
- }
+ reconfigure_idle_start();
- } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) {
- ipi_enabled = enabled;
- sync();
- if (!enabled)
- cpu_pm_disable();
- else if (!chip_quirk(QUIRK_AWAN))
- pm_enabled = true;
- }
+ ipi_enabled = enabled;
+
+ recalc_pm_enabled();
+
+ reconfigure_idle_end();
}
void cpu_process_local_jobs(void)
Idle reconfiguration is difficult to follow and verify as correct
because it can occur while CPUs are in low-level idle routines. For
example, pm_enabled can change while CPUs are idle. If nothing else,
this can result in "cpu_idle_p9 called pm disabled" messages.

This changes idle reconfiguration to always kick all other CPUs out of
the idle code first whenever idle settings (pm_enabled, IPIs, sreset,
etc.) are to be changed.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 core/cpu.c | 256 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 156 insertions(+), 100 deletions(-)
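
For reference when reviewing the locking, a minimal standalone model of
the enter/exit vs. reconfigure protocol is below. It substitutes a
pthread mutex for skiboot's lock and plain spin loops for
smt_lowest()/barrier(); the function names mirror the patch, but
everything else (the mutex, the GCC atomic loads, the demo main) is
illustrative only, not skiboot API.

/* protocol-model.c: illustrative model of the idle reconfigure protocol */
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t idle_lock = PTHREAD_MUTEX_INITIALIZER;
static bool reconfigure_idle;
static int nr_cpus_idle;

static void enter_idle(void)
{
	for (;;) {
		pthread_mutex_lock(&idle_lock);
		if (!reconfigure_idle) {
			nr_cpus_idle++;		/* counted in; may go idle */
			break;
		}
		pthread_mutex_unlock(&idle_lock);
		/* A reconfigure is in flight: stay out until it finishes */
		while (__atomic_load_n(&reconfigure_idle, __ATOMIC_ACQUIRE))
			;
	}
	pthread_mutex_unlock(&idle_lock);
}

static void exit_idle(void)
{
	pthread_mutex_lock(&idle_lock);
	assert(nr_cpus_idle > 0);
	nr_cpus_idle--;
	pthread_mutex_unlock(&idle_lock);
}

static void reconfigure_idle_start(void)
{
	/* Become the single reconfigurer by taking the flag false -> true */
	for (;;) {
		pthread_mutex_lock(&idle_lock);
		if (!reconfigure_idle) {
			reconfigure_idle = true;
			break;
		}
		pthread_mutex_unlock(&idle_lock);
		while (__atomic_load_n(&reconfigure_idle, __ATOMIC_ACQUIRE))
			;
	}
	pthread_mutex_unlock(&idle_lock);

	/* (The real code sends IPIs/doorbells to sleeping CPUs here.) */

	/* Wait for idlers to drain out; none can re-enter meanwhile */
	while (__atomic_load_n(&nr_cpus_idle, __ATOMIC_ACQUIRE) != 0)
		;
}

static void reconfigure_idle_end(void)
{
	pthread_mutex_lock(&idle_lock);
	reconfigure_idle = false;	/* idle entry permitted again */
	pthread_mutex_unlock(&idle_lock);
}

static void *idler(void *arg)
{
	(void)arg;
	enter_idle();
	/* ...nap/stop would happen here... */
	exit_idle();
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, idler, NULL);
	reconfigure_idle_start();
	/* ...settings such as pm_enabled would be changed here... */
	reconfigure_idle_end();
	pthread_join(t, NULL);
	return 0;
}

The drain-the-counter step is what replaces the old per-CPU
in_sleep/in_idle polling with its 0x08000000 timeout, which could race
with CPUs entering idle after the doorbells were sent.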