diff mbox series

[v2,1/6] core/cpu: rewrite idle synchronisation

Message ID 20211217021724.709370-2-npiggin@gmail.com
State Accepted
Headers show
Series idle synchronisation improvements | expand

Commit Message

Nicholas Piggin Dec. 17, 2021, 2:17 a.m. UTC
Idle reconfiguration is difficult to follow and verify as correct
because it can occur while CPUs are in low-level idle routines. For
example pm_enabled can change while CPUs are idle. If nothing else, this
can result in "cpu_idle_p9 called pm disabled" messages.

This changes the idle reconfiguration to always kick all other CPUs out
of idle code first whenever idle settings (pm_enabled, IPIs, sreset,
etc.) are to be changed.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 core/cpu.c | 256 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 156 insertions(+), 100 deletions(-)
diff mbox series

Patch

diff --git a/core/cpu.c b/core/cpu.c
index 6f37358a9..1371cd34e 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -38,6 +38,7 @@  static struct lock reinit_lock = LOCK_UNLOCKED;
 static bool radix_supported;
 static unsigned long hid0_hile;
 static unsigned long hid0_attn;
+static bool reconfigure_idle = false;
 static bool sreset_enabled;
 static bool ipi_enabled;
 static bool pm_enabled;
@@ -397,7 +398,7 @@  static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
 		sync();
 
 		/* Check for jobs again */
-		if (cpu_check_jobs(cpu) || !pm_enabled)
+		if (cpu_check_jobs(cpu) || reconfigure_idle)
 			goto skip_sleep;
 
 		/* Setup wakup cause in LPCR: EE (for IPI) */
@@ -412,7 +413,7 @@  static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
 		sync();
 
 		/* Check if PM got disabled */
-		if (!pm_enabled)
+		if (reconfigure_idle)
 			goto skip_sleep;
 
 		/* EE and DEC */
@@ -453,7 +454,7 @@  static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
 		sync();
 
 		/* Check for jobs again */
-		if (cpu_check_jobs(cpu) || !pm_enabled)
+		if (cpu_check_jobs(cpu) || reconfigure_idle)
 			goto skip_sleep;
 
 		/* HV DBELL for IPI */
@@ -466,7 +467,7 @@  static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
 		sync();
 
 		/* Check if PM got disabled */
-		if (!pm_enabled)
+		if (reconfigure_idle)
 			goto skip_sleep;
 
 		/* HV DBELL and DEC */
@@ -540,69 +541,70 @@  static void cpu_idle_pm(enum cpu_wake_cause wake_on)
 	}
 }
 
-void cpu_idle_job(void)
+static struct lock idle_lock = LOCK_UNLOCKED;
+static int nr_cpus_idle = 0;
+
+static void enter_idle(void)
 {
-	if (pm_enabled) {
-		cpu_idle_pm(cpu_wake_on_job);
-	} else {
-		struct cpu_thread *cpu = this_cpu();
+	for (;;) {
+		lock(&idle_lock);
+		if (!reconfigure_idle) {
+			nr_cpus_idle++;
+			break;
+		}
+		unlock(&idle_lock);
 
+		/* Another CPU is reconfiguring idle */
 		smt_lowest();
-		/* Check for jobs again */
-		while (!cpu_check_jobs(cpu)) {
-			if (pm_enabled)
-				break;
+		while (reconfigure_idle)
 			barrier();
-		}
 		smt_medium();
 	}
+
+	unlock(&idle_lock);
 }
 
-void cpu_idle_delay(unsigned long delay)
+static void exit_idle(void)
 {
-	unsigned long now = mftb();
-	unsigned long end = now + delay;
-	unsigned long min_pm = usecs_to_tb(10);
-
-	if (pm_enabled && delay > min_pm) {
-pm:
-		for (;;) {
-			if (delay >= 0x7fffffff)
-				delay = 0x7fffffff;
-			mtspr(SPR_DEC, delay);
+	lock(&idle_lock);
+	assert(nr_cpus_idle > 0);
+	nr_cpus_idle--;
+	unlock(&idle_lock);
+}
 
-			cpu_idle_pm(cpu_wake_on_dec);
+static void reconfigure_idle_start(void)
+{
+	struct cpu_thread *cpu;
 
-			now = mftb();
-			if (tb_compare(now, end) == TB_AAFTERB)
-				break;
-			delay = end - now;
-			if (!(pm_enabled && delay > min_pm))
-				goto no_pm;
+	/*
+	 * First, make sure we are exclusive in reconfiguring by taking
+	 * reconfigure_idle from false to true.
+	 */
+	for (;;) {
+		lock(&idle_lock);
+		if (!reconfigure_idle) {
+			reconfigure_idle = true;
+			break;
 		}
-	} else {
-no_pm:
+		unlock(&idle_lock);
+
+		/* Someone else is reconfiguring */
 		smt_lowest();
-		for (;;) {
-			now = mftb();
-			if (tb_compare(now, end) == TB_AAFTERB)
-				break;
-			delay = end - now;
-			if (pm_enabled && delay > min_pm) {
-				smt_medium();
-				goto pm;
-			}
-		}
+		while (reconfigure_idle)
+			barrier();
 		smt_medium();
 	}
-}
 
-static void cpu_pm_disable(void)
-{
-	struct cpu_thread *cpu;
-	unsigned int timeout;
+	unlock(&idle_lock);
 
-	pm_enabled = false;
+	/*
+	 * Then kick everyone out of idle.
+	 */
+
+	/*
+	 * Order earlier store to reconfigure_idle=true vs load from
+	 * cpu->in_sleep and cpu->in_idle.
+	 */
 	sync();
 
 	if (proc_gen == proc_gen_p8) {
@@ -617,29 +619,107 @@  static void cpu_pm_disable(void)
 			if (cpu->in_sleep || cpu->in_idle)
 				p9_dbell_send(cpu->pir);
 		}
+	}
 
-		/*  This code is racy with cpus entering idle, late ones miss the dbell */
+	/*
+	 * Then wait for all other CPUs to leave idle. Now they will see
+	 * reconfigure_idle==true and not re-enter idle.
+	 */
+	smt_lowest();
+	while (nr_cpus_idle != 0)
+		barrier();
+	smt_medium();
 
-		smt_lowest();
-		for_each_available_cpu(cpu) {
-			timeout = 0x08000000;
-			while ((cpu->in_sleep || cpu->in_idle) && --timeout)
+	/*
+	 * Order load of nr_cpus_idle with later loads of data that other
+	 * CPUs might have stored-to before coming out of idle.
+	 */
+	lwsync();
+}
+
+static void reconfigure_idle_end(void)
+{
+	assert(reconfigure_idle);
+	lock(&idle_lock);
+	reconfigure_idle = false;
+	unlock(&idle_lock);
+}
+
+void cpu_idle_job(void)
+{
+	struct cpu_thread *cpu = this_cpu();
+
+	do {
+		enter_idle();
+
+		if (pm_enabled) {
+			cpu_idle_pm(cpu_wake_on_job);
+		} else {
+			smt_lowest();
+			for (;;) {
+				if (cpu_check_jobs(cpu))
+					break;
+				if (reconfigure_idle)
+					break;
 				barrier();
-			if (!timeout) {
-				prlog(PR_DEBUG, "cpu_pm_disable TIMEOUT on cpu 0x%04x to exit idle\n",
-				      cpu->pir);
-                                p9_dbell_send(cpu->pir);
-                        }
+			}
+			smt_medium();
 		}
-		smt_medium();
-	}
+
+		exit_idle();
+
+	} while (!cpu_check_jobs(cpu));
 }
 
-void cpu_set_sreset_enable(bool enabled)
+void cpu_idle_delay(unsigned long delay)
+{
+	unsigned long now = mftb();
+	unsigned long end = now + delay;
+	unsigned long min_pm = usecs_to_tb(10);
+
+	do {
+		enter_idle();
+
+		delay = end - now;
+
+		if (pm_enabled && delay > min_pm) {
+			if (delay >= 0x7fffffff)
+				delay = 0x7fffffff;
+			mtspr(SPR_DEC, delay);
+
+			cpu_idle_pm(cpu_wake_on_dec);
+		} else {
+			smt_lowest();
+			for (;;) {
+				if (tb_compare(mftb(), end) == TB_AAFTERB)
+					break;
+				if (reconfigure_idle)
+					break;
+				barrier();
+			}
+			smt_medium();
+		}
+
+		exit_idle();
+
+		now = mftb();
+
+	} while (tb_compare(now, end) != TB_AAFTERB);
+}
+
+static void recalc_pm_enabled(void)
 {
-	if (proc_chip_quirks & QUIRK_AWAN)
+	if (chip_quirk(QUIRK_AWAN))
 		return;
 
+	if (proc_gen == proc_gen_p8)
+		pm_enabled = ipi_enabled && sreset_enabled;
+	else
+		pm_enabled = ipi_enabled;
+}
+
+void cpu_set_sreset_enable(bool enabled)
+{
 	if (sreset_enabled == enabled)
 		return;
 
@@ -647,28 +727,15 @@  void cpu_set_sreset_enable(bool enabled)
 		/* Public P8 Mambo has broken NAP */
 		if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
 			return;
+	}
 
-		sreset_enabled = enabled;
-		sync();
+	reconfigure_idle_start();
 
-		if (!enabled) {
-			cpu_pm_disable();
-		} else {
-			if (ipi_enabled)
-				pm_enabled = true;
-		}
+	sreset_enabled = enabled;
 
-	} else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) {
-		sreset_enabled = enabled;
-		sync();
-		/*
-		 * Kick everybody out of PM so they can adjust the PM
-		 * mode they are using (EC=0/1).
-		 */
-		cpu_pm_disable();
-		if (ipi_enabled)
-			pm_enabled = true;
-	}
+	recalc_pm_enabled();
+
+	reconfigure_idle_end();
 }
 
 void cpu_set_ipi_enable(bool enabled)
@@ -676,24 +743,13 @@  void cpu_set_ipi_enable(bool enabled)
 	if (ipi_enabled == enabled)
 		return;
 
-	if (proc_gen == proc_gen_p8) {
-		ipi_enabled = enabled;
-		sync();
-		if (!enabled) {
-			cpu_pm_disable();
-		} else {
-			if (sreset_enabled)
-				pm_enabled = true;
-		}
+	reconfigure_idle_start();
 
-	} else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) {
-		ipi_enabled = enabled;
-		sync();
-		if (!enabled)
-			cpu_pm_disable();
-		else if (!chip_quirk(QUIRK_AWAN))
-			pm_enabled = true;
-	}
+	ipi_enabled = enabled;
+
+	recalc_pm_enabled();
+
+	reconfigure_idle_end();
 }
 
 void cpu_process_local_jobs(void)