diff mbox series

[v1,1/6] core/cpu: rewrite idle synchronisation

Message ID 20210811054851.861482-2-npiggin@gmail.com
State Superseded
Headers show
Series idle synchronisation improvements | expand

Commit Message

Nicholas Piggin Aug. 11, 2021, 5:48 a.m. UTC
Idle reconfiguration is somewhat racy and not obviously correct, with
pm_enabled changing while in low level idle routines, which can result in
messages like "cpu_idle_p9 called pm disabled".

This changes CPU idle synchronisation to always kick all other CPUs
out of idle code first whenever idle settings (pm_enabled, IPIs, sreset
etc) are to be changed.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 core/cpu.c | 243 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 143 insertions(+), 100 deletions(-)
diff mbox series

Patch

diff --git a/core/cpu.c b/core/cpu.c
index d4d33b836..0dc013b5b 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -38,6 +38,7 @@  static struct lock reinit_lock = LOCK_UNLOCKED;
 static bool radix_supported;
 static unsigned long hid0_hile;
 static unsigned long hid0_attn;
+static bool reconfigure_idle = false;
 static bool sreset_enabled;
 static bool ipi_enabled;
 static bool pm_enabled;
@@ -391,7 +392,7 @@  static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
 		sync();
 
 		/* Check for jobs again */
-		if (cpu_check_jobs(cpu) || !pm_enabled)
+		if (cpu_check_jobs(cpu) || reconfigure_idle)
 			goto skip_sleep;
 
 		/* Setup wakup cause in LPCR: EE (for IPI) */
@@ -406,7 +407,7 @@  static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
 		sync();
 
 		/* Check if PM got disabled */
-		if (!pm_enabled)
+		if (reconfigure_idle)
 			goto skip_sleep;
 
 		/* EE and DEC */
@@ -447,7 +448,7 @@  static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
 		sync();
 
 		/* Check for jobs again */
-		if (cpu_check_jobs(cpu) || !pm_enabled)
+		if (cpu_check_jobs(cpu) || reconfigure_idle)
 			goto skip_sleep;
 
 		/* HV DBELL for IPI */
@@ -460,7 +461,7 @@  static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
 		sync();
 
 		/* Check if PM got disabled */
-		if (!pm_enabled)
+		if (reconfigure_idle)
 			goto skip_sleep;
 
 		/* HV DBELL and DEC */
@@ -534,70 +535,66 @@  static void cpu_idle_pm(enum cpu_wake_cause wake_on)
 	}
 }
 
-void cpu_idle_job(void)
+static struct lock idle_lock = LOCK_UNLOCKED;
+static int nr_cpus_idle = 0;
+
+static void enter_idle(void)
 {
-	if (pm_enabled) {
-		cpu_idle_pm(cpu_wake_on_job);
-	} else {
-		struct cpu_thread *cpu = this_cpu();
+	for (;;) {
+		lock(&idle_lock);
+		if (!reconfigure_idle) {
+			nr_cpus_idle++;
+			break;
+		}
+		unlock(&idle_lock);
 
+		/* Another CPU is reconfiguring idle */
 		smt_lowest();
-		/* Check for jobs again */
-		while (!cpu_check_jobs(cpu)) {
-			if (pm_enabled)
-				break;
+		while (reconfigure_idle)
 			cpu_relax();
-			barrier();
-		}
 		smt_medium();
 	}
+
+	unlock(&idle_lock);
 }
 
-void cpu_idle_delay(unsigned long delay)
+static void exit_idle(void)
 {
-	unsigned long now = mftb();
-	unsigned long end = now + delay;
-	unsigned long min_pm = usecs_to_tb(10);
-
-	if (pm_enabled && delay > min_pm) {
-pm:
-		for (;;) {
-			if (delay >= 0x7fffffff)
-				delay = 0x7fffffff;
-			mtspr(SPR_DEC, delay);
+	lock(&idle_lock);
+	assert(nr_cpus_idle > 0);
+	nr_cpus_idle--;
+	unlock(&idle_lock);
+}
 
-			cpu_idle_pm(cpu_wake_on_dec);
+static void reconfigure_idle_start(void)
+{
+	struct cpu_thread *cpu;
 
-			now = mftb();
-			if (tb_compare(now, end) == TB_AAFTERB)
-				break;
-			delay = end - now;
-			if (!(pm_enabled && delay > min_pm))
-				goto no_pm;
+	for (;;) {
+		lock(&idle_lock);
+		if (!reconfigure_idle) {
+			reconfigure_idle = true;
+			break;
 		}
-	} else {
-no_pm:
+		unlock(&idle_lock);
+
+		/* Someone else is reconfiguring */
 		smt_lowest();
-		for (;;) {
-			now = mftb();
-			if (tb_compare(now, end) == TB_AAFTERB)
-				break;
-			delay = end - now;
-			if (pm_enabled && delay > min_pm) {
-				smt_medium();
-				goto pm;
-			}
-		}
+		while (reconfigure_idle)
+			cpu_relax();
 		smt_medium();
 	}
-}
 
-static void cpu_pm_disable(void)
-{
-	struct cpu_thread *cpu;
-	unsigned int timeout;
+	unlock(&idle_lock);
 
-	pm_enabled = false;
+	/*
+	 * Now kick everyone out of idle.
+	 */
+
+	/*
+	 * Order earlier store to reconfigure_idle=true vs load from
+	 * cpu->in_sleep and cpu->in_idle.
+	 */
 	sync();
 
 	if (proc_gen == proc_gen_p8) {
@@ -612,22 +609,88 @@  static void cpu_pm_disable(void)
 			if (cpu->in_sleep || cpu->in_idle)
 				p9_dbell_send(cpu->pir);
 		}
+	}
 
-		/*  This code is racy with cpus entering idle, late ones miss the dbell */
+	smt_lowest();
+	while (nr_cpus_idle != 0)
+		cpu_relax();
+	smt_medium();
 
-		smt_lowest();
-		for_each_available_cpu(cpu) {
-			timeout = 0x08000000;
-			while ((cpu->in_sleep || cpu->in_idle) && --timeout)
-				barrier();
-			if (!timeout) {
-				prlog(PR_DEBUG, "cpu_pm_disable TIMEOUT on cpu 0x%04x to exit idle\n",
-				      cpu->pir);
-                                p9_dbell_send(cpu->pir);
-                        }
+	/*
+	 * Order load of nr_cpus_idle with loads of data the idle CPUs
+	 * might have previously stored to before coming out of idle.
+	 */
+	lwsync();
+}
+
+static void reconfigure_idle_end(void)
+{
+	assert(reconfigure_idle);
+	lock(&idle_lock);
+	reconfigure_idle = false;
+	unlock(&idle_lock);
+}
+
+void cpu_idle_job(void)
+{
+	struct cpu_thread *cpu = this_cpu();
+
+	do {
+		enter_idle();
+
+		if (pm_enabled) {
+			cpu_idle_pm(cpu_wake_on_job);
+		} else {
+			smt_lowest();
+			for (;;) {
+				if (cpu_check_jobs(cpu))
+					break;
+				if (reconfigure_idle)
+					break;
+				cpu_relax();
+			}
+			smt_medium();
 		}
-		smt_medium();
-	}
+
+		exit_idle();
+
+	} while (!cpu_check_jobs(cpu));
+}
+
+void cpu_idle_delay(unsigned long delay)
+{
+	unsigned long now = mftb();
+	unsigned long end = now + delay;
+	unsigned long min_pm = usecs_to_tb(10);
+
+	do {
+		enter_idle();
+
+		delay = end - now;
+
+		if (pm_enabled && delay > min_pm) {
+			if (delay >= 0x7fffffff)
+				delay = 0x7fffffff;
+			mtspr(SPR_DEC, delay);
+
+			cpu_idle_pm(cpu_wake_on_dec);
+		} else {
+			smt_lowest();
+			for (;;) {
+				if (tb_compare(mftb(), end) == TB_AAFTERB)
+					break;
+				if (reconfigure_idle)
+					break;
+				cpu_relax();
+			}
+			smt_medium();
+		}
+
+		exit_idle();
+
+		now = mftb();
+
+	} while (tb_compare(now, end) != TB_AAFTERB);
 }
 
 void cpu_set_sreset_enable(bool enabled)
@@ -639,28 +702,16 @@  void cpu_set_sreset_enable(bool enabled)
 		/* Public P8 Mambo has broken NAP */
 		if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
 			return;
+	}
 
-		sreset_enabled = enabled;
-		sync();
+	reconfigure_idle_start();
 
-		if (!enabled) {
-			cpu_pm_disable();
-		} else {
-			if (ipi_enabled)
-				pm_enabled = true;
-		}
+	sreset_enabled = enabled;
 
-	} else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) {
-		sreset_enabled = enabled;
-		sync();
-		/*
-		 * Kick everybody out of PM so they can adjust the PM
-		 * mode they are using (EC=0/1).
-		 */
-		cpu_pm_disable();
-		if (ipi_enabled)
-			pm_enabled = true;
-	}
+	if (proc_gen == proc_gen_p8)
+		pm_enabled = ipi_enabled && sreset_enabled;
+
+	reconfigure_idle_end();
 }
 
 void cpu_set_ipi_enable(bool enabled)
@@ -668,24 +719,16 @@  void cpu_set_ipi_enable(bool enabled)
 	if (ipi_enabled == enabled)
 		return;
 
-	if (proc_gen == proc_gen_p8) {
-		ipi_enabled = enabled;
-		sync();
-		if (!enabled) {
-			cpu_pm_disable();
-		} else {
-			if (sreset_enabled)
-				pm_enabled = true;
-		}
+	reconfigure_idle_start();
 
-	} else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) {
-		ipi_enabled = enabled;
-		sync();
-		if (!enabled)
-			cpu_pm_disable();
-		else
-			pm_enabled = true;
-	}
+	ipi_enabled = enabled;
+
+	if (proc_gen == proc_gen_p8)
+		pm_enabled = ipi_enabled && sreset_enabled;
+	else
+		pm_enabled = ipi_enabled;
+
+	reconfigure_idle_end();
 }
 
 void cpu_process_local_jobs(void)