[v2] opal-prd: Have a worker process handle page offlining

Message ID	20200923061220.231458-1-oohall@gmail.com
State	Accepted
Headers	show Return-Path: <skiboot-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org> From: Oliver O'Halloran <oohall@gmail.com> To: skiboot@lists.ozlabs.org Date: Wed, 23 Sep 2020 16:12:20 +1000 Message-Id: <20200923061220.231458-1-oohall@gmail.com> MIME-Version: 1.0 Subject: [Skiboot] [PATCH v2] opal-prd: Have a worker process handle page offlining Precedence: list Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: skiboot-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org Sender: "Skiboot" <skiboot-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org>
Series	[v2] opal-prd: Have a worker process handle page offlining \| expand [v2] opal-prd: Have a worker process handle page offlining

Message ID

20200923061220.231458-1-oohall@gmail.com

State

Accepted

Headers

From: Oliver O'Halloran <oohall@gmail.com>
To: skiboot@lists.ozlabs.org
Date: Wed, 23 Sep 2020 16:12:20 +1000
Message-Id: <20200923061220.231458-1-oohall@gmail.com>
MIME-Version: 1.0
Subject: [Skiboot] [PATCH v2] opal-prd: Have a worker process handle page
 offlining
Precedence: list
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: skiboot-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org
Sender: "Skiboot"
 <skiboot-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org>

Series

[v2] opal-prd: Have a worker process handle page offlining | expand

Checks

Context	Check	Description
snowpatch_ozlabs/apply_patch	success	Successfully applied on branch master (d362ae4f4c521a7faffb1befe2fbba467f2c4d18)
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot	success	Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco	success	Signed-off-by present

Context

Check

Description

snowpatch_ozlabs/apply_patch

success

Successfully applied on branch master (d362ae4f4c521a7faffb1befe2fbba467f2c4d18)

snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot

success

Test snowpatch/job/snowpatch-skiboot on branch master

snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco

success

Signed-off-by present

Commit Message

Oliver O'Halloran Sept. 23, 2020, 6:12 a.m. UTC

The memory_error() hservice interface expects the memory_error() call to
just accept the offline request and return without actually offlining the
memory. Currently we will attempt to offline the marked pages before
returning to HBRT which can result in an excessively long time spent in the
memory_error() hservice call which blocks HBRT from processing other
errors. Fix this by adding a worker process which performs the page
offlining via the sysfs memory error interfaces.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
v2: switched to using sigaction() to auto-reap the workers
    and handle system call restarting, mainly for poll().
---
 external/opal-prd/opal-prd.c | 83 +++++++++++++++++++++++++++---------
 1 file changed, 64 insertions(+), 19 deletions(-)

Comments

Vasant Hegde Sept. 24, 2020, 5:44 a.m. UTC | #1

On 9/23/20 11:42 AM, Oliver O'Halloran wrote:
> The memory_error() hservice interface expects the memory_error() call to
> just accept the offline request and return without actually offlining the
> memory. Currently we will attempt to offline the marked pages before
> returning to HBRT which can result in an excessively long time spent in the
> memory_error() hservice call which blocks HBRT from processing other
> errors. Fix this by adding a worker process which performs the page
> offlining via the sysfs memory error interfaces.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>

Looks good to me.

Reviewed-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>

-Vasant

diff --git a/external/opal-prd/opal-prd.c b/external/opal-prd/opal-prd.c
index 40e5a9841427..d74d80398da0 100644
--- a/external/opal-prd/opal-prd.c
+++ b/external/opal-prd/opal-prd.c
@@ -27,6 +27,7 @@ 
 #include <stdarg.h>
 #include <time.h>
 #include <poll.h>
+#include <signal.h>
 #include <dirent.h>
 
 #include <endian.h>
@@ -696,13 +697,42 @@  out:
 	return rc;
 }
 
+static int memory_error_worker(const char *sysfsfile, const char *type,
+			       uint64_t i_start_addr, uint64_t i_endAddr)
+{
+	int memfd, rc, n, ret = 0;
+	char buf[ADDR_STRING_SZ];
+	uint64_t addr;
+
+	memfd = open(sysfsfile, O_WRONLY);
+	if (memfd < 0) {
+		pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
+				"Unable to open sysfs node %s: %m", sysfsfile);
+		return -1;
+	}
+
+	for (addr = i_start_addr; addr <= i_endAddr; addr += ctx->page_size) {
+		n = snprintf(buf, ADDR_STRING_SZ, "0x%lx", addr);
+		rc = write(memfd, buf, n);
+		if (rc != n) {
+			pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
+					"page addr: %016lx type: %s: %m",
+				addr, type);
+			ret = 1;
+		}
+	}
+	pr_log(LOG_CRIT, "MEM: Offlined %016lx,%016lx, type %s: %m\n",
+			i_start_addr, addr, type);
+
+	close(memfd);
+	return ret;
+}
+
 int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr,
 		enum MemoryError_t i_errorType)
 {
 	const char *sysfsfile, *typestr;
-	char buf[ADDR_STRING_SZ];
-	int memfd, rc, n, ret = 0;
-	uint64_t addr;
+	pid_t pid;
 
 	switch(i_errorType) {
 	case MEMORY_ERROR_CE:
@@ -722,26 +752,21 @@  int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr,
 	pr_log(LOG_ERR, "MEM: Memory error: range %016lx-%016lx, type: %s",
 			i_start_addr, i_endAddr, typestr);
 
+	/*
+	 * HBRT expects the memory offlining process to happen in the background
+	 * after the notification is delivered.
+	 */
+	pid = fork();
+	if (pid > 0)
+		exit(memory_error_worker(sysfsfile, typestr, i_start_addr, i_endAddr));
 
-	memfd = open(sysfsfile, O_WRONLY);
-	if (memfd < 0) {
-		pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
-				"Unable to open sysfs node %s: %m", sysfsfile);
+	if (pid < 0) {
+		perror("MEM: unable to fork worker to offline memory!\n");
 		return -1;
 	}
 
-	for (addr = i_start_addr; addr <= i_endAddr; addr += ctx->page_size) {
-		n = snprintf(buf, ADDR_STRING_SZ, "0x%lx", addr);
-		rc = write(memfd, buf, n);
-		if (rc != n) {
-			pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
-					"page addr: %016lx type: %d: %m",
-				addr, i_errorType);
-			ret = rc;
-		}
-	}
-
-	return ret;
+	pr_log(LOG_INFO, "MEM: forked off %d to handle mem error\n", pid);
+	return 0;
 }
 
 uint64_t hservice_get_interface_capabilities(uint64_t set)
@@ -2112,6 +2137,10 @@  static int init_control_socket(struct opal_prd_ctx *ctx)
 	return 0;
 }
 
+static struct sigaction sigchild_action = {
+	.sa_flags = SA_NOCLDWAIT | SA_RESTART,
+	.sa_handler = SIG_DFL,
+};
 
 static int run_prd_daemon(struct opal_prd_ctx *ctx)
 {
@@ -2243,6 +2272,22 @@  static int run_prd_daemon(struct opal_prd_ctx *ctx)
 		pr_debug("SCOM:  f00f: %lx", be64toh(val));
 	}
 
+	/*
+	 * Setup the SIGCHLD handler to automatically reap the worker threads
+	 * we use for memory offlining. We can't do this earlier since the
+	 * modprobe helper spawns workers and wants to check their exit status
+	 * with waitpid(). Auto-reaping breaks that so enable it just before
+	 * entering the attn loop.
+	 *
+	 * We also setup system call restarting on SIGCHLD since opal-prd
+	 * doesn't make any real attempt to handle blocking functions exiting
+	 * due to EINTR.
+	 */
+	if (sigaction(SIGCHLD, &sigchild_action, NULL)) {
+		pr_log(LOG_ERR, "CTRL: Failed to register signal handler %m\n");
+		return -1;
+	}
+
 	run_attn_loop(ctx);
 	rc = 0;

[v2] opal-prd: Have a worker process handle page offlining

Checks

Commit Message

Comments

Patch