diff mbox series

read_all: limit sysfs tpm entries to single worker

Message ID 36c63ee52ce1e7ab1f6ce90bc6a4c58272130bee.1729590080.git.jstancek@redhat.com
State Accepted, archived
Headers show
Series read_all: limit sysfs tpm entries to single worker | expand

Commit Message

Jan Stancek Oct. 22, 2024, 9:44 a.m. UTC
Repeated reads from TPM entries (tcg_operations, vs_operations,
caps,..) are causing big delays with 3 or more repetitions,
which has signs of some kind of rate-limitting on firmware side.

This patch introduces a new kind of blacklist, which doesn't
completely skips the entry, but assigns it to only single
worker.

Signed-off-by: Jan Stancek <jstancek@redhat.com>
---
 testcases/kernel/fs/read_all/read_all.c | 49 +++++++++++++++++++++----
 1 file changed, 41 insertions(+), 8 deletions(-)

Comments

Cyril Hrubis Oct. 22, 2024, 12:05 p.m. UTC | #1
Hi!
Reviewed-by: Cyril Hrubis <chrubis@suse.cz>
Li Wang Oct. 22, 2024, 1:04 p.m. UTC | #2
On Tue, Oct 22, 2024 at 5:45 PM Jan Stancek <jstancek@redhat.com> wrote:

> Repeated reads from TPM entries (tcg_operations, vs_operations,
> caps,..) are causing big delays with 3 or more repetitions,
> which has signs of some kind of rate-limitting on firmware side.
>
> This patch introduces a new kind of blacklist, which doesn't
> completely skips the entry, but assigns it to only single
> worker.
>

Did you test that a single worker is faster enough with the
default '.max_runtime = 100' on those kind of systems?

If that's still timeout on the rate-limited files, what should we do (move
to blacklist)?

Anyway, the patch itself looks good to me.
Reviewed-by: Li Wang <liwang@redhat.com>
Jan Stancek Oct. 22, 2024, 1:21 p.m. UTC | #3
On Tue, Oct 22, 2024 at 3:05 PM Li Wang <liwan@redhat.com> wrote:
>
>
>
> On Tue, Oct 22, 2024 at 5:45 PM Jan Stancek <jstancek@redhat.com> wrote:
>>
>> Repeated reads from TPM entries (tcg_operations, vs_operations,
>> caps,..) are causing big delays with 3 or more repetitions,
>> which has signs of some kind of rate-limitting on firmware side.
>>
>> This patch introduces a new kind of blacklist, which doesn't
>> completely skips the entry, but assigns it to only single
>> worker.
>
>
> Did you test that a single worker is faster enough with the
> default '.max_runtime = 100' on those kind of systems?

I did. It was only 3rd or 4th iteration that starts getting painfully slow.
It doesn't seem to matter if it's running parallel or sequentially.

For example, tcg_operations after certain point starts returning
"Blocked for OS by BIOS" (very slowly).

# time cat /sys/devices/pnp0/00:0a/tpm/tpm0/ppi/tcg_operations | grep Blocked

real    0m3.406s
user    0m0.000s
sys     0m3.631s

# time cat /sys/devices/pnp0/00:0a/tpm/tpm0/ppi/tcg_operations | grep
Blocked | wc -l
102

real    1m44.298s
user    0m0.000s
sys     1m45.316s


>
> If that's still timeout on the rate-limited files, what should we do (move to blacklist)?

Yes, I'd blacklist those where single read hangs for dozens of seconds.

>
> Anyway, the patch itself looks good to me.
> Reviewed-by: Li Wang <liwang@redhat.com>

Thanks for review.
Petr Vorel Oct. 22, 2024, 3:59 p.m. UTC | #4
Hi Jan,

LGTM.
Reviewed-by: Petr Vorel <pvorel@suse.cz>

Kind regards,
Petr
Jan Stancek Oct. 23, 2024, 2:29 p.m. UTC | #5
Pushed.

On Tue, Oct 22, 2024 at 5:59 PM Petr Vorel <pvorel@suse.cz> wrote:
>
> Hi Jan,
>
> LGTM.
> Reviewed-by: Petr Vorel <pvorel@suse.cz>
>
> Kind regards,
> Petr
>
diff mbox series

Patch

diff --git a/testcases/kernel/fs/read_all/read_all.c b/testcases/kernel/fs/read_all/read_all.c
index 266678ea7794..9c58b5e85bbf 100644
--- a/testcases/kernel/fs/read_all/read_all.c
+++ b/testcases/kernel/fs/read_all/read_all.c
@@ -94,11 +94,17 @@  static int worker_timeout;
 static int timeout_warnings_left = 15;
 
 static char *blacklist[] = {
-	NULL, /* reserved for -e parameter */
+	"/reserved/", /* reserved for -e parameter */
 	"/sys/kernel/debug/*",
 	"/sys/devices/platform/*/eeprom",
 	"/sys/devices/platform/*/nvmem",
 	"/sys/*/cpu??*(?)/*",	/* cpu* entries with 2 or more digits */
+	NULL
+};
+
+static char *ratelimit_list[] = {
+	"/sys/devices/*/tpm*",
+	NULL,
 };
 
 static long long epoch;
@@ -193,19 +199,43 @@  static void sanitize_str(char *buf, ssize_t count)
 		strcpy(buf + MAX_DISPLAY, "...");
 }
 
-static int is_blacklisted(const char *path)
+static int is_onlist(const char *path, char *list[])
 {
-	unsigned int i;
+	unsigned int i = 0;
+
+	while (1) {
+		const char *pattern = list[i++];
 
-	for (i = 0; i < ARRAY_SIZE(blacklist); i++) {
-		if (blacklist[i] && !fnmatch(blacklist[i], path, FNM_EXTMATCH)) {
-			if (verbose)
-				tst_res(TINFO, "Ignoring %s", path);
+		if (!pattern)
+			break;
+		if (!fnmatch(pattern, path, FNM_EXTMATCH))
 			return 1;
-		}
 	}
 
 	return 0;
+
+}
+
+static int is_blacklisted(const char *path)
+{
+	int ret;
+
+	ret = is_onlist(path, blacklist);
+	if (ret && verbose)
+		tst_res(TINFO, "Ignoring %s", path);
+
+	return ret;
+}
+
+static int is_ratelimitted(const char *path)
+{
+	int ret;
+
+	ret = is_onlist(path, ratelimit_list);
+	if (ret && verbose)
+		tst_res(TINFO, "Limiting to single worker %s", path);
+
+	return ret;
 }
 
 static void worker_heartbeat(const int worker)
@@ -503,6 +533,9 @@  static int sched_work(const int first_worker,
 	int min_ttl = worker_timeout, sleep_time = 1;
 	int pushed, workers_pushed = 0;
 
+	if (is_ratelimitted(path))
+		repetitions = 1;
+
 	for (i = 0, j = first_worker; i < repetitions; j++) {
 		if (j >= worker_count)
 			j = 0;