Message ID | 20240523133132.13978-3-chrubis@suse.cz |
---|---|
State | Accepted |
Headers | show |
Series | Fix msgstress01 timeouts | expand |
Hi, for both patches: Reviewed-by: Martin Doucha <mdoucha@suse.cz> On 23. 05. 24 15:31, Cyril Hrubis wrote: > Make the test exit if runtime has been exhausted before we finished the > requested amount of iterations. > > For that to happen we let the main test process to loop while checking > the runtime and set the stop flag if runtime was exhausted. We also need > to separte the stop and fail flag and add counter for finished children. > > Also if we exhaust our runtime during the initial fork phase we print a > warning, since we hardly did a meaningful testing in that case. > > The changes can be tested with -I parameter, e.g. -I 5 should trigger > the TWARN message and you should be able to get the test to stop in the > message sending phase with larger -I value. > > Signed-off-by: Cyril Hrubis <chrubis@suse.cz> > --- > .../syscalls/ipc/msgstress/msgstress01.c | 53 ++++++++++++++++--- > 1 file changed, 47 insertions(+), 6 deletions(-) > > diff --git a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c > index 62ffcf63b..fb1d4263d 100644 > --- a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c > +++ b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c > @@ -50,6 +50,9 @@ static char *str_num_iterations; > static int num_messages = 1000; > static int num_iterations = MAXNREPS; > static volatile int *stop; > +static volatile int *fail; > +static int *finished; > +static int *flags; > > static int get_used_sysvipc(void) > { > @@ -77,6 +80,10 @@ static void reset_messages(void) > > for (int i = 0; i < num_messages; i++) > ipc_data[i].id = -1; > + > + *stop = 0; > + *fail = 0; > + *finished = 0; > } > > static int create_message(const int id) > @@ -112,6 +119,8 @@ static void writer(const int id, const int pos) > tst_brk(TBROK | TERRNO, "msgsnd() failed"); > } > } > + > + tst_atomic_inc(finished); > } > > static void reader(const int id, const int pos) > @@ -138,6 +147,7 @@ static void reader(const int id, const int pos) > tst_res(TFAIL, "Received the wrong message type"); > > *stop = 1; > + *fail = 1; > return; > } > > @@ -145,6 +155,7 @@ static void reader(const int id, const int pos) > tst_res(TFAIL, "Received the wrong message data length"); > > *stop = 1; > + *fail = 1; > return; > } > > @@ -155,6 +166,7 @@ static void reader(const int id, const int pos) > buff->msg.data.pbytes[i]); > > *stop = 1; > + *fail = 1; > return; > } > } > @@ -163,6 +175,8 @@ static void reader(const int id, const int pos) > tst_res(TDEBUG, "msg_recv.type = %ld", msg_recv.type); > tst_res(TDEBUG, "msg_recv.data.len = %d", msg_recv.data.len); > } > + > + tst_atomic_inc(finished); > } > > static void remove_queues(void) > @@ -196,12 +210,37 @@ static void run(void) > > if (*stop) > break; > + > + if (!tst_remaining_runtime()) { > + tst_res(TWARN, "Out of runtime during forking..."); > + *stop = 1; > + break; > + } > + } > + > + if (!(*stop)) > + tst_res(TINFO, "All processes running."); > + > + for (;;) { > + if (tst_atomic_load(finished) == 2 * num_messages) > + break; > + > + if (*stop) > + break; > + > + if (!tst_remaining_runtime()) { > + tst_res(TINFO, "Out of runtime, stopping processes..."); > + *stop = 1; > + break; > + } > + > + sleep(1); > } > > tst_reap_children(); > remove_queues(); > > - if (!(*stop)) > + if (!(*fail)) > tst_res(TPASS, "Test passed. All messages have been received"); > } > > @@ -242,14 +281,16 @@ static void setup(void) > MAP_SHARED | MAP_ANONYMOUS, > -1, 0); > > - stop = SAFE_MMAP( > + flags = SAFE_MMAP( > NULL, > - sizeof(int), > + sizeof(int) * 3, > PROT_READ | PROT_WRITE, > MAP_SHARED | MAP_ANONYMOUS, > -1, 0); > > - reset_messages(); > + stop = &flags[0]; > + fail = &flags[1]; > + finished = &flags[2]; > } > > static void cleanup(void) > @@ -260,7 +301,7 @@ static void cleanup(void) > remove_queues(); > > SAFE_MUNMAP(ipc_data, sizeof(struct sysv_data) * num_messages); > - SAFE_MUNMAP((void *)stop, sizeof(int)); > + SAFE_MUNMAP(flags, sizeof(int) * 3); > } > > static struct tst_test test = { > @@ -271,7 +312,7 @@ static struct tst_test test = { > .max_runtime = 180, > .options = (struct tst_option[]) { > {"n:", &str_num_messages, "Number of messages to send (default: 1000)"}, > - {"l:", &str_num_iterations, "Number iterations per message (default: 10000)"}, > + {"l:", &str_num_iterations, "Number iterations per message (default: " TST_TO_STR(MAXNREPS) ")"}, > {}, > }, > };
Hi Cyril, > Make the test exit if runtime has been exhausted before we finished the > requested amount of iterations. > For that to happen we let the main test process to loop while checking > the runtime and set the stop flag if runtime was exhausted. We also need > to separte the stop and fail flag and add counter for finished children. nit: s/separte/separate/ ... > static void remove_queues(void) > @@ -196,12 +210,37 @@ static void run(void) > if (*stop) > break; > + > + if (!tst_remaining_runtime()) { > + tst_res(TWARN, "Out of runtime during forking..."); I tested the patchset on various VMs (various kernels), with both 1 or 2 CPU. Indeed it fixes the problem. IMHO it can quite easily get KVM host overloaded enough to get the TWARN out of runtime during forking, but we can't do anything about it. msgstress01.c:215: TWARN: Out of runtime during forking... msgstress01.c:244: TPASS: Test passed. All messages have been received > + *stop = 1; > + break; > + } > + } > + > + if (!(*stop)) > + tst_res(TINFO, "All processes running."); very nit: I'd remove dot at the end. > + > + for (;;) { > + if (tst_atomic_load(finished) == 2 * num_messages) > + break; > + > + if (*stop) > + break; > + > + if (!tst_remaining_runtime()) { > + tst_res(TINFO, "Out of runtime, stopping processes..."); > + *stop = 1; > + break; > + } > + > + sleep(1); > } > tst_reap_children(); > remove_queues(); > - if (!(*stop)) > + if (!(*fail)) > tst_res(TPASS, "Test passed. All messages have been received"); > } > @@ -242,14 +281,16 @@ static void setup(void) > MAP_SHARED | MAP_ANONYMOUS, > -1, 0); > - stop = SAFE_MMAP( > + flags = SAFE_MMAP( > NULL, > - sizeof(int), > + sizeof(int) * 3, > PROT_READ | PROT_WRITE, > MAP_SHARED | MAP_ANONYMOUS, > -1, 0); > - reset_messages(); > + stop = &flags[0]; > + fail = &flags[1]; > + finished = &flags[2]; > } > static void cleanup(void) > @@ -260,7 +301,7 @@ static void cleanup(void) > remove_queues(); > SAFE_MUNMAP(ipc_data, sizeof(struct sysv_data) * num_messages); > - SAFE_MUNMAP((void *)stop, sizeof(int)); > + SAFE_MUNMAP(flags, sizeof(int) * 3); > } > static struct tst_test test = { > @@ -271,7 +312,7 @@ static struct tst_test test = { > .max_runtime = 180, > .options = (struct tst_option[]) { > {"n:", &str_num_messages, "Number of messages to send (default: 1000)"}, > - {"l:", &str_num_iterations, "Number iterations per message (default: 10000)"}, > + {"l:", &str_num_iterations, "Number iterations per message (default: " TST_TO_STR(MAXNREPS) ")"}, > {}, very nit: too long line Reviewed-by: Petr Vorel <pvorel@suse.cz> Kind regards, Petr
Hi! > > Make the test exit if runtime has been exhausted before we finished the > > requested amount of iterations. > > > For that to happen we let the main test process to loop while checking > > the runtime and set the stop flag if runtime was exhausted. We also need > > to separte the stop and fail flag and add counter for finished children. > nit: s/separte/separate/ > > ... > > static void remove_queues(void) > > @@ -196,12 +210,37 @@ static void run(void) > > > if (*stop) > > break; > > + > > + if (!tst_remaining_runtime()) { > > + tst_res(TWARN, "Out of runtime during forking..."); > > I tested the patchset on various VMs (various kernels), with both 1 or 2 CPU. > Indeed it fixes the problem. IMHO it can quite easily get KVM host overloaded > enough to get the TWARN out of runtime during forking, but we can't do anything > about it. And I think that it's fair to signal to the user that the machine is too overloaded to run a meaningful test in this case. > msgstress01.c:215: TWARN: Out of runtime during forking... > msgstress01.c:244: TPASS: Test passed. All messages have been received > > > + *stop = 1; > > + break; > > + } > > + } > > + > > + if (!(*stop)) > > + tst_res(TINFO, "All processes running."); > very nit: I'd remove dot at the end. > > > + > > + for (;;) { > > + if (tst_atomic_load(finished) == 2 * num_messages) > > + break; > > + > > + if (*stop) > > + break; > > + > > + if (!tst_remaining_runtime()) { > > + tst_res(TINFO, "Out of runtime, stopping processes..."); > > + *stop = 1; > > + break; > > + } > > + > > + sleep(1); > > } > > > tst_reap_children(); > > remove_queues(); > > > - if (!(*stop)) > > + if (!(*fail)) > > tst_res(TPASS, "Test passed. All messages have been received"); > > } > > > @@ -242,14 +281,16 @@ static void setup(void) > > MAP_SHARED | MAP_ANONYMOUS, > > -1, 0); > > > - stop = SAFE_MMAP( > > + flags = SAFE_MMAP( > > NULL, > > - sizeof(int), > > + sizeof(int) * 3, > > PROT_READ | PROT_WRITE, > > MAP_SHARED | MAP_ANONYMOUS, > > -1, 0); > > > - reset_messages(); > > + stop = &flags[0]; > > + fail = &flags[1]; > > + finished = &flags[2]; > > } > > > static void cleanup(void) > > @@ -260,7 +301,7 @@ static void cleanup(void) > > remove_queues(); > > > SAFE_MUNMAP(ipc_data, sizeof(struct sysv_data) * num_messages); > > - SAFE_MUNMAP((void *)stop, sizeof(int)); > > + SAFE_MUNMAP(flags, sizeof(int) * 3); > > } > > > static struct tst_test test = { > > @@ -271,7 +312,7 @@ static struct tst_test test = { > > .max_runtime = 180, > > .options = (struct tst_option[]) { > > {"n:", &str_num_messages, "Number of messages to send (default: 1000)"}, > > - {"l:", &str_num_iterations, "Number iterations per message (default: 10000)"}, > > + {"l:", &str_num_iterations, "Number iterations per message (default: " TST_TO_STR(MAXNREPS) ")"}, > > {}, > very nit: too long line Feel free to fix the minor nits and push the patch.
Hi, BTW I have found problem on Alpine: tst_pid.c:84: TINFO: Cannot read session user limits from '/sys/fs/cgroup/user.slice/user-1000.slice/pids.max' tst_pid.c:84: TINFO: Cannot read session user limits from '/sys/fs/cgroup/pids/user.slice/user-1000.slice/pids.max' msgstress01.c:269: TINFO: Available messages slots: 5530 msgstress01.c:164: TFAIL: Received wrong data at index 99: ffffffa3 != 19 It looks to me it's not related to using busybox init system (e.g. IMHO not related to the Cannot read session user limits, but I can be wrong). Anyway, this is for sure not a blocker for the release (and probably wait for LTP musl users). Kind regards, Petr
> Hi, > BTW I have found problem on Alpine: OK, it's not Alpine related, but 32 bit related: $ ./msgstress01 tst_test.c:1733: TINFO: LTP version: 20240129-290-g37fd02a1c tst_test.c:1619: TINFO: Timeout per run is 0h 03m 30s tst_pid.c:94: TINFO: Found limit of processes 40844 (from /sys/fs/cgroup/user.slice/user-1000.slice/pids.max) msgstress01.c:269: TINFO: Available messages slots: 19045 msgstress01.c:166: TFAIL: Received wrong data at index 99: 6c != 1 Maybe worth of fixing. Regression from 90f80322a. Kind regards, Petr
Hi Cyril, patchset merged, thanks! Your quick fix for this test on 32bit will be the last commit before release tomorrow. Kind regards, Petr
Hi! > BTW I have found problem on Alpine: > > tst_pid.c:84: TINFO: Cannot read session user limits from '/sys/fs/cgroup/user.slice/user-1000.slice/pids.max' > tst_pid.c:84: TINFO: Cannot read session user limits from '/sys/fs/cgroup/pids/user.slice/user-1000.slice/pids.max' > msgstress01.c:269: TINFO: Available messages slots: 5530 > msgstress01.c:164: TFAIL: Received wrong data at index 99: ffffffa3 != 19 > > It looks to me it's not related to using busybox init system (e.g. IMHO not > related to the Cannot read session user limits, but I can be wrong). > Anyway, this is for sure not a blocker for the release (and probably wait for > LTP musl users). So looks like we are using wrong message size for the message buffer. We are using size returned from the msgrcv() that returns the size of the payload but the payload looks like: struct { char len; char pbytes[99]; } data; This means that the size returned from msgrcv() is one more byte longer than the pbytes array. Quick fix would be: diff --git a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c index 5c84957b3..b0d945a11 100644 --- a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c +++ b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c @@ -131,7 +131,7 @@ static void reader(const int id, const int pos) return; } - for (int i = 0; i < size; i++) { + for (int i = 0; i < msg_recv.data.len; i++) { if (msg_recv.data.pbytes[i] != buff->msg.data.pbytes[i]) { tst_res(TFAIL, "Received wrong data at index %d: %x != %x", i, msg_recv.data.pbytes[i], Better fix would remove the len from the sysv_msg structure and use the size only, but I would go for the easy fix before the release and do cleanup later on. I will send a proper patch.
diff --git a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c index 62ffcf63b..fb1d4263d 100644 --- a/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c +++ b/testcases/kernel/syscalls/ipc/msgstress/msgstress01.c @@ -50,6 +50,9 @@ static char *str_num_iterations; static int num_messages = 1000; static int num_iterations = MAXNREPS; static volatile int *stop; +static volatile int *fail; +static int *finished; +static int *flags; static int get_used_sysvipc(void) { @@ -77,6 +80,10 @@ static void reset_messages(void) for (int i = 0; i < num_messages; i++) ipc_data[i].id = -1; + + *stop = 0; + *fail = 0; + *finished = 0; } static int create_message(const int id) @@ -112,6 +119,8 @@ static void writer(const int id, const int pos) tst_brk(TBROK | TERRNO, "msgsnd() failed"); } } + + tst_atomic_inc(finished); } static void reader(const int id, const int pos) @@ -138,6 +147,7 @@ static void reader(const int id, const int pos) tst_res(TFAIL, "Received the wrong message type"); *stop = 1; + *fail = 1; return; } @@ -145,6 +155,7 @@ static void reader(const int id, const int pos) tst_res(TFAIL, "Received the wrong message data length"); *stop = 1; + *fail = 1; return; } @@ -155,6 +166,7 @@ static void reader(const int id, const int pos) buff->msg.data.pbytes[i]); *stop = 1; + *fail = 1; return; } } @@ -163,6 +175,8 @@ static void reader(const int id, const int pos) tst_res(TDEBUG, "msg_recv.type = %ld", msg_recv.type); tst_res(TDEBUG, "msg_recv.data.len = %d", msg_recv.data.len); } + + tst_atomic_inc(finished); } static void remove_queues(void) @@ -196,12 +210,37 @@ static void run(void) if (*stop) break; + + if (!tst_remaining_runtime()) { + tst_res(TWARN, "Out of runtime during forking..."); + *stop = 1; + break; + } + } + + if (!(*stop)) + tst_res(TINFO, "All processes running."); + + for (;;) { + if (tst_atomic_load(finished) == 2 * num_messages) + break; + + if (*stop) + break; + + if (!tst_remaining_runtime()) { + tst_res(TINFO, "Out of runtime, stopping processes..."); + *stop = 1; + break; + } + + sleep(1); } tst_reap_children(); remove_queues(); - if (!(*stop)) + if (!(*fail)) tst_res(TPASS, "Test passed. All messages have been received"); } @@ -242,14 +281,16 @@ static void setup(void) MAP_SHARED | MAP_ANONYMOUS, -1, 0); - stop = SAFE_MMAP( + flags = SAFE_MMAP( NULL, - sizeof(int), + sizeof(int) * 3, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); - reset_messages(); + stop = &flags[0]; + fail = &flags[1]; + finished = &flags[2]; } static void cleanup(void) @@ -260,7 +301,7 @@ static void cleanup(void) remove_queues(); SAFE_MUNMAP(ipc_data, sizeof(struct sysv_data) * num_messages); - SAFE_MUNMAP((void *)stop, sizeof(int)); + SAFE_MUNMAP(flags, sizeof(int) * 3); } static struct tst_test test = { @@ -271,7 +312,7 @@ static struct tst_test test = { .max_runtime = 180, .options = (struct tst_option[]) { {"n:", &str_num_messages, "Number of messages to send (default: 1000)"}, - {"l:", &str_num_iterations, "Number iterations per message (default: 10000)"}, + {"l:", &str_num_iterations, "Number iterations per message (default: " TST_TO_STR(MAXNREPS) ")"}, {}, }, };
Make the test exit if runtime has been exhausted before we finished the requested amount of iterations. For that to happen we let the main test process to loop while checking the runtime and set the stop flag if runtime was exhausted. We also need to separte the stop and fail flag and add counter for finished children. Also if we exhaust our runtime during the initial fork phase we print a warning, since we hardly did a meaningful testing in that case. The changes can be tested with -I parameter, e.g. -I 5 should trigger the TWARN message and you should be able to get the test to stop in the message sending phase with larger -I value. Signed-off-by: Cyril Hrubis <chrubis@suse.cz> --- .../syscalls/ipc/msgstress/msgstress01.c | 53 ++++++++++++++++--- 1 file changed, 47 insertions(+), 6 deletions(-)