diff mbox series

[v2,4/4] tests/qtest/migration-test: Add a postcopy memfile test

Message ID 20240530095408.31608-5-npiggin@gmail.com
State New
Headers show
Series tests/qtest/migration-test: Improve shared | expand

Commit Message

Nicholas Piggin May 30, 2024, 9:54 a.m. UTC
Postcopy requires userfaultfd support, which requires tmpfs if a memory
file is used.

This adds back support for /dev/shm memory files, but adds preallocation
to skip environments where that mount is limited in size.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 tests/qtest/migration-test.c | 77 ++++++++++++++++++++++++++++++++----
 1 file changed, 69 insertions(+), 8 deletions(-)

Comments

Peter Xu May 31, 2024, 1:34 p.m. UTC | #1
On Thu, May 30, 2024 at 07:54:07PM +1000, Nicholas Piggin wrote:
> Postcopy requires userfaultfd support, which requires tmpfs if a memory
> file is used.
> 
> This adds back support for /dev/shm memory files, but adds preallocation
> to skip environments where that mount is limited in size.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Thanks for doing this regardless.

> ---
>  tests/qtest/migration-test.c | 77 ++++++++++++++++++++++++++++++++----
>  1 file changed, 69 insertions(+), 8 deletions(-)
> 
> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> index 86eace354e..5078033ded 100644
> --- a/tests/qtest/migration-test.c
> +++ b/tests/qtest/migration-test.c
> @@ -11,6 +11,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/cutils.h"
>  
>  #include "libqtest.h"
>  #include "qapi/qmp/qdict.h"
> @@ -553,6 +554,7 @@ typedef struct {
>       */
>      bool hide_stderr;
>      bool use_memfile;
> +    bool use_shm_memfile;

Nitpick: when having both, it's slightly confusing on the name, e.g. not
clear whether use_memfile needs to be set to true too if use_shm_memfile=true.

Maybe "use_tmpfs_memfile" and "use_shm_memfile"?

>      /* only launch the target process */
>      bool only_target;
>      /* Use dirty ring if true; dirty logging otherwise */
> @@ -739,7 +741,62 @@ static int test_migrate_start(QTestState **from, QTestState **to,
>          ignore_stderr = "";
>      }
>  
> -    if (args->use_memfile) {
> +    if (!qtest_has_machine(machine_alias)) {
> +        g_autofree char *msg = g_strdup_printf("machine %s not supported",
> +                                               machine_alias);
> +        g_test_skip(msg);
> +        return -1;
> +    }
> +
> +    if (args->use_shm_memfile) {
> +#if defined(__NR_userfaultfd) && defined(__linux__)

IIUC we only need defined(__linux__) as there's nothing to do with uffd yet?

> +        int fd;
> +        uint64_t size;
> +
> +        if (getenv("GITLAB_CI")) {
> +            /*
> +             * Gitlab runners are limited to 64MB shm size and despite
> +             * pre-allocation there is concern that concurrent tests
> +             * could result in nondeterministic failures. Until all shm
> +             * usage in all CI tests is found to fail gracefully on
> +             * ENOSPC, it is safer to avoid large allocations for now.
> +             *
> +             * https://lore.kernel.org/qemu-devel/875xuwg4mx.fsf@suse.de/
> +             */
> +            g_test_skip("shm tests are not supported in Gitlab CI environment");
> +            return -1;
> +        }

I'm not sure whether this is Fabiano's intention.  I'm wondering whether we
can drop this and just let it still run there.

Other tests not detecting avaiablility of shmem looks like a separate issue
to be fixed to me, regardless of this.

My wild guess is since we're doing memory_size+64K below then if test won't
fail others won't either, as normally the shmem quota should normally be
power of 2 anyway.. then it should always fit another few MBs if this one.
While this test is ready to fail gracefully now.

> +
> +        if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) {
> +            g_test_skip("/dev/shm does not exist or is not a directory");
> +            return -1;
> +        }
> +
> +        /*
> +         * Pre-create and allocate the file here, because /dev/shm/
> +         * is known to be limited in size in some places (e.g., Gitlab CI).
> +         */
> +        memfile_path = g_strdup_printf("/dev/shm/qemu-%d", getpid());
> +        fd = open(memfile_path, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
> +        if (fd == -1) {
> +            g_test_skip("/dev/shm file could not be created");
> +            return -1;
> +        }
> +
> +        g_assert(qemu_strtosz(memory_size, NULL, &size) == 0);
> +        size += 64*1024; /* QEMU may map a bit more memory for a guard page */
> +
> +        if (fallocate(fd, 0, 0, size) == -1) {
> +            unlink(memfile_path);
> +            perror("could not alloc"); exit(1);
> +            g_test_skip("Could not allocate machine memory in /dev/shm");
> +            return -1;
> +        }
> +        close(fd);
> +#else
> +        g_test_skip("userfaultfd is not supported");

"/dev/shm not available" instead?

> +#endif
> +    } else if (args->use_memfile) {
>          memfile_path = g_strdup_printf("/%s/qemu-%d", tmpfs, getpid());
>          memfile_opts = g_strdup_printf(
>              "-object memory-backend-file,id=mem0,size=%s"
> @@ -751,12 +808,6 @@ static int test_migrate_start(QTestState **from, QTestState **to,
>          kvm_opts = ",dirty-ring-size=4096";
>      }
>  
> -    if (!qtest_has_machine(machine_alias)) {
> -        g_autofree char *msg = g_strdup_printf("machine %s not supported", machine_alias);
> -        g_test_skip(msg);
> -        return -1;
> -    }
> -
>      machine = resolve_machine_version(machine_alias, QEMU_ENV_SRC,
>                                        QEMU_ENV_DST);
>  
> @@ -807,7 +858,7 @@ static int test_migrate_start(QTestState **from, QTestState **to,
>       * Remove shmem file immediately to avoid memory leak in test failed case.
>       * It's valid because QEMU has already opened this file
>       */
> -    if (args->use_memfile) {
> +    if (args->use_memfile || args->use_shm_memfile) {
>          unlink(memfile_path);
>      }
>  
> @@ -1275,6 +1326,15 @@ static void test_postcopy(void)
>      test_postcopy_common(&args);
>  }
>  
> +static void test_postcopy_memfile(void)
> +{

IMHO the defined(__NR_userfaultfd) should be here to guard if needed.

Or rather, we don't need to care about uffd yet? As what we already do with
test_postcopy().

I'm guessing the test just doesn't run on !linux, while compilation always
works with/without that.

Thanks,

> +    MigrateCommon args = {
> +        .start.use_shm_memfile = true,
> +    };
> +
> +    test_postcopy_common(&args);
> +}
> +
>  static void test_postcopy_suspend(void)
>  {
>      MigrateCommon args = {
> @@ -3441,6 +3501,7 @@ int main(int argc, char **argv)
>  
>      if (has_uffd) {
>          migration_test_add("/migration/postcopy/plain", test_postcopy);
> +        migration_test_add("/migration/postcopy/memfile", test_postcopy_memfile);
>          migration_test_add("/migration/postcopy/recovery/plain",
>                             test_postcopy_recovery);
>          migration_test_add("/migration/postcopy/preempt/plain",
> -- 
> 2.43.0
>
Fabiano Rosas May 31, 2024, 4:01 p.m. UTC | #2
Peter Xu <peterx@redhat.com> writes:

> On Thu, May 30, 2024 at 07:54:07PM +1000, Nicholas Piggin wrote:
>> Postcopy requires userfaultfd support, which requires tmpfs if a memory
>> file is used.
>> 
>> This adds back support for /dev/shm memory files, but adds preallocation
>> to skip environments where that mount is limited in size.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>
> Thanks for doing this regardless.
>
>> ---
>>  tests/qtest/migration-test.c | 77 ++++++++++++++++++++++++++++++++----
>>  1 file changed, 69 insertions(+), 8 deletions(-)
>> 
>> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
>> index 86eace354e..5078033ded 100644
>> --- a/tests/qtest/migration-test.c
>> +++ b/tests/qtest/migration-test.c
>> @@ -11,6 +11,7 @@
>>   */
>>  
>>  #include "qemu/osdep.h"
>> +#include "qemu/cutils.h"
>>  
>>  #include "libqtest.h"
>>  #include "qapi/qmp/qdict.h"
>> @@ -553,6 +554,7 @@ typedef struct {
>>       */
>>      bool hide_stderr;
>>      bool use_memfile;
>> +    bool use_shm_memfile;
>
> Nitpick: when having both, it's slightly confusing on the name, e.g. not
> clear whether use_memfile needs to be set to true too if use_shm_memfile=true.
>
> Maybe "use_tmpfs_memfile" and "use_shm_memfile"?
>
>>      /* only launch the target process */
>>      bool only_target;
>>      /* Use dirty ring if true; dirty logging otherwise */
>> @@ -739,7 +741,62 @@ static int test_migrate_start(QTestState **from, QTestState **to,
>>          ignore_stderr = "";
>>      }
>>  
>> -    if (args->use_memfile) {
>> +    if (!qtest_has_machine(machine_alias)) {
>> +        g_autofree char *msg = g_strdup_printf("machine %s not supported",
>> +                                               machine_alias);
>> +        g_test_skip(msg);
>> +        return -1;
>> +    }
>> +
>> +    if (args->use_shm_memfile) {
>> +#if defined(__NR_userfaultfd) && defined(__linux__)
>
> IIUC we only need defined(__linux__) as there's nothing to do with uffd yet?
>
>> +        int fd;
>> +        uint64_t size;
>> +
>> +        if (getenv("GITLAB_CI")) {
>> +            /*
>> +             * Gitlab runners are limited to 64MB shm size and despite
>> +             * pre-allocation there is concern that concurrent tests
>> +             * could result in nondeterministic failures. Until all shm
>> +             * usage in all CI tests is found to fail gracefully on
>> +             * ENOSPC, it is safer to avoid large allocations for now.
>> +             *
>> +             * https://lore.kernel.org/qemu-devel/875xuwg4mx.fsf@suse.de/
>> +             */
>> +            g_test_skip("shm tests are not supported in Gitlab CI environment");
>> +            return -1;
>> +        }
>
> I'm not sure whether this is Fabiano's intention.  I'm wondering whether we
> can drop this and just let it still run there.
>

It was my intention. But I overlooked the fact that the current shm
cannot even run one migration test already.

> Other tests not detecting avaiablility of shmem looks like a separate issue
> to be fixed to me, regardless of this.
>
> My wild guess is since we're doing memory_size+64K below then if test won't
> fail others won't either, as normally the shmem quota should normally be
> power of 2 anyway.. then it should always fit another few MBs if this one.
> While this test is ready to fail gracefully now.
>

I agree. Let's drop this part then.
Nicholas Piggin June 3, 2024, 6:02 a.m. UTC | #3
On Fri May 31, 2024 at 11:34 PM AEST, Peter Xu wrote:
> On Thu, May 30, 2024 at 07:54:07PM +1000, Nicholas Piggin wrote:
> > Postcopy requires userfaultfd support, which requires tmpfs if a memory
> > file is used.
> > 
> > This adds back support for /dev/shm memory files, but adds preallocation
> > to skip environments where that mount is limited in size.
> > 
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>
> Thanks for doing this regardless.
>
> > ---
> >  tests/qtest/migration-test.c | 77 ++++++++++++++++++++++++++++++++----
> >  1 file changed, 69 insertions(+), 8 deletions(-)
> > 
> > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> > index 86eace354e..5078033ded 100644
> > --- a/tests/qtest/migration-test.c
> > +++ b/tests/qtest/migration-test.c
> > @@ -11,6 +11,7 @@
> >   */
> >  
> >  #include "qemu/osdep.h"
> > +#include "qemu/cutils.h"
> >  
> >  #include "libqtest.h"
> >  #include "qapi/qmp/qdict.h"
> > @@ -553,6 +554,7 @@ typedef struct {
> >       */
> >      bool hide_stderr;
> >      bool use_memfile;
> > +    bool use_shm_memfile;
>
> Nitpick: when having both, it's slightly confusing on the name, e.g. not
> clear whether use_memfile needs to be set to true too if use_shm_memfile=true.
>
> Maybe "use_tmpfs_memfile" and "use_shm_memfile"?

Could be easy to confuse. It's not actually "tmpfs", it is the fs that
is mounted on /tmp :) tmpfs *is* shmfs in Linux. The intention was just
that if you don't specify then it's because you don't have a particular
requirement other than enough space.

> >      /* only launch the target process */
> >      bool only_target;
> >      /* Use dirty ring if true; dirty logging otherwise */
> > @@ -739,7 +741,62 @@ static int test_migrate_start(QTestState **from, QTestState **to,
> >          ignore_stderr = "";
> >      }
> >  
> > -    if (args->use_memfile) {
> > +    if (!qtest_has_machine(machine_alias)) {
> > +        g_autofree char *msg = g_strdup_printf("machine %s not supported",
> > +                                               machine_alias);
> > +        g_test_skip(msg);
> > +        return -1;
> > +    }
> > +
> > +    if (args->use_shm_memfile) {
> > +#if defined(__NR_userfaultfd) && defined(__linux__)
>
> IIUC we only need defined(__linux__) as there's nothing to do with uffd yet?

I thought it was polite since it uses a few other Linux (or at least
POSIX) calls directly rather than go via the abstraction layer. Probably
it would never happen that something defines __NR_userfaultfd that does
not also have open and fallocate, but no harm?

> > +        int fd;
> > +        uint64_t size;
> > +
> > +        if (getenv("GITLAB_CI")) {
> > +            /*
> > +             * Gitlab runners are limited to 64MB shm size and despite
> > +             * pre-allocation there is concern that concurrent tests
> > +             * could result in nondeterministic failures. Until all shm
> > +             * usage in all CI tests is found to fail gracefully on
> > +             * ENOSPC, it is safer to avoid large allocations for now.
> > +             *
> > +             * https://lore.kernel.org/qemu-devel/875xuwg4mx.fsf@suse.de/
> > +             */
> > +            g_test_skip("shm tests are not supported in Gitlab CI environment");
> > +            return -1;
> > +        }
>
> I'm not sure whether this is Fabiano's intention.  I'm wondering whether we
> can drop this and just let it still run there.
>
> Other tests not detecting avaiablility of shmem looks like a separate issue
> to be fixed to me, regardless of this.
>
> My wild guess is since we're doing memory_size+64K below then if test won't
> fail others won't either, as normally the shmem quota should normally be
> power of 2 anyway.. then it should always fit another few MBs if this one.
> While this test is ready to fail gracefully now.

Well if CI runners got upgraded to 256MB shm space under us, we might
start to succeed? I _think_ fallocate on tmpfs should be relatively
atomic in that it doesn't try to allocate space if the fs was too small,
but at least naive implementations can do it too, so I didn't want to
rely on it.

I'm fine to do what you and Fabiano prefer. If we commit this then later
remove this hunk, it would be easy to revert if it started to show up
fails. OTOH not too hard to add it in later either.

> > +
> > +        if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) {
> > +            g_test_skip("/dev/shm does not exist or is not a directory");
> > +            return -1;
> > +        }
> > +
> > +        /*
> > +         * Pre-create and allocate the file here, because /dev/shm/
> > +         * is known to be limited in size in some places (e.g., Gitlab CI).
> > +         */
> > +        memfile_path = g_strdup_printf("/dev/shm/qemu-%d", getpid());
> > +        fd = open(memfile_path, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
> > +        if (fd == -1) {
> > +            g_test_skip("/dev/shm file could not be created");
> > +            return -1;
> > +        }
> > +
> > +        g_assert(qemu_strtosz(memory_size, NULL, &size) == 0);
> > +        size += 64*1024; /* QEMU may map a bit more memory for a guard page */
> > +
> > +        if (fallocate(fd, 0, 0, size) == -1) {
> > +            unlink(memfile_path);
> > +            perror("could not alloc"); exit(1);
> > +            g_test_skip("Could not allocate machine memory in /dev/shm");
> > +            return -1;
> > +        }
> > +        close(fd);
> > +#else
> > +        g_test_skip("userfaultfd is not supported");
>
> "/dev/shm not available" instead?

Ah yes, that was for the userfaultfd not supported case, but indeed
since renaming it from uffd to shm, it is better to move the ifdef
guard to where you say.

> > +#endif
> > +    } else if (args->use_memfile) {
> >          memfile_path = g_strdup_printf("/%s/qemu-%d", tmpfs, getpid());
> >          memfile_opts = g_strdup_printf(
> >              "-object memory-backend-file,id=mem0,size=%s"
> > @@ -751,12 +808,6 @@ static int test_migrate_start(QTestState **from, QTestState **to,
> >          kvm_opts = ",dirty-ring-size=4096";
> >      }
> >  
> > -    if (!qtest_has_machine(machine_alias)) {
> > -        g_autofree char *msg = g_strdup_printf("machine %s not supported", machine_alias);
> > -        g_test_skip(msg);
> > -        return -1;
> > -    }
> > -
> >      machine = resolve_machine_version(machine_alias, QEMU_ENV_SRC,
> >                                        QEMU_ENV_DST);
> >  
> > @@ -807,7 +858,7 @@ static int test_migrate_start(QTestState **from, QTestState **to,
> >       * Remove shmem file immediately to avoid memory leak in test failed case.
> >       * It's valid because QEMU has already opened this file
> >       */
> > -    if (args->use_memfile) {
> > +    if (args->use_memfile || args->use_shm_memfile) {
> >          unlink(memfile_path);
> >      }
> >  
> > @@ -1275,6 +1326,15 @@ static void test_postcopy(void)
> >      test_postcopy_common(&args);
> >  }
> >  
> > +static void test_postcopy_memfile(void)
> > +{
>
> IMHO the defined(__NR_userfaultfd) should be here to guard if needed.
>
> Or rather, we don't need to care about uffd yet? As what we already do with
> test_postcopy().
>
> I'm guessing the test just doesn't run on !linux, while compilation always
> works with/without that.

Yeah they fall under has_uffd, so no ifdef required.

Thanks,
Nick
Peter Xu June 3, 2024, 3:50 p.m. UTC | #4
On Mon, Jun 03, 2024 at 04:02:42PM +1000, Nicholas Piggin wrote:
> On Fri May 31, 2024 at 11:34 PM AEST, Peter Xu wrote:
> > On Thu, May 30, 2024 at 07:54:07PM +1000, Nicholas Piggin wrote:
> > > Postcopy requires userfaultfd support, which requires tmpfs if a memory
> > > file is used.
> > > 
> > > This adds back support for /dev/shm memory files, but adds preallocation
> > > to skip environments where that mount is limited in size.
> > > 
> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> >
> > Thanks for doing this regardless.
> >
> > > ---
> > >  tests/qtest/migration-test.c | 77 ++++++++++++++++++++++++++++++++----
> > >  1 file changed, 69 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> > > index 86eace354e..5078033ded 100644
> > > --- a/tests/qtest/migration-test.c
> > > +++ b/tests/qtest/migration-test.c
> > > @@ -11,6 +11,7 @@
> > >   */
> > >  
> > >  #include "qemu/osdep.h"
> > > +#include "qemu/cutils.h"
> > >  
> > >  #include "libqtest.h"
> > >  #include "qapi/qmp/qdict.h"
> > > @@ -553,6 +554,7 @@ typedef struct {
> > >       */
> > >      bool hide_stderr;
> > >      bool use_memfile;
> > > +    bool use_shm_memfile;
> >
> > Nitpick: when having both, it's slightly confusing on the name, e.g. not
> > clear whether use_memfile needs to be set to true too if use_shm_memfile=true.
> >
> > Maybe "use_tmpfs_memfile" and "use_shm_memfile"?
> 
> Could be easy to confuse. It's not actually "tmpfs", it is the fs that
> is mounted on /tmp :) tmpfs *is* shmfs in Linux. The intention was just
> that if you don't specify then it's because you don't have a particular
> requirement other than enough space.

Ah sorry, yeah I meant use_tmp_memfile..

> 
> > >      /* only launch the target process */
> > >      bool only_target;
> > >      /* Use dirty ring if true; dirty logging otherwise */
> > > @@ -739,7 +741,62 @@ static int test_migrate_start(QTestState **from, QTestState **to,
> > >          ignore_stderr = "";
> > >      }
> > >  
> > > -    if (args->use_memfile) {
> > > +    if (!qtest_has_machine(machine_alias)) {
> > > +        g_autofree char *msg = g_strdup_printf("machine %s not supported",
> > > +                                               machine_alias);
> > > +        g_test_skip(msg);
> > > +        return -1;
> > > +    }
> > > +
> > > +    if (args->use_shm_memfile) {
> > > +#if defined(__NR_userfaultfd) && defined(__linux__)
> >
> > IIUC we only need defined(__linux__) as there's nothing to do with uffd yet?
> 
> I thought it was polite since it uses a few other Linux (or at least
> POSIX) calls directly rather than go via the abstraction layer. Probably
> it would never happen that something defines __NR_userfaultfd that does
> not also have open and fallocate, but no harm?

It's about when there're shmem tests added without postcopy, we may want
the host to run these tests even if that host doesn't support userfault
syscall.

$ grep -iE "(SHMEM|USERFAULTFD)=" .config
CONFIG_SHMEM=y
CONFIG_USERFAULTFD=y

So I want to make sure the test runs the right thing always, irrelevant of
which arch it ran on, or kernel config.

I agree that's not a huge deal, but still I wanted to remove the
collreation that userfault and shmem is closely related - they're just
totally irrelevant to me, e.g., we can have shmem test/hostconfig without
userfault, we can also have userfault test/hostconfig without shmem.

> 
> > > +        int fd;
> > > +        uint64_t size;
> > > +
> > > +        if (getenv("GITLAB_CI")) {
> > > +            /*
> > > +             * Gitlab runners are limited to 64MB shm size and despite
> > > +             * pre-allocation there is concern that concurrent tests
> > > +             * could result in nondeterministic failures. Until all shm
> > > +             * usage in all CI tests is found to fail gracefully on
> > > +             * ENOSPC, it is safer to avoid large allocations for now.
> > > +             *
> > > +             * https://lore.kernel.org/qemu-devel/875xuwg4mx.fsf@suse.de/
> > > +             */
> > > +            g_test_skip("shm tests are not supported in Gitlab CI environment");
> > > +            return -1;
> > > +        }
> >
> > I'm not sure whether this is Fabiano's intention.  I'm wondering whether we
> > can drop this and just let it still run there.
> >
> > Other tests not detecting avaiablility of shmem looks like a separate issue
> > to be fixed to me, regardless of this.
> >
> > My wild guess is since we're doing memory_size+64K below then if test won't
> > fail others won't either, as normally the shmem quota should normally be
> > power of 2 anyway.. then it should always fit another few MBs if this one.
> > While this test is ready to fail gracefully now.
> 
> Well if CI runners got upgraded to 256MB shm space under us, we might
> start to succeed? I _think_ fallocate on tmpfs should be relatively

Yep, and IMHO that's also why I think we can start trying this even on
GITLAB_CI if it can support it.

> atomic in that it doesn't try to allocate space if the fs was too small,
> but at least naive implementations can do it too, so I didn't want to
> rely on it.
> 
> I'm fine to do what you and Fabiano prefer. If we commit this then later
> remove this hunk, it would be easy to revert if it started to show up
> fails. OTOH not too hard to add it in later either.

The question is why it can fail even with fallocate() around.  I just don't
see how it can fail besides either succeed or skip.

Maybe I missed something alone the lines.. though.

The other test (e.g. ivshmem) can fail, but those can fail with/without
the change here.  That's also why I think we need to fix them separately,
rather than trying to detect GITLAB_CI only because we know they _might_ be
running concurrently - if all tests have protection over shmem allocation
then IIUC we shouldn't fail anymore, but only success/skip.

I agree this doesn't look good at all, as it's not predictable on what test
will be covered in a CI run.. but again IMHO that's yet another separate
issue on how we can make CI support enough shmem usages.

Thanks,
diff mbox series

Patch

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 86eace354e..5078033ded 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -11,6 +11,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 
 #include "libqtest.h"
 #include "qapi/qmp/qdict.h"
@@ -553,6 +554,7 @@  typedef struct {
      */
     bool hide_stderr;
     bool use_memfile;
+    bool use_shm_memfile;
     /* only launch the target process */
     bool only_target;
     /* Use dirty ring if true; dirty logging otherwise */
@@ -739,7 +741,62 @@  static int test_migrate_start(QTestState **from, QTestState **to,
         ignore_stderr = "";
     }
 
-    if (args->use_memfile) {
+    if (!qtest_has_machine(machine_alias)) {
+        g_autofree char *msg = g_strdup_printf("machine %s not supported",
+                                               machine_alias);
+        g_test_skip(msg);
+        return -1;
+    }
+
+    if (args->use_shm_memfile) {
+#if defined(__NR_userfaultfd) && defined(__linux__)
+        int fd;
+        uint64_t size;
+
+        if (getenv("GITLAB_CI")) {
+            /*
+             * Gitlab runners are limited to 64MB shm size and despite
+             * pre-allocation there is concern that concurrent tests
+             * could result in nondeterministic failures. Until all shm
+             * usage in all CI tests is found to fail gracefully on
+             * ENOSPC, it is safer to avoid large allocations for now.
+             *
+             * https://lore.kernel.org/qemu-devel/875xuwg4mx.fsf@suse.de/
+             */
+            g_test_skip("shm tests are not supported in Gitlab CI environment");
+            return -1;
+        }
+
+        if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) {
+            g_test_skip("/dev/shm does not exist or is not a directory");
+            return -1;
+        }
+
+        /*
+         * Pre-create and allocate the file here, because /dev/shm/
+         * is known to be limited in size in some places (e.g., Gitlab CI).
+         */
+        memfile_path = g_strdup_printf("/dev/shm/qemu-%d", getpid());
+        fd = open(memfile_path, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+        if (fd == -1) {
+            g_test_skip("/dev/shm file could not be created");
+            return -1;
+        }
+
+        g_assert(qemu_strtosz(memory_size, NULL, &size) == 0);
+        size += 64*1024; /* QEMU may map a bit more memory for a guard page */
+
+        if (fallocate(fd, 0, 0, size) == -1) {
+            unlink(memfile_path);
+            perror("could not alloc"); exit(1);
+            g_test_skip("Could not allocate machine memory in /dev/shm");
+            return -1;
+        }
+        close(fd);
+#else
+        g_test_skip("userfaultfd is not supported");
+#endif
+    } else if (args->use_memfile) {
         memfile_path = g_strdup_printf("/%s/qemu-%d", tmpfs, getpid());
         memfile_opts = g_strdup_printf(
             "-object memory-backend-file,id=mem0,size=%s"
@@ -751,12 +808,6 @@  static int test_migrate_start(QTestState **from, QTestState **to,
         kvm_opts = ",dirty-ring-size=4096";
     }
 
-    if (!qtest_has_machine(machine_alias)) {
-        g_autofree char *msg = g_strdup_printf("machine %s not supported", machine_alias);
-        g_test_skip(msg);
-        return -1;
-    }
-
     machine = resolve_machine_version(machine_alias, QEMU_ENV_SRC,
                                       QEMU_ENV_DST);
 
@@ -807,7 +858,7 @@  static int test_migrate_start(QTestState **from, QTestState **to,
      * Remove shmem file immediately to avoid memory leak in test failed case.
      * It's valid because QEMU has already opened this file
      */
-    if (args->use_memfile) {
+    if (args->use_memfile || args->use_shm_memfile) {
         unlink(memfile_path);
     }
 
@@ -1275,6 +1326,15 @@  static void test_postcopy(void)
     test_postcopy_common(&args);
 }
 
+static void test_postcopy_memfile(void)
+{
+    MigrateCommon args = {
+        .start.use_shm_memfile = true,
+    };
+
+    test_postcopy_common(&args);
+}
+
 static void test_postcopy_suspend(void)
 {
     MigrateCommon args = {
@@ -3441,6 +3501,7 @@  int main(int argc, char **argv)
 
     if (has_uffd) {
         migration_test_add("/migration/postcopy/plain", test_postcopy);
+        migration_test_add("/migration/postcopy/memfile", test_postcopy_memfile);
         migration_test_add("/migration/postcopy/recovery/plain",
                            test_postcopy_recovery);
         migration_test_add("/migration/postcopy/preempt/plain",