@@ -2380,6 +2380,15 @@ bool migrate_postcopy_blocktime(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
}
+bool migrate_postcopy_uffd_usermode_only(void)
+{
+    /* True when the postcopy-uffd-usermode-only capability is enabled. */
+    MigrationState *ms = migrate_get_current();
+    int cap = MIGRATION_CAPABILITY_POSTCOPY_UFFD_USERMODE_ONLY;
+
+    return ms->enabled_capabilities[cap];
+}
+
bool migrate_use_compression(void)
{
MigrationState *s;
@@ -358,6 +358,7 @@ int migrate_decompress_threads(void);
bool migrate_use_events(void);
bool migrate_postcopy_blocktime(void);
bool migrate_background_snapshot(void);
+bool migrate_postcopy_uffd_usermode_only(void);
/* Sending on the return path - generic and then for each message type */
void migrate_send_rp_shut(MigrationIncomingState *mis,
@@ -206,9 +206,14 @@ static bool receive_ufd_features(uint64_t *features)
struct uffdio_api api_struct = {0};
int ufd;
bool ret = true;
+ int flags;
+
+ flags = O_CLOEXEC;
+ if (migrate_postcopy_uffd_usermode_only())
+ flags |= UFFD_USER_MODE_ONLY;
/* if we are here __NR_userfaultfd should exists */
- ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+ ufd = syscall(__NR_userfaultfd, flags);
if (ufd == -1) {
error_report("%s: syscall __NR_userfaultfd failed: %s", __func__,
strerror(errno));
@@ -352,13 +357,18 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
struct uffdio_range range_struct;
uint64_t feature_mask;
Error *local_err = NULL;
+ int flags;
if (qemu_target_page_size() > pagesize) {
error_report("Target page size bigger than host page size");
goto out;
}
- ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+ flags = O_CLOEXEC;
+ if (migrate_postcopy_uffd_usermode_only())
+ flags |= UFFD_USER_MODE_ONLY;
+
+ ufd = syscall(__NR_userfaultfd, flags);
if (ufd == -1) {
error_report("%s: userfaultfd not available: %s", __func__,
strerror(errno));
@@ -1064,8 +1074,14 @@ retry:
int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
{
+ int flags;
+
+ flags = O_CLOEXEC | O_NONBLOCK;
+ if (migrate_postcopy_uffd_usermode_only())
+ flags |= UFFD_USER_MODE_ONLY;
+
/* Open the fd for the kernel to give us userfaults */
- mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ mis->userfault_fd = syscall(__NR_userfaultfd, flags);
if (mis->userfault_fd == -1) {
error_report("%s: Failed to open userfault fd: %s", __func__,
strerror(errno));
@@ -452,6 +452,11 @@
# procedure starts. The VM RAM is saved with running VM.
# (since 6.0)
#
+# @postcopy-uffd-usermode-only: If enabled, it allows unprivileged users to use
+# userfaultfd, with the restriction that only page
+# faults raised in user mode can be handled.
+# (since 6.2)
+#
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
@@ -459,7 +464,8 @@
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
'block', 'return-path', 'pause-before-switchover', 'multifd',
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
- 'x-ignore-shared', 'validate-uuid', 'background-snapshot'] }
+ 'x-ignore-shared', 'validate-uuid', 'background-snapshot',
+ 'postcopy-uffd-usermode-only'] }
##
# @MigrationCapabilityStatus:
The default value of the unprivileged_userfaultfd sysctl knob was changed to 0 in kernel v5.11 by commit d0d4730a: userfaultfd: add user-mode only option to unprivileged_userfaultfd sysctl knob. In this mode, an unprivileged user (without the CAP_SYS_PTRACE capability) must pass UFFD_USER_MODE_ONLY to userfaultfd() or the call will fail with EPERM. So add a capability to pass UFFD_USER_MODE_ONLY to support it. Signed-off-by: Lin Ma <lma@suse.com> --- migration/migration.c | 9 +++++++++ migration/migration.h | 1 + migration/postcopy-ram.c | 22 +++++++++++++++++++--- qapi/migration.json | 8 +++++++- 4 files changed, 36 insertions(+), 4 deletions(-)