diff mbox series

qemu_init: increase NOFILE soft limit on POSIX

Message ID 20231212143250.677668-1-f.ebner@proxmox.com
State New
Headers show
Series qemu_init: increase NOFILE soft limit on POSIX | expand

Commit Message

Fiona Ebner Dec. 12, 2023, 2:32 p.m. UTC
In many configurations, e.g. multiple vNICs with multiple queues or
with many Ceph OSDs, the default soft limit of 1024 is not enough.
QEMU is supposed to work fine with file descriptors >= 1024 and does
not use select() on POSIX. Bump the soft limit to the allowed hard
limit to avoid issues with the aforementioned configurations.

Of course the limit could be raised from the outside, but the man page
of systemd.exec states about 'LimitNOFILE=':

> Don't use.
> [...]
> Typically applications should increase their soft limit to the hard
> limit on their own, if they are OK with working with file
> descriptors above 1023,

Buglink: https://bugzilla.proxmox.com/show_bug.cgi?id=4507
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 include/sysemu/os-posix.h |  1 +
 include/sysemu/os-win32.h |  5 +++++
 os-posix.c                | 18 ++++++++++++++++++
 system/vl.c               |  2 ++
 4 files changed, 26 insertions(+)

Comments

Daniel P. Berrangé Dec. 15, 2023, 11:41 a.m. UTC | #1
On Tue, Dec 12, 2023 at 03:32:50PM +0100, Fiona Ebner wrote:
> In many configurations, e.g. multiple vNICs with multiple queues or
> with many Ceph OSDs, the default soft limit of 1024 is not enough.
> QEMU is supposed to work fine with file descriptors >= 1024 and does
> not use select() on POSIX. Bump the soft limit to the allowed hard
> limit to avoid issues with the aforementioned configurations.
> 
> Of course the limit could be raised from the outside, but the man page
> of systemd.exec states about 'LimitNOFILE=':
> 
> > Don't use.
> > [...]
> > Typically applications should increase their soft limit to the hard
> > limit on their own, if they are OK with working with file
> > descriptors above 1023,
> 
> Buglink: https://bugzilla.proxmox.com/show_bug.cgi?id=4507
> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
> ---
>  include/sysemu/os-posix.h |  1 +
>  include/sysemu/os-win32.h |  5 +++++
>  os-posix.c                | 18 ++++++++++++++++++
>  system/vl.c               |  2 ++
>  4 files changed, 26 insertions(+)
> 
> diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
> index dff32ae185..b881ac6c6f 100644
> --- a/include/sysemu/os-posix.h
> +++ b/include/sysemu/os-posix.h
> @@ -51,6 +51,7 @@ bool is_daemonized(void);
>  void os_daemonize(void);
>  bool os_set_runas(const char *user_id);
>  void os_set_chroot(const char *path);
> +void os_setup_limits(void);
>  void os_setup_post(void);
>  int os_mlock(void);
>  
> diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
> index 1047d260cb..106f155037 100644
> --- a/include/sysemu/os-win32.h
> +++ b/include/sysemu/os-win32.h
> @@ -128,6 +128,11 @@ static inline int os_mlock(void)
>      return -ENOSYS;
>  }
>  
> +void os_setup_limits(void)
> +{
> +    return;
> +}
> +
>  #define fsync _commit
>  
>  #if !defined(lseek)
> diff --git a/os-posix.c b/os-posix.c
> index 52ef6990ff..eb55473140 100644
> --- a/os-posix.c
> +++ b/os-posix.c
> @@ -24,6 +24,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include <sys/resource.h>
>  #include <sys/wait.h>
>  #include <pwd.h>
>  #include <grp.h>
> @@ -256,6 +257,23 @@ void os_daemonize(void)
>      }
>  }
>  
> +void os_setup_limits(void)
> +{
> +    struct rlimit nofile;
> +
> +    if (getrlimit(RLIMIT_NOFILE, &nofile) < 0) {
> +        warn_report("unable to query NOFILE limit: %s", strerror(errno));
> +        return;
> +    }
> +

I'd suggest returning here if rlim_cur == rlim_max, to avoid the
redundant setrlimit call. This will avoid a needless
warning message if someone used a strict seccomp filter
to block setrlimit, and had raised NOFILE before QEMU was
exec'd.

> +    nofile.rlim_cur = nofile.rlim_max;
> +
> +    if (setrlimit(RLIMIT_NOFILE, &nofile) < 0) {
> +        warn_report("unable to set NOFILE limit: %s", strerror(errno));
> +        return;
> +    }
> +}
> +
>  void os_setup_post(void)
>  {
>      int fd = 0;
> diff --git a/system/vl.c b/system/vl.c
> index 2bcd9efb9a..6f42f37200 100644
> --- a/system/vl.c
> +++ b/system/vl.c
> @@ -2774,6 +2774,8 @@ void qemu_init(int argc, char **argv)
>      error_init(argv[0]);
>      qemu_init_exec_dir(argv[0]);
>  
> +    os_setup_limits();
> +
>      qemu_init_arch_modules();
>  
>      qemu_init_subsystems();
> -- 
> 2.39.2
> 
> 
> 

With regards,
Daniel
diff mbox series

Patch

diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index dff32ae185..b881ac6c6f 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -51,6 +51,7 @@  bool is_daemonized(void);
 void os_daemonize(void);
 bool os_set_runas(const char *user_id);
 void os_set_chroot(const char *path);
+void os_setup_limits(void);
 void os_setup_post(void);
 int os_mlock(void);
 
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 1047d260cb..106f155037 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -128,6 +128,11 @@  static inline int os_mlock(void)
     return -ENOSYS;
 }
 
+void os_setup_limits(void)
+{
+    return;
+}
+
 #define fsync _commit
 
 #if !defined(lseek)
diff --git a/os-posix.c b/os-posix.c
index 52ef6990ff..eb55473140 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -24,6 +24,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include <sys/resource.h>
 #include <sys/wait.h>
 #include <pwd.h>
 #include <grp.h>
@@ -256,6 +257,23 @@  void os_daemonize(void)
     }
 }
 
+void os_setup_limits(void)
+{
+    struct rlimit nofile;
+
+    if (getrlimit(RLIMIT_NOFILE, &nofile) < 0) {
+        warn_report("unable to query NOFILE limit: %s", strerror(errno));
+        return;
+    }
+
+    nofile.rlim_cur = nofile.rlim_max;
+
+    if (setrlimit(RLIMIT_NOFILE, &nofile) < 0) {
+        warn_report("unable to set NOFILE limit: %s", strerror(errno));
+        return;
+    }
+}
+
 void os_setup_post(void)
 {
     int fd = 0;
diff --git a/system/vl.c b/system/vl.c
index 2bcd9efb9a..6f42f37200 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -2774,6 +2774,8 @@  void qemu_init(int argc, char **argv)
     error_init(argv[0]);
     qemu_init_exec_dir(argv[0]);
 
+    os_setup_limits();
+
     qemu_init_arch_modules();
 
     qemu_init_subsystems();