Message ID | xnv8vxwh1e.fsf@greed.delorie.com |
---|---|
State | New |
Headers | show |
Series | [v6] Allow for unpriviledged nested containers | expand |
On 3/28/22 23:53, DJ Delorie wrote: > > [changes since v5: whether or not to isolate the pid namespace is now a > test-specific "pidns" option, support_needs_proc() takes a "why" message > for future readers] > > If the build itself is run in a container, we may not be able to > fully set up a nested container for test-container testing. > Notably is the mounting of /proc, since it's critical that it > be mounted from within the same PID namespace as its users, and > thus cannot be bind mounted from outside the container like other > mounts. > > This patch defaults to using the parent's PID namespace instead of > creating a new one, as this is more likely to be allowed. > > If the test needs an isolated PID namespace, it should add the "pidns" > command to its init script. LGTM. Reviewed-by: Carlos O'Donell <carlos@redhat.com> > diff --git a/elf/tst-pldd.c b/elf/tst-pldd.c > index f31f9956faa..378262504a5 100644 > --- a/elf/tst-pldd.c > +++ b/elf/tst-pldd.c > @@ -85,6 +85,8 @@ in_str_list (const char *libname, const char *const strlist[]) > static int > do_test (void) > { > + support_need_proc ("needs /proc/sys/kernel/yama/ptrace_scope and /proc/$child"); > + OK. Nice use of a message to print. > /* Check if our subprocess can be debugged with ptrace. */ > { > int ptrace_scope = support_ptrace_scope (); > diff --git a/nptl/tst-pthread-getattr.c b/nptl/tst-pthread-getattr.c > index d2ebf308ae7..3f6f76ea83b 100644 > --- a/nptl/tst-pthread-getattr.c > +++ b/nptl/tst-pthread-getattr.c > @@ -28,6 +28,8 @@ > #include <unistd.h> > #include <inttypes.h> > > +#include <support/support.h> > + > /* There is an obscure bug in the kernel due to which RLIMIT_STACK is sometimes > returned as unlimited when it is not, which may cause this test to fail. > There is also the other case where RLIMIT_STACK is intentionally set as > @@ -153,6 +155,8 @@ check_stack_top (void) > static int > do_test (void) > { > + support_need_proc ("Reads /proc/self/maps to get stack size."); OK. > + > pagesize = sysconf (_SC_PAGESIZE); > return check_stack_top (); > } > diff --git a/nss/tst-reload2.c b/nss/tst-reload2.c > index fb3b94a1fab..7df0ca740b6 100644 > --- a/nss/tst-reload2.c > +++ b/nss/tst-reload2.c > @@ -95,6 +95,8 @@ do_test (void) > char buf1[PATH_MAX]; > char buf2[PATH_MAX]; > > + support_need_proc ("Our xmkdirp fails if we can't map our uid, which requires /proc."); > + OK. > sprintf (buf1, "/subdir%s", support_slibdir_prefix); > xmkdirp (buf1, 0777); > > diff --git a/support/Makefile b/support/Makefile > index 5ddcb8d1581..f036a813048 100644 > --- a/support/Makefile > +++ b/support/Makefile > @@ -64,6 +64,7 @@ libsupport-routines = \ > support_format_netent \ > support_isolate_in_subprocess \ > support_mutex_pi_monotonic \ > + support_need_proc \ OK. > support_path_support_time64 \ > support_process_state \ > support_ptrace \ > diff --git a/support/support.h b/support/support.h > index 73b9fc48f01..d20051da4d4 100644 > --- a/support/support.h > +++ b/support/support.h > @@ -91,6 +91,11 @@ char *support_quote_string (const char *); > regular file open for writing, and initially empty. */ > int support_descriptor_supports_holes (int fd); > > +/* Predicates that a test requires a working /proc filesystem. This > + call will exit with UNSUPPORTED if /proc is not available, printing > + WHY_MSG as part of the diagnostic. */ > +void support_need_proc (const char *why_msg); OK. > + > /* Error-checking wrapper functions which terminate the process on > error. */ > > diff --git a/support/support_need_proc.c b/support/support_need_proc.c > new file mode 100644 > index 00000000000..9b4eab7539b > --- /dev/null > +++ b/support/support_need_proc.c > @@ -0,0 +1,35 @@ > +/* Indicate that a test requires a working /proc. > + Copyright (C) 2022 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <unistd.h> > +#include <support/check.h> > +#include <support/support.h> > + > +/* We test for /proc/self/maps since that's one of the files that one > + of our tests actually uses, but the general idea is if Linux's > + /proc/ (procfs) filesystem is mounted. If not, the process exits > + with an UNSUPPORTED result code. */ > + > +void > +support_need_proc (const char *why_msg) > +{ > +#ifdef __linux__ > + if (access ("/proc/self/maps", R_OK)) > + FAIL_UNSUPPORTED ("/proc is not available, %s", why_msg); > +#endif OK. Only does anything on linux (fixed from v5). > +} > diff --git a/support/test-container.c b/support/test-container.c > index 25e7f142193..c837c4d7580 100644 > --- a/support/test-container.c > +++ b/support/test-container.c > @@ -97,6 +97,7 @@ int verbose = 0; > * mytest.root/mytest.script has a list of "commands" to run: > syntax: > # comment > + pidns <comment> Perfect. This is a great way to avoid bitrot of this code while still making it functional. > su > mv FILE FILE > cp FILE FILE > @@ -122,6 +123,8 @@ int verbose = 0; > > details: > - '#': A comment. > + - 'pidns': Require a separate PID namespace, prints comment if it can't > + (default is a shared pid namespace) OK. > - 'su': Enables running test as root in the container. > - 'mv': A minimal move files command. > - 'cp': A minimal copy files command. > @@ -148,7 +151,7 @@ int verbose = 0; > * Simple, easy to review code (i.e. prefer simple naive code over > complex efficient code) > > - * The current implementation ist parallel-make-safe, but only in > + * The current implementation is parallel-make-safe, but only in > that it uses a lock to prevent parallel access to the testroot. */ > > > @@ -227,11 +230,37 @@ concat (const char *str, ...) > return bufs[n]; > } > > +/* Like the above, but put spaces between words. Caller frees. */ > +static char * > +concat_words (char **words, int num_words) > +{ > + int len = 0; > + int i; > + char *rv, *p; > + > + for (i = 0; i < num_words; i ++) > + { > + len += strlen (words[i]); > + len ++; > + } > + > + p = rv = (char *) xmalloc (len); > + > + for (i = 0; i < num_words; i ++) > + { > + if (i > 0) > + p = stpcpy (p, " "); > + p = stpcpy (p, words[i]); > + } > + > + return rv; > +} > + > /* Try to mount SRC onto DEST. */ > static void > trymount (const char *src, const char *dest) > { > - if (mount (src, dest, "", MS_BIND, NULL) < 0) > + if (mount (src, dest, "", MS_BIND | MS_REC, NULL) < 0) > FAIL_EXIT1 ("can't mount %s onto %s\n", src, dest); > } > > @@ -726,6 +755,9 @@ main (int argc, char **argv) > gid_t original_gid; > /* If set, the test runs as root instead of the user running the testsuite. */ > int be_su = 0; > + int require_pidns = 0; > + const char *pidns_comment = NULL; > + int do_proc_mounts = 0; > int UMAP; > int GMAP; > /* Used for "%lld %lld 1" so need not be large. */ > @@ -1011,6 +1043,12 @@ main (int argc, char **argv) > { > be_su = 1; > } > + else if (nt >= 1 && strcmp (the_words[0], "pidns") == 0) > + { > + require_pidns = 1; > + if (nt > 1) > + pidns_comment = concat_words (the_words + 1, nt - 1); OK. > + } > else if (nt == 3 && strcmp (the_words[0], "mkdirp") == 0) > { > long int m; > @@ -1068,7 +1106,8 @@ main (int argc, char **argv) > > #ifdef CLONE_NEWNS > /* The unshare here gives us our own spaces and capabilities. */ > - if (unshare (CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS) < 0) > + if (unshare (CLONE_NEWUSER | CLONE_NEWNS > + | (require_pidns ? CLONE_NEWPID : 0)) < 0) > { > /* Older kernels may not support all the options, or security > policy may block this call. */ > @@ -1079,6 +1118,11 @@ main (int argc, char **argv) > check_for_unshare_hints (); > FAIL_UNSUPPORTED ("unable to unshare user/fs: %s", strerror (saved_errno)); > } > + /* We're about to exit anyway, it's "safe" to call unshare again > + just to see if the CLONE_NEWPID caused the error. */ > + else if (require_pidns && unshare (CLONE_NEWUSER | CLONE_NEWNS) >= 0) > + FAIL_EXIT1 ("unable to unshare pid ns: %s : %s", strerror (errno), > + pidns_comment ? pidns_comment : "required by test"); > else > FAIL_EXIT1 ("unable to unshare user/fs: %s", strerror (errno)); > } > @@ -1094,6 +1138,15 @@ main (int argc, char **argv) > trymount (support_srcdir_root, new_srcdir_path); > trymount (support_objdir_root, new_objdir_path); > > + /* It may not be possible to mount /proc directly. */ > + if (! require_pidns) > + { > + char *new_proc = concat (new_root_path, "/proc", NULL); > + xmkdirp (new_proc, 0755); > + trymount ("/proc", new_proc); > + do_proc_mounts = 1; > + } > + > xmkdirp (concat (new_root_path, "/dev", NULL), 0755); > devmount (new_root_path, "null"); > devmount (new_root_path, "zero"); > @@ -1163,42 +1216,60 @@ main (int argc, char **argv) > > maybe_xmkdir ("/tmp", 0755); > > - /* Now that we're pid 1 (effectively "root") we can mount /proc */ > - maybe_xmkdir ("/proc", 0777); > - if (mount ("proc", "/proc", "proc", 0, NULL) < 0) > - FAIL_EXIT1 ("Unable to mount /proc: "); > - > - /* We map our original UID to the same UID in the container so we > - can own our own files normally. */ > - UMAP = open ("/proc/self/uid_map", O_WRONLY); > - if (UMAP < 0) > - FAIL_EXIT1 ("can't write to /proc/self/uid_map\n"); > - > - sprintf (tmp, "%lld %lld 1\n", > - (long long) (be_su ? 0 : original_uid), (long long) original_uid); > - write (UMAP, tmp, strlen (tmp)); > - xclose (UMAP); > - > - /* We must disable setgroups () before we can map our groups, else we > - get EPERM. */ > - GMAP = open ("/proc/self/setgroups", O_WRONLY); > - if (GMAP >= 0) > + if (require_pidns) > { > - /* We support kernels old enough to not have this. */ > - write (GMAP, "deny\n", 5); > - xclose (GMAP); > + /* Now that we're pid 1 (effectively "root") we can mount /proc */ > + maybe_xmkdir ("/proc", 0777); > + if (mount ("proc", "/proc", "proc", 0, NULL) != 0) > + { > + /* This happens if we're trying to create a nested container, > + like if the build is running under podman, and we lack > + priviledges. > + > + Ideally we would WARN here, but that would just add noise to > + *every* test-container test, and the ones that care should > + have their own relevent diagnostics. > + > + FAIL_EXIT1 ("Unable to mount /proc: "); */ > + } > + else > + do_proc_mounts = 1; > } > > - /* We map our original GID to the same GID in the container so we > - can own our own files normally. */ > - GMAP = open ("/proc/self/gid_map", O_WRONLY); > - if (GMAP < 0) > - FAIL_EXIT1 ("can't write to /proc/self/gid_map\n"); > + if (do_proc_mounts) > + { > + /* We map our original UID to the same UID in the container so we > + can own our own files normally. */ > + UMAP = open ("/proc/self/uid_map", O_WRONLY); > + if (UMAP < 0) > + FAIL_EXIT1 ("can't write to /proc/self/uid_map\n"); > + > + sprintf (tmp, "%lld %lld 1\n", > + (long long) (be_su ? 0 : original_uid), (long long) original_uid); > + write (UMAP, tmp, strlen (tmp)); > + xclose (UMAP); > + > + /* We must disable setgroups () before we can map our groups, else we > + get EPERM. */ > + GMAP = open ("/proc/self/setgroups", O_WRONLY); > + if (GMAP >= 0) > + { > + /* We support kernels old enough to not have this. */ > + write (GMAP, "deny\n", 5); > + xclose (GMAP); > + } > > - sprintf (tmp, "%lld %lld 1\n", > - (long long) (be_su ? 0 : original_gid), (long long) original_gid); > - write (GMAP, tmp, strlen (tmp)); > - xclose (GMAP); > + /* We map our original GID to the same GID in the container so we > + can own our own files normally. */ > + GMAP = open ("/proc/self/gid_map", O_WRONLY); > + if (GMAP < 0) > + FAIL_EXIT1 ("can't write to /proc/self/gid_map\n"); > + > + sprintf (tmp, "%lld %lld 1\n", > + (long long) (be_su ? 0 : original_gid), (long long) original_gid); > + write (GMAP, tmp, strlen (tmp)); > + xclose (GMAP); > + } > > if (change_cwd) > { >
Thanks! Pushed.
On Mon, 4 Apr 2022, Carlos O'Donell via Libc-alpha wrote: > > @@ -726,6 +755,9 @@ main (int argc, char **argv) > > gid_t original_gid; > > /* If set, the test runs as root instead of the user running the testsuite. */ > > int be_su = 0; > > + int require_pidns = 0; > > + const char *pidns_comment = NULL; > > + int do_proc_mounts = 0; I'm seeing build failures for i686-gnu: test-container.c: In function 'main': test-container.c:759:15: error: variable 'pidns_comment' set but not used [-Werror=unused-but-set-variable] 759 | const char *pidns_comment = NULL; | ^~~~~~~~~~~~~ cc1: all warnings being treated as errors https://sourceware.org/pipermail/libc-testresults/2022q2/009529.html
Maybe we need a Hurd trybot? ;-) I see two options: #ifdef the variable, or mark it attribute-unused. Preferences? diff --git a/support/test-container.c b/support/test-container.c index c837c4d7580..79d0e052d1c 100644 --- a/support/test-container.c +++ b/support/test-container.c @@ -756,7 +756,9 @@ main (int argc, char **argv) /* If set, the test runs as root instead of the user running the testsuite. */ int be_su = 0; int require_pidns = 0; +#ifdef CLONE_NEWNS const char *pidns_comment = NULL; +#endif int do_proc_mounts = 0; int UMAP; int GMAP; @@ -1046,8 +1048,10 @@ main (int argc, char **argv) else if (nt >= 1 && strcmp (the_words[0], "pidns") == 0) { require_pidns = 1; +#ifdef CLONE_NEWNS if (nt > 1) pidns_comment = concat_words (the_words + 1, nt - 1); +#endif } else if (nt == 3 && strcmp (the_words[0], "mkdirp") == 0) {
* DJ Delorie via Libc-alpha: > Maybe we need a Hurd trybot? ;-) > > I see two options: #ifdef the variable, or mark it attribute-unused. > Preferences? > > diff --git a/support/test-container.c b/support/test-container.c > index c837c4d7580..79d0e052d1c 100644 > --- a/support/test-container.c > +++ b/support/test-container.c > @@ -756,7 +756,9 @@ main (int argc, char **argv) > /* If set, the test runs as root instead of the user running the testsuite. */ > int be_su = 0; > int require_pidns = 0; > +#ifdef CLONE_NEWNS > const char *pidns_comment = NULL; > +#endif > int do_proc_mounts = 0; > int UMAP; > int GMAP; > @@ -1046,8 +1048,10 @@ main (int argc, char **argv) > else if (nt >= 1 && strcmp (the_words[0], "pidns") == 0) > { > require_pidns = 1; > +#ifdef CLONE_NEWNS > if (nt > 1) > pidns_comment = concat_words (the_words + 1, nt - 1); > +#endif > } > else if (nt == 3 && strcmp (the_words[0], "mkdirp") == 0) > { This also needs: diff --git a/support/test-container.c b/support/test-container.c index 79d0e052d1..9dde784bf0 100644 --- a/support/test-container.c +++ b/support/test-container.c @@ -231,7 +231,7 @@ concat (const char *str, ...) } /* Like the above, but put spaces between words. Caller frees. */ -static char * +static inline char * concat_words (char **words, int num_words) { int len = 0; Or something like it. Thanks, Florian
diff --git a/elf/tst-pldd.c b/elf/tst-pldd.c index f31f9956faa..378262504a5 100644 --- a/elf/tst-pldd.c +++ b/elf/tst-pldd.c @@ -85,6 +85,8 @@ in_str_list (const char *libname, const char *const strlist[]) static int do_test (void) { + support_need_proc ("needs /proc/sys/kernel/yama/ptrace_scope and /proc/$child"); + /* Check if our subprocess can be debugged with ptrace. */ { int ptrace_scope = support_ptrace_scope (); diff --git a/nptl/tst-pthread-getattr.c b/nptl/tst-pthread-getattr.c index d2ebf308ae7..3f6f76ea83b 100644 --- a/nptl/tst-pthread-getattr.c +++ b/nptl/tst-pthread-getattr.c @@ -28,6 +28,8 @@ #include <unistd.h> #include <inttypes.h> +#include <support/support.h> + /* There is an obscure bug in the kernel due to which RLIMIT_STACK is sometimes returned as unlimited when it is not, which may cause this test to fail. There is also the other case where RLIMIT_STACK is intentionally set as @@ -153,6 +155,8 @@ check_stack_top (void) static int do_test (void) { + support_need_proc ("Reads /proc/self/maps to get stack size."); + pagesize = sysconf (_SC_PAGESIZE); return check_stack_top (); } diff --git a/nss/tst-reload2.c b/nss/tst-reload2.c index fb3b94a1fab..7df0ca740b6 100644 --- a/nss/tst-reload2.c +++ b/nss/tst-reload2.c @@ -95,6 +95,8 @@ do_test (void) char buf1[PATH_MAX]; char buf2[PATH_MAX]; + support_need_proc ("Our xmkdirp fails if we can't map our uid, which requires /proc."); + sprintf (buf1, "/subdir%s", support_slibdir_prefix); xmkdirp (buf1, 0777); diff --git a/support/Makefile b/support/Makefile index 5ddcb8d1581..f036a813048 100644 --- a/support/Makefile +++ b/support/Makefile @@ -64,6 +64,7 @@ libsupport-routines = \ support_format_netent \ support_isolate_in_subprocess \ support_mutex_pi_monotonic \ + support_need_proc \ support_path_support_time64 \ support_process_state \ support_ptrace \ diff --git a/support/support.h b/support/support.h index 73b9fc48f01..d20051da4d4 100644 --- a/support/support.h +++ b/support/support.h @@ -91,6 +91,11 @@ char *support_quote_string (const char *); regular file open for writing, and initially empty. */ int support_descriptor_supports_holes (int fd); +/* Predicates that a test requires a working /proc filesystem. This + call will exit with UNSUPPORTED if /proc is not available, printing + WHY_MSG as part of the diagnostic. */ +void support_need_proc (const char *why_msg); + /* Error-checking wrapper functions which terminate the process on error. */ diff --git a/support/support_need_proc.c b/support/support_need_proc.c new file mode 100644 index 00000000000..9b4eab7539b --- /dev/null +++ b/support/support_need_proc.c @@ -0,0 +1,35 @@ +/* Indicate that a test requires a working /proc. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <unistd.h> +#include <support/check.h> +#include <support/support.h> + +/* We test for /proc/self/maps since that's one of the files that one + of our tests actually uses, but the general idea is if Linux's + /proc/ (procfs) filesystem is mounted. If not, the process exits + with an UNSUPPORTED result code. */ + +void +support_need_proc (const char *why_msg) +{ +#ifdef __linux__ + if (access ("/proc/self/maps", R_OK)) + FAIL_UNSUPPORTED ("/proc is not available, %s", why_msg); +#endif +} diff --git a/support/test-container.c b/support/test-container.c index 25e7f142193..c837c4d7580 100644 --- a/support/test-container.c +++ b/support/test-container.c @@ -97,6 +97,7 @@ int verbose = 0; * mytest.root/mytest.script has a list of "commands" to run: syntax: # comment + pidns <comment> su mv FILE FILE cp FILE FILE @@ -122,6 +123,8 @@ int verbose = 0; details: - '#': A comment. + - 'pidns': Require a separate PID namespace, prints comment if it can't + (default is a shared pid namespace) - 'su': Enables running test as root in the container. - 'mv': A minimal move files command. - 'cp': A minimal copy files command. @@ -148,7 +151,7 @@ int verbose = 0; * Simple, easy to review code (i.e. prefer simple naive code over complex efficient code) - * The current implementation ist parallel-make-safe, but only in + * The current implementation is parallel-make-safe, but only in that it uses a lock to prevent parallel access to the testroot. */ @@ -227,11 +230,37 @@ concat (const char *str, ...) return bufs[n]; } +/* Like the above, but put spaces between words. Caller frees. */ +static char * +concat_words (char **words, int num_words) +{ + int len = 0; + int i; + char *rv, *p; + + for (i = 0; i < num_words; i ++) + { + len += strlen (words[i]); + len ++; + } + + p = rv = (char *) xmalloc (len); + + for (i = 0; i < num_words; i ++) + { + if (i > 0) + p = stpcpy (p, " "); + p = stpcpy (p, words[i]); + } + + return rv; +} + /* Try to mount SRC onto DEST. */ static void trymount (const char *src, const char *dest) { - if (mount (src, dest, "", MS_BIND, NULL) < 0) + if (mount (src, dest, "", MS_BIND | MS_REC, NULL) < 0) FAIL_EXIT1 ("can't mount %s onto %s\n", src, dest); } @@ -726,6 +755,9 @@ main (int argc, char **argv) gid_t original_gid; /* If set, the test runs as root instead of the user running the testsuite. */ int be_su = 0; + int require_pidns = 0; + const char *pidns_comment = NULL; + int do_proc_mounts = 0; int UMAP; int GMAP; /* Used for "%lld %lld 1" so need not be large. */ @@ -1011,6 +1043,12 @@ main (int argc, char **argv) { be_su = 1; } + else if (nt >= 1 && strcmp (the_words[0], "pidns") == 0) + { + require_pidns = 1; + if (nt > 1) + pidns_comment = concat_words (the_words + 1, nt - 1); + } else if (nt == 3 && strcmp (the_words[0], "mkdirp") == 0) { long int m; @@ -1068,7 +1106,8 @@ main (int argc, char **argv) #ifdef CLONE_NEWNS /* The unshare here gives us our own spaces and capabilities. */ - if (unshare (CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS) < 0) + if (unshare (CLONE_NEWUSER | CLONE_NEWNS + | (require_pidns ? CLONE_NEWPID : 0)) < 0) { /* Older kernels may not support all the options, or security policy may block this call. */ @@ -1079,6 +1118,11 @@ main (int argc, char **argv) check_for_unshare_hints (); FAIL_UNSUPPORTED ("unable to unshare user/fs: %s", strerror (saved_errno)); } + /* We're about to exit anyway, it's "safe" to call unshare again + just to see if the CLONE_NEWPID caused the error. */ + else if (require_pidns && unshare (CLONE_NEWUSER | CLONE_NEWNS) >= 0) + FAIL_EXIT1 ("unable to unshare pid ns: %s : %s", strerror (errno), + pidns_comment ? pidns_comment : "required by test"); else FAIL_EXIT1 ("unable to unshare user/fs: %s", strerror (errno)); } @@ -1094,6 +1138,15 @@ main (int argc, char **argv) trymount (support_srcdir_root, new_srcdir_path); trymount (support_objdir_root, new_objdir_path); + /* It may not be possible to mount /proc directly. */ + if (! require_pidns) + { + char *new_proc = concat (new_root_path, "/proc", NULL); + xmkdirp (new_proc, 0755); + trymount ("/proc", new_proc); + do_proc_mounts = 1; + } + xmkdirp (concat (new_root_path, "/dev", NULL), 0755); devmount (new_root_path, "null"); devmount (new_root_path, "zero"); @@ -1163,42 +1216,60 @@ main (int argc, char **argv) maybe_xmkdir ("/tmp", 0755); - /* Now that we're pid 1 (effectively "root") we can mount /proc */ - maybe_xmkdir ("/proc", 0777); - if (mount ("proc", "/proc", "proc", 0, NULL) < 0) - FAIL_EXIT1 ("Unable to mount /proc: "); - - /* We map our original UID to the same UID in the container so we - can own our own files normally. */ - UMAP = open ("/proc/self/uid_map", O_WRONLY); - if (UMAP < 0) - FAIL_EXIT1 ("can't write to /proc/self/uid_map\n"); - - sprintf (tmp, "%lld %lld 1\n", - (long long) (be_su ? 0 : original_uid), (long long) original_uid); - write (UMAP, tmp, strlen (tmp)); - xclose (UMAP); - - /* We must disable setgroups () before we can map our groups, else we - get EPERM. */ - GMAP = open ("/proc/self/setgroups", O_WRONLY); - if (GMAP >= 0) + if (require_pidns) { - /* We support kernels old enough to not have this. */ - write (GMAP, "deny\n", 5); - xclose (GMAP); + /* Now that we're pid 1 (effectively "root") we can mount /proc */ + maybe_xmkdir ("/proc", 0777); + if (mount ("proc", "/proc", "proc", 0, NULL) != 0) + { + /* This happens if we're trying to create a nested container, + like if the build is running under podman, and we lack + priviledges. + + Ideally we would WARN here, but that would just add noise to + *every* test-container test, and the ones that care should + have their own relevent diagnostics. + + FAIL_EXIT1 ("Unable to mount /proc: "); */ + } + else + do_proc_mounts = 1; } - /* We map our original GID to the same GID in the container so we - can own our own files normally. */ - GMAP = open ("/proc/self/gid_map", O_WRONLY); - if (GMAP < 0) - FAIL_EXIT1 ("can't write to /proc/self/gid_map\n"); + if (do_proc_mounts) + { + /* We map our original UID to the same UID in the container so we + can own our own files normally. */ + UMAP = open ("/proc/self/uid_map", O_WRONLY); + if (UMAP < 0) + FAIL_EXIT1 ("can't write to /proc/self/uid_map\n"); + + sprintf (tmp, "%lld %lld 1\n", + (long long) (be_su ? 0 : original_uid), (long long) original_uid); + write (UMAP, tmp, strlen (tmp)); + xclose (UMAP); + + /* We must disable setgroups () before we can map our groups, else we + get EPERM. */ + GMAP = open ("/proc/self/setgroups", O_WRONLY); + if (GMAP >= 0) + { + /* We support kernels old enough to not have this. */ + write (GMAP, "deny\n", 5); + xclose (GMAP); + } - sprintf (tmp, "%lld %lld 1\n", - (long long) (be_su ? 0 : original_gid), (long long) original_gid); - write (GMAP, tmp, strlen (tmp)); - xclose (GMAP); + /* We map our original GID to the same GID in the container so we + can own our own files normally. */ + GMAP = open ("/proc/self/gid_map", O_WRONLY); + if (GMAP < 0) + FAIL_EXIT1 ("can't write to /proc/self/gid_map\n"); + + sprintf (tmp, "%lld %lld 1\n", + (long long) (be_su ? 0 : original_gid), (long long) original_gid); + write (GMAP, tmp, strlen (tmp)); + xclose (GMAP); + } if (change_cwd) {