Message ID | 20220205212402.GA5233@altlinux.org |
---|---|
State | New |
Headers | show |
Series | linux: fix accuracy of get_nprocs and get_nprocs_conf [BZ #28865] | expand |
On 05/02/2022 18:24, Dmitry V. Levin wrote: > get_nprocs() and get_nprocs_conf() use various methods to obtain an > accurate number of processors. Re-introduce __get_nprocs_sched() as > a source of information, and fix the order in which these methods are > used to return the most accurate information. The primary source of > information used in both functions remains unchanged. > > This also changes __get_nprocs_sched() error return value from 2 to 0, > but all its users are already prepared to handle that. > > Old behavior: > get_nprocs: > /sys/devices/system/cpu/online -> /proc/stat -> 2 > get_nprocs_conf: > /sys/devices/system/cpu/ -> /proc/stat -> 2 > > New behavior: > get_nprocs: > /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2 > get_nprocs_conf: > /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2 > > Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc") > Closes: BZ #28865 I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because it introduced regression on some monitoring tools [3]. In fact from BZ#27645 and BZ#28624 [4] discussion I think we can't reliable use sched_getaffinity because since some container environment returns a synthetic mask that might break some programs. Also, sched_getaffinity returns a 'per-process' mask instead of system-wide as we discussed in previous threads. It should be ok to get adjusting internal tuning (as for malloc). [1] https://sourceware.org/bugzilla/show_bug.cgi?id=27645 [2] https://sourceware.org/bugzilla/show_bug.cgi?id=28310 [3] https://sourceware.org/bugzilla/show_bug.cgi?id=27645#c5 [4] https://sourceware.org/bugzilla/show_bug.cgi?id=28624 > --- > sysdeps/unix/sysv/linux/getsysstats.c | 101 ++++++++++++++++++-------- > 1 file changed, 70 insertions(+), 31 deletions(-) > > diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c > index c98c8ce3d4..e1ed96070c 100644 > --- a/sysdeps/unix/sysv/linux/getsysstats.c > +++ b/sysdeps/unix/sysv/linux/getsysstats.c > @@ -50,9 +50,8 @@ __get_nprocs_sched (void) > is an arbitrary values assuming such systems should be rare and there > is no offline cpus. */ > return max_num_cpus; > - /* Some other error. 2 is conservative (not a uniprocessor system, so > - atomics are needed). */ > - return 2; > + /* Some other error. */ > + return 0; > } > > static char * > @@ -108,22 +107,19 @@ next_line (int fd, char *const buffer, char **cp, char **re, > } > > static int > -get_nproc_stat (char *buffer, size_t buffer_size) > +get_nproc_stat (void) > { > + enum { buffer_size = 1024 }; > + char buffer[buffer_size]; > char *buffer_end = buffer + buffer_size; > char *cp = buffer_end; > char *re = buffer_end; > - > - /* Default to an SMP system in case we cannot obtain an accurate > - number. */ > - int result = 2; > + int result = 0; > > const int flags = O_RDONLY | O_CLOEXEC; > int fd = __open_nocancel ("/proc/stat", flags); > if (fd != -1) > { > - result = 0; > - > char *l; > while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL) > /* The current format of /proc/stat has all the cpu* entries > @@ -139,8 +135,8 @@ get_nproc_stat (char *buffer, size_t buffer_size) > return result; > } > > -int > -__get_nprocs (void) > +static int > +get_nprocs_cpu_online (void) > { > enum { buffer_size = 1024 }; > char buffer[buffer_size]; > @@ -179,7 +175,8 @@ __get_nprocs (void) > } > } > > - result += m - n + 1; > + if (m >= n) > + result += m - n + 1; > > l = endp; > if (l < re && *l == ',') > @@ -188,28 +185,18 @@ __get_nprocs (void) > while (l < re && *l != '\n'); > > __close_nocancel_nostatus (fd); > - > - if (result > 0) > - return result; > } > > - return get_nproc_stat (buffer, buffer_size); > + return result; > } > -libc_hidden_def (__get_nprocs) > -weak_alias (__get_nprocs, get_nprocs) > - > > -/* On some architectures it is possible to distinguish between configured > - and active cpus. */ > -int > -__get_nprocs_conf (void) > +static int > +get_nprocs_cpu (void) > { > - /* Try to use the sysfs filesystem. It has actual information about > - online processors. */ > + int count = 0; > DIR *dir = __opendir ("/sys/devices/system/cpu"); > if (dir != NULL) > { > - int count = 0; > struct dirent64 *d; > > while ((d = __readdir64 (dir)) != NULL) > @@ -224,12 +211,64 @@ __get_nprocs_conf (void) > > __closedir (dir); > > - return count; > } > + return count; > +} > > - enum { buffer_size = 1024 }; > - char buffer[buffer_size]; > - return get_nproc_stat (buffer, buffer_size); > +int > +__get_nprocs (void) > +{ > + int result; > + > + /* Try /sys/devices/system/cpu/online first. */ > + result = get_nprocs_cpu_online (); > + if (result) > + return result; > + > + /* Try sched_getaffinity(2). */ > + result = __get_nprocs_sched (); > + if (result) > + return result; > + > + /* Try /proc/stat. */ > + result = get_nproc_stat (); > + if (result) > + return result; > + > + /* We failed to obtain an accurate number. Be conservative: return > + the smallest number meaning that this is not a uniprocessor system, > + so atomics are needed. */ > + return 2; > +} > +libc_hidden_def (__get_nprocs) > +weak_alias (__get_nprocs, get_nprocs) > + > +/* On some architectures it is possible to distinguish between configured > + and active cpus. */ > +int > +__get_nprocs_conf (void) > +{ > + int result; > + > + /* Try /sys/devices/system/cpu/ first. */ > + result = get_nprocs_cpu (); > + if (result) > + return result; > + > + /* Try /proc/stat. */ > + result = get_nproc_stat (); > + if (result) > + return result; > + > + /* Try sched_getaffinity(2). */ > + result = __get_nprocs_sched (); > + if (result) > + return result; > + > + /* We failed to obtain an accurate number. Be conservative: return > + the smallest number meaning that this is not a uniprocessor system, > + so atomics are needed. */ > + return 2; > } > libc_hidden_def (__get_nprocs_conf) > weak_alias (__get_nprocs_conf, get_nprocs_conf) >
* Adhemerval Zanella via Libc-alpha: > On 05/02/2022 18:24, Dmitry V. Levin wrote: >> get_nprocs() and get_nprocs_conf() use various methods to obtain an >> accurate number of processors. Re-introduce __get_nprocs_sched() as >> a source of information, and fix the order in which these methods are >> used to return the most accurate information. The primary source of >> information used in both functions remains unchanged. >> >> This also changes __get_nprocs_sched() error return value from 2 to 0, >> but all its users are already prepared to handle that. >> >> Old behavior: >> get_nprocs: >> /sys/devices/system/cpu/online -> /proc/stat -> 2 >> get_nprocs_conf: >> /sys/devices/system/cpu/ -> /proc/stat -> 2 >> >> New behavior: >> get_nprocs: >> /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2 >> get_nprocs_conf: >> /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2 >> >> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc") >> Closes: BZ #28865 > > I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs > to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because > it introduced regression on some monitoring tools [3]. But I think using sched_getaffinity as a fallback when /sys and /proc are not available makes somse. It's different form what we did temporarily (sched_getaffinity first). Thanks, Florian
Hi, On Mon, Feb 07, 2022 at 08:25:11AM -0300, Adhemerval Zanella via Libc-alpha wrote: > On 05/02/2022 18:24, Dmitry V. Levin wrote: > > get_nprocs() and get_nprocs_conf() use various methods to obtain an > > accurate number of processors. Re-introduce __get_nprocs_sched() as > > a source of information, and fix the order in which these methods are > > used to return the most accurate information. The primary source of > > information used in both functions remains unchanged. > > > > This also changes __get_nprocs_sched() error return value from 2 to 0, > > but all its users are already prepared to handle that. > > > > Old behavior: > > get_nprocs: > > /sys/devices/system/cpu/online -> /proc/stat -> 2 > > get_nprocs_conf: > > /sys/devices/system/cpu/ -> /proc/stat -> 2 > > > > New behavior: > > get_nprocs: > > /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2 > > get_nprocs_conf: > > /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2 > > > > Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc") > > Closes: BZ #28865 > > I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs > to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because > it introduced regression on some monitoring tools [3]. > > In fact from BZ#27645 and BZ#28624 [4] discussion I think we can't reliable use > sched_getaffinity because since some container environment returns a synthetic > mask that might break some programs. Also, sched_getaffinity returns a > 'per-process' mask instead of system-wide as we discussed in previous threads. > It should be ok to get adjusting internal tuning (as for malloc). > > [1] https://sourceware.org/bugzilla/show_bug.cgi?id=27645 > [2] https://sourceware.org/bugzilla/show_bug.cgi?id=28310 > [3] https://sourceware.org/bugzilla/show_bug.cgi?id=27645#c5 > [4] https://sourceware.org/bugzilla/show_bug.cgi?id=28624 Is there any realistic case when 2 is a more accurate estimation for the number of processors than sched_getaffinity? I suppose there are no such cases. Also, /sys is consulted first anyway. I wish I saw commit 342298278e earlier to raise objections before it was committed. Please note that BZ #28865 is a real regression we had to patch, this means glibc must behave properly in that environment without any additional tuning. I suggest to install this fix and see what could be done later in an unlikely case anything else breaks.
On 07/02/2022 08:44, Florian Weimer wrote: > * Adhemerval Zanella via Libc-alpha: > >> On 05/02/2022 18:24, Dmitry V. Levin wrote: >>> get_nprocs() and get_nprocs_conf() use various methods to obtain an >>> accurate number of processors. Re-introduce __get_nprocs_sched() as >>> a source of information, and fix the order in which these methods are >>> used to return the most accurate information. The primary source of >>> information used in both functions remains unchanged. >>> >>> This also changes __get_nprocs_sched() error return value from 2 to 0, >>> but all its users are already prepared to handle that. >>> >>> Old behavior: >>> get_nprocs: >>> /sys/devices/system/cpu/online -> /proc/stat -> 2 >>> get_nprocs_conf: >>> /sys/devices/system/cpu/ -> /proc/stat -> 2 >>> >>> New behavior: >>> get_nprocs: >>> /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2 >>> get_nprocs_conf: >>> /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2 >>> >>> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc") >>> Closes: BZ #28865 >> >> I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs >> to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because >> it introduced regression on some monitoring tools [3]. > > But I think using sched_getaffinity as a fallback when /sys and /proc > are not available makes somse. It's different form what we did > temporarily (sched_getaffinity first). My concern is we start to see BZ#27645 again on environments that filter out sysfs and provide a synthetic sched_getaffinity.
* Adhemerval Zanella: >> But I think using sched_getaffinity as a fallback when /sys and /proc >> are not available makes somse. It's different form what we did >> temporarily (sched_getaffinity first). > > My concern is we start to see BZ#27645 again on environments that filter > out sysfs and provide a synthetic sched_getaffinity. Should we move the sched_getaffinity fallback after /proc, then? Thanks, Florian
On 07/02/2022 08:51, Dmitry V. Levin wrote: > Hi, > > On Mon, Feb 07, 2022 at 08:25:11AM -0300, Adhemerval Zanella via Libc-alpha wrote: >> On 05/02/2022 18:24, Dmitry V. Levin wrote: >>> get_nprocs() and get_nprocs_conf() use various methods to obtain an >>> accurate number of processors. Re-introduce __get_nprocs_sched() as >>> a source of information, and fix the order in which these methods are >>> used to return the most accurate information. The primary source of >>> information used in both functions remains unchanged. >>> >>> This also changes __get_nprocs_sched() error return value from 2 to 0, >>> but all its users are already prepared to handle that. >>> >>> Old behavior: >>> get_nprocs: >>> /sys/devices/system/cpu/online -> /proc/stat -> 2 >>> get_nprocs_conf: >>> /sys/devices/system/cpu/ -> /proc/stat -> 2 >>> >>> New behavior: >>> get_nprocs: >>> /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2 >>> get_nprocs_conf: >>> /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2 >>> >>> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc") >>> Closes: BZ #28865 >> >> I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs >> to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because >> it introduced regression on some monitoring tools [3]. >> >> In fact from BZ#27645 and BZ#28624 [4] discussion I think we can't reliable use >> sched_getaffinity because since some container environment returns a synthetic >> mask that might break some programs. Also, sched_getaffinity returns a >> 'per-process' mask instead of system-wide as we discussed in previous threads. >> It should be ok to get adjusting internal tuning (as for malloc). >> >> [1] https://sourceware.org/bugzilla/show_bug.cgi?id=27645 >> [2] https://sourceware.org/bugzilla/show_bug.cgi?id=28310 >> [3] https://sourceware.org/bugzilla/show_bug.cgi?id=27645#c5 >> [4] https://sourceware.org/bugzilla/show_bug.cgi?id=28624 > > Is there any realistic case when 2 is a more accurate estimation for the > number of processors than sched_getaffinity? I suppose there are no such > cases. Also, /sys is consulted first anyway. I am not sure, but my impression is on some environments sched_getaffinity returns a synthetic value that might not represent the correct system supported CPUs. At least, it was my impression in the bug reports, where it does break some programs. > > I wish I saw commit 342298278e earlier to raise objections before it was > committed. > > Please note that BZ #28865 is a real regression we had to patch, this > means glibc must behave properly in that environment without any > additional tuning. > > I suggest to install this fix and see what could be done later > in an unlikely case anything else breaks. I think in this case we should use sched_affinity as the last fallback on get_nprocs as well we, so we first use either sysfs or procfs and only then fallback to sched_getaffinity.
On 07/02/2022 09:01, Florian Weimer wrote: > * Adhemerval Zanella: > >>> But I think using sched_getaffinity as a fallback when /sys and /proc >>> are not available makes somse. It's different form what we did >>> temporarily (sched_getaffinity first). >> >> My concern is we start to see BZ#27645 again on environments that filter >> out sysfs and provide a synthetic sched_getaffinity. > > Should we move the sched_getaffinity fallback after /proc, then? Yeah, that was what I suggested in another reply.
On Mon, Feb 07, 2022 at 09:01:19AM -0300, Adhemerval Zanella via Libc-alpha wrote: > On 07/02/2022 08:51, Dmitry V. Levin wrote: > > On Mon, Feb 07, 2022 at 08:25:11AM -0300, Adhemerval Zanella via Libc-alpha wrote: > >> On 05/02/2022 18:24, Dmitry V. Levin wrote: > >>> get_nprocs() and get_nprocs_conf() use various methods to obtain an > >>> accurate number of processors. Re-introduce __get_nprocs_sched() as > >>> a source of information, and fix the order in which these methods are > >>> used to return the most accurate information. The primary source of > >>> information used in both functions remains unchanged. > >>> > >>> This also changes __get_nprocs_sched() error return value from 2 to 0, > >>> but all its users are already prepared to handle that. > >>> > >>> Old behavior: > >>> get_nprocs: > >>> /sys/devices/system/cpu/online -> /proc/stat -> 2 > >>> get_nprocs_conf: > >>> /sys/devices/system/cpu/ -> /proc/stat -> 2 > >>> > >>> New behavior: > >>> get_nprocs: > >>> /sys/devices/system/cpu/online -> sched_getaffinity -> /proc/stat -> 2 > >>> get_nprocs_conf: > >>> /sys/devices/system/cpu/ -> /proc/stat -> sched_getaffinity -> 2 > >>> > >>> Fixes: 342298278e ("linux: Revert the use of sched_getaffinity on get_nproc") > >>> Closes: BZ #28865 > >> > >> I think we are circling back on this, on BZ#27645 [1] we changed get_nprocs > >> to use sched_getaffinity and then we have to revert it with BZ#28310 [2] because > >> it introduced regression on some monitoring tools [3]. > >> > >> In fact from BZ#27645 and BZ#28624 [4] discussion I think we can't reliable use > >> sched_getaffinity because since some container environment returns a synthetic > >> mask that might break some programs. Also, sched_getaffinity returns a > >> 'per-process' mask instead of system-wide as we discussed in previous threads. > >> It should be ok to get adjusting internal tuning (as for malloc). > >> > >> [1] https://sourceware.org/bugzilla/show_bug.cgi?id=27645 > >> [2] https://sourceware.org/bugzilla/show_bug.cgi?id=28310 > >> [3] https://sourceware.org/bugzilla/show_bug.cgi?id=27645#c5 > >> [4] https://sourceware.org/bugzilla/show_bug.cgi?id=28624 > > > > Is there any realistic case when 2 is a more accurate estimation for the > > number of processors than sched_getaffinity? I suppose there are no such > > cases. Also, /sys is consulted first anyway. > > I am not sure, but my impression is on some environments sched_getaffinity > returns a synthetic value that might not represent the correct system > supported CPUs. At least, it was my impression in the bug reports, where > it does break some programs. > > > I wish I saw commit 342298278e earlier to raise objections before it was > > committed. > > > > Please note that BZ #28865 is a real regression we had to patch, this > > means glibc must behave properly in that environment without any > > additional tuning. > > > > I suggest to install this fix and see what could be done later > > in an unlikely case anything else breaks. > > I think in this case we should use sched_affinity as the last fallback > on get_nprocs as well we, so we first use either sysfs or procfs and > only then fallback to sched_getaffinity. OK, this should work, too, I'll post a revised patch shortly.
diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c index c98c8ce3d4..e1ed96070c 100644 --- a/sysdeps/unix/sysv/linux/getsysstats.c +++ b/sysdeps/unix/sysv/linux/getsysstats.c @@ -50,9 +50,8 @@ __get_nprocs_sched (void) is an arbitrary values assuming such systems should be rare and there is no offline cpus. */ return max_num_cpus; - /* Some other error. 2 is conservative (not a uniprocessor system, so - atomics are needed). */ - return 2; + /* Some other error. */ + return 0; } static char * @@ -108,22 +107,19 @@ next_line (int fd, char *const buffer, char **cp, char **re, } static int -get_nproc_stat (char *buffer, size_t buffer_size) +get_nproc_stat (void) { + enum { buffer_size = 1024 }; + char buffer[buffer_size]; char *buffer_end = buffer + buffer_size; char *cp = buffer_end; char *re = buffer_end; - - /* Default to an SMP system in case we cannot obtain an accurate - number. */ - int result = 2; + int result = 0; const int flags = O_RDONLY | O_CLOEXEC; int fd = __open_nocancel ("/proc/stat", flags); if (fd != -1) { - result = 0; - char *l; while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL) /* The current format of /proc/stat has all the cpu* entries @@ -139,8 +135,8 @@ get_nproc_stat (char *buffer, size_t buffer_size) return result; } -int -__get_nprocs (void) +static int +get_nprocs_cpu_online (void) { enum { buffer_size = 1024 }; char buffer[buffer_size]; @@ -179,7 +175,8 @@ __get_nprocs (void) } } - result += m - n + 1; + if (m >= n) + result += m - n + 1; l = endp; if (l < re && *l == ',') @@ -188,28 +185,18 @@ __get_nprocs (void) while (l < re && *l != '\n'); __close_nocancel_nostatus (fd); - - if (result > 0) - return result; } - return get_nproc_stat (buffer, buffer_size); + return result; } -libc_hidden_def (__get_nprocs) -weak_alias (__get_nprocs, get_nprocs) - -/* On some architectures it is possible to distinguish between configured - and active cpus. */ -int -__get_nprocs_conf (void) +static int +get_nprocs_cpu (void) { - /* Try to use the sysfs filesystem. It has actual information about - online processors. */ + int count = 0; DIR *dir = __opendir ("/sys/devices/system/cpu"); if (dir != NULL) { - int count = 0; struct dirent64 *d; while ((d = __readdir64 (dir)) != NULL) @@ -224,12 +211,64 @@ __get_nprocs_conf (void) __closedir (dir); - return count; } + return count; +} - enum { buffer_size = 1024 }; - char buffer[buffer_size]; - return get_nproc_stat (buffer, buffer_size); +int +__get_nprocs (void) +{ + int result; + + /* Try /sys/devices/system/cpu/online first. */ + result = get_nprocs_cpu_online (); + if (result) + return result; + + /* Try sched_getaffinity(2). */ + result = __get_nprocs_sched (); + if (result) + return result; + + /* Try /proc/stat. */ + result = get_nproc_stat (); + if (result) + return result; + + /* We failed to obtain an accurate number. Be conservative: return + the smallest number meaning that this is not a uniprocessor system, + so atomics are needed. */ + return 2; +} +libc_hidden_def (__get_nprocs) +weak_alias (__get_nprocs, get_nprocs) + +/* On some architectures it is possible to distinguish between configured + and active cpus. */ +int +__get_nprocs_conf (void) +{ + int result; + + /* Try /sys/devices/system/cpu/ first. */ + result = get_nprocs_cpu (); + if (result) + return result; + + /* Try /proc/stat. */ + result = get_nproc_stat (); + if (result) + return result; + + /* Try sched_getaffinity(2). */ + result = __get_nprocs_sched (); + if (result) + return result; + + /* We failed to obtain an accurate number. Be conservative: return + the smallest number meaning that this is not a uniprocessor system, + so atomics are needed. */ + return 2; } libc_hidden_def (__get_nprocs_conf) weak_alias (__get_nprocs_conf, get_nprocs_conf)