Message ID | 20240212214912.2550529-1-ppalka@redhat.com |
---|---|
State | New |
Headers | show |
Series | c++/modules: use optimized crc32 from zlib | expand |
On 2/12/24 16:49, Patrick Palka wrote: > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK > for trunk? OK. > -- >8 -- > > The current implementation of bytes::calc_crc computes the checksum one > byte at a time, and turns out to be quite slow, taking 5% and 15% of > time compiling and streaming in the std module respectively. We have > a crc32_unsigned routine that handles 4 bytes at a time which could > speed up this hot function, but we also have a bundled zlib with highly > optimized crc routines that can handle up to 32 bytes at a time. > > So this patch makes us use zlib's crc32 in this hot code path. > According to some perf experiments this reduces the overhead of calc_crc > from 15% of total time to 3% when streaming in the std module. > > gcc/cp/ChangeLog: > > * Make-lang.in (CFLAGS-cp/module.o): Add $(ZLIBINC). > * module.cc: Include <zlib.h>. > (bytes::calc_crc): Use crc32 from zlib. > (bytes_out::set_crc): Use crc32_combine from zlib. > --- > gcc/cp/Make-lang.in | 2 +- > gcc/cp/module.cc | 8 +++----- > 2 files changed, 4 insertions(+), 6 deletions(-) > > diff --git a/gcc/cp/Make-lang.in b/gcc/cp/Make-lang.in > index 630db41d87e..f153891a1ef 100644 > --- a/gcc/cp/Make-lang.in > +++ b/gcc/cp/Make-lang.in > @@ -55,7 +55,7 @@ c++.serial = cc1plus$(exeext) > CFLAGS-cp/g++spec.o += $(DRIVER_DEFINES) > > CFLAGS-cp/module.o += -DHOST_MACHINE=\"$(host)\" \ > - -DTARGET_MACHINE=\"$(target)\" > + -DTARGET_MACHINE=\"$(target)\" $(ZLIBINC) > > # In non-release builds, use a date-related module version. > ifneq ($(DEVPHASE_c),) > diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc > index 86e43aee542..c94f4a257de 100644 > --- a/gcc/cp/module.cc > +++ b/gcc/cp/module.cc > @@ -233,6 +233,7 @@ Classes used: > /* This TU doesn't need or want to see the networking. */ > #define CODY_NETWORKING 0 > #include "mapper-client.h" > +#include <zlib.h> // for crc32, crc32_combine > > #if 0 // 1 for testing no mmap > #define MAPPED_READING 0 > @@ -487,10 +488,7 @@ protected: > unsigned > bytes::calc_crc (unsigned l) const > { > - unsigned crc = 0; > - for (size_t ix = 4; ix < l; ix++) > - crc = crc32_byte (crc, buffer[ix]); > - return crc; > + return crc32 (0, (unsigned char *)buffer + 4, l - 4); > } > > class elf_in; > @@ -717,7 +715,7 @@ bytes_out::set_crc (unsigned *crc_ptr) > unsigned crc = calc_crc (pos); > unsigned accum = *crc_ptr; > /* Only mix the existing *CRC_PTR if it is non-zero. */ > - accum = accum ? crc32_unsigned (accum, crc) : crc; > + accum = accum ? crc32_combine (accum, crc, pos - 4) : crc; > *crc_ptr = accum; > > /* Buffer will be sufficiently aligned. */
diff --git a/gcc/cp/Make-lang.in b/gcc/cp/Make-lang.in index 630db41d87e..f153891a1ef 100644 --- a/gcc/cp/Make-lang.in +++ b/gcc/cp/Make-lang.in @@ -55,7 +55,7 @@ c++.serial = cc1plus$(exeext) CFLAGS-cp/g++spec.o += $(DRIVER_DEFINES) CFLAGS-cp/module.o += -DHOST_MACHINE=\"$(host)\" \ - -DTARGET_MACHINE=\"$(target)\" + -DTARGET_MACHINE=\"$(target)\" $(ZLIBINC) # In non-release builds, use a date-related module version. ifneq ($(DEVPHASE_c),) diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 86e43aee542..c94f4a257de 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -233,6 +233,7 @@ Classes used: /* This TU doesn't need or want to see the networking. */ #define CODY_NETWORKING 0 #include "mapper-client.h" +#include <zlib.h> // for crc32, crc32_combine #if 0 // 1 for testing no mmap #define MAPPED_READING 0 @@ -487,10 +488,7 @@ protected: unsigned bytes::calc_crc (unsigned l) const { - unsigned crc = 0; - for (size_t ix = 4; ix < l; ix++) - crc = crc32_byte (crc, buffer[ix]); - return crc; + return crc32 (0, (unsigned char *)buffer + 4, l - 4); } class elf_in; @@ -717,7 +715,7 @@ bytes_out::set_crc (unsigned *crc_ptr) unsigned crc = calc_crc (pos); unsigned accum = *crc_ptr; /* Only mix the existing *CRC_PTR if it is non-zero. */ - accum = accum ? crc32_unsigned (accum, crc) : crc; + accum = accum ? crc32_combine (accum, crc, pos - 4) : crc; *crc_ptr = accum; /* Buffer will be sufficiently aligned. */