diff mbox series

[v2] zlib: Fix big performance regression

Message ID 20240716143546.647604-1-trini@konsulko.com
State Accepted
Commit 4cf1275f2b85b4bf5c5b037369178e9fedacd0a9
Delegated to: Tom Rini
Headers show
Series [v2] zlib: Fix big performance regression | expand

Commit Message

Tom Rini July 16, 2024, 2:35 p.m. UTC
From: Christophe Leroy <christophe.leroy@csgroup.eu>

Commit 340fdf1303dc ("zlib: Port fix for CVE-2016-9841 to U-Boot")
brings a big performance regression in inflate_fast(), which leads
to watchdog timer reset on powerpc 8xx.

It looks like that commit does more than what it describe, it
especially removed an important optimisation that was doing copies
using halfwords instead of bytes. That unexpected change multiplied
by almost 4 the time spent in inflate_fast() and increased by 40%
the overall time needed to uncompress linux kernel image.

So partially revert that commit but keep post incrementation as it
is the initial purpose of said commit.

Fixes: 340fdf1303dc ("zlib: Port fix for CVE-2016-9841 to U-Boot")
[trini: Combine assorted patches in to this one, just restoring the
        performance commit]
Signed-off-by: Tom Rini <trini@konsulko.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
Given that in master we now have Michal's series un-reverted, I have
collapsed Christophe's series down to a patch that is just fixing the
performance issues, while not regressing other platforms.

Cc: Michal Simek <michal.simek@amd.com>
---
 lib/zlib/inffast.c | 51 ++++++++++++++++++++++++++++++++++++----------
 lib/zlib/zlib.h    |  1 -
 2 files changed, 40 insertions(+), 12 deletions(-)

Comments

Michal Simek July 17, 2024, 6:55 a.m. UTC | #1
On 7/16/24 16:35, Tom Rini wrote:
> From: Christophe Leroy <christophe.leroy@csgroup.eu>
> 
> Commit 340fdf1303dc ("zlib: Port fix for CVE-2016-9841 to U-Boot")
> brings a big performance regression in inflate_fast(), which leads
> to watchdog timer reset on powerpc 8xx.
> 
> It looks like that commit does more than what it describe, it
> especially removed an important optimisation that was doing copies
> using halfwords instead of bytes. That unexpected change multiplied
> by almost 4 the time spent in inflate_fast() and increased by 40%
> the overall time needed to uncompress linux kernel image.
> 
> So partially revert that commit but keep post incrementation as it
> is the initial purpose of said commit.
> 
> Fixes: 340fdf1303dc ("zlib: Port fix for CVE-2016-9841 to U-Boot")
> [trini: Combine assorted patches in to this one, just restoring the
>          performance commit]
> Signed-off-by: Tom Rini <trini@konsulko.com>
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
> Given that in master we now have Michal's series un-reverted, I have
> collapsed Christophe's series down to a patch that is just fixing the
> performance issues, while not regressing other platforms.
> 
> Cc: Michal Simek <michal.simek@amd.com>
> ---
>   lib/zlib/inffast.c | 51 ++++++++++++++++++++++++++++++++++++----------
>   lib/zlib/zlib.h    |  1 -
>   2 files changed, 40 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/zlib/inffast.c b/lib/zlib/inffast.c
> index 5e2a65ad4d27..b5a0adcce69f 100644
> --- a/lib/zlib/inffast.c
> +++ b/lib/zlib/inffast.c
> @@ -236,18 +236,47 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
>                       }
>                   }
>                   else {
> +		    unsigned short *sout;
> +		    unsigned long loops;
> +
>                       from = out - dist;          /* copy direct from output */
> -                    do {                        /* minimum length is three */
> -                        *out++ = *from++;
> -                        *out++ = *from++;
> -                        *out++ = *from++;
> -                        len -= 3;
> -                    } while (len > 2);
> -                    if (len) {
> -                        *out++ = *from++;
> -                        if (len > 1)
> -                            *out++ = *from++;
> -                    }
> +                    /* minimum length is three */
> +		    /* Align out addr */
> +		    if (!((long)(out - 1) & 1)) {
> +			*out++ = *from++;
> +			len--;
> +		    }
> +		    sout = (unsigned short *)out;
> +		    if (dist > 2 ) {
> +			unsigned short *sfrom;
> +
> +			sfrom = (unsigned short *)from;
> +			loops = len >> 1;
> +			do
> +			    *sout++ = get_unaligned(sfrom++);
> +			while (--loops);
> +			out = (unsigned char *)sout;
> +			from = (unsigned char *)sfrom;
> +		    } else { /* dist == 1 or dist == 2 */
> +			unsigned short pat16;
> +
> +			pat16 = *(sout - 1);
> +			if (dist == 1)
> +#if defined(__BIG_ENDIAN)
> +			    pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8);
> +#elif defined(__LITTLE_ENDIAN)
> +			    pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00 ) >> 8);
> +#else
> +#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
> +#endif
> +			loops = len >> 1;
> +			do
> +			    *sout++ = pat16;
> +			while (--loops);
> +			out = (unsigned char *)sout;
> +		    }
> +		    if (len & 1)
> +			*out++ = *from++;
>                   }
>               }
>               else if ((op & 64) == 0) {          /* 2nd level distance code */
> diff --git a/lib/zlib/zlib.h b/lib/zlib/zlib.h
> index 560e7be97d3a..f9b2f69ac027 100644
> --- a/lib/zlib/zlib.h
> +++ b/lib/zlib/zlib.h
> @@ -10,7 +10,6 @@
>   /* avoid conflicts */
>   #undef OFF
>   #undef ASMINF
> -#undef POSTINC
>   #undef NO_GZIP
>   #define GUNZIP
>   #undef STDC

I am fine with whatever change which doesn't break functionality and address 
CVE. I took that code from upstream zlib version but I forget details why I 
removed this part of code.

Acked-by: Michal Simek <michal.simek@amd.com>

Thanks,
Michal
Tom Rini July 20, 2024, 5:10 p.m. UTC | #2
On Tue, 16 Jul 2024 08:35:46 -0600, Tom Rini wrote:

> Commit 340fdf1303dc ("zlib: Port fix for CVE-2016-9841 to U-Boot")
> brings a big performance regression in inflate_fast(), which leads
> to watchdog timer reset on powerpc 8xx.
> 
> It looks like that commit does more than what it describe, it
> especially removed an important optimisation that was doing copies
> using halfwords instead of bytes. That unexpected change multiplied
> by almost 4 the time spent in inflate_fast() and increased by 40%
> the overall time needed to uncompress linux kernel image.
> 
> [...]

Applied to u-boot/master, thanks!
diff mbox series

Patch

diff --git a/lib/zlib/inffast.c b/lib/zlib/inffast.c
index 5e2a65ad4d27..b5a0adcce69f 100644
--- a/lib/zlib/inffast.c
+++ b/lib/zlib/inffast.c
@@ -236,18 +236,47 @@  unsigned start;         /* inflate()'s starting value for strm->avail_out */
                     }
                 }
                 else {
+		    unsigned short *sout;
+		    unsigned long loops;
+
                     from = out - dist;          /* copy direct from output */
-                    do {                        /* minimum length is three */
-                        *out++ = *from++;
-                        *out++ = *from++;
-                        *out++ = *from++;
-                        len -= 3;
-                    } while (len > 2);
-                    if (len) {
-                        *out++ = *from++;
-                        if (len > 1)
-                            *out++ = *from++;
-                    }
+                    /* minimum length is three */
+		    /* Align out addr */
+		    if (!((long)(out - 1) & 1)) {
+			*out++ = *from++;
+			len--;
+		    }
+		    sout = (unsigned short *)out;
+		    if (dist > 2 ) {
+			unsigned short *sfrom;
+
+			sfrom = (unsigned short *)from;
+			loops = len >> 1;
+			do
+			    *sout++ = get_unaligned(sfrom++);
+			while (--loops);
+			out = (unsigned char *)sout;
+			from = (unsigned char *)sfrom;
+		    } else { /* dist == 1 or dist == 2 */
+			unsigned short pat16;
+
+			pat16 = *(sout - 1);
+			if (dist == 1)
+#if defined(__BIG_ENDIAN)
+			    pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8);
+#elif defined(__LITTLE_ENDIAN)
+			    pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00 ) >> 8);
+#else
+#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
+#endif
+			loops = len >> 1;
+			do
+			    *sout++ = pat16;
+			while (--loops);
+			out = (unsigned char *)sout;
+		    }
+		    if (len & 1)
+			*out++ = *from++;
                 }
             }
             else if ((op & 64) == 0) {          /* 2nd level distance code */
diff --git a/lib/zlib/zlib.h b/lib/zlib/zlib.h
index 560e7be97d3a..f9b2f69ac027 100644
--- a/lib/zlib/zlib.h
+++ b/lib/zlib/zlib.h
@@ -10,7 +10,6 @@ 
 /* avoid conflicts */
 #undef OFF
 #undef ASMINF
-#undef POSTINC
 #undef NO_GZIP
 #define GUNZIP
 #undef STDC