Message ID | 20181114024412.19368-1-joel@jms.id.au (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | [v2] powerpc/math-emu: Update macros from gmp-6.1.2 | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | next/apply_patch Successfully applied |
snowpatch_ozlabs/build-ppc64le | success | build succeeded & removed 0 sparse warning(s) |
snowpatch_ozlabs/build-ppc64be | success | build succeeded & removed 0 sparse warning(s) |
snowpatch_ozlabs/build-ppc64e | success | build succeeded & removed 0 sparse warning(s) |
snowpatch_ozlabs/build-pmac32 | success | build succeeded & removed 0 sparse warning(s) |
snowpatch_ozlabs/checkpatch | warning | total: 0 errors, 30 warnings, 17 checks, 154 lines checked |
On Tue, Nov 13, 2018 at 6:44 PM Joel Stanley <joel@jms.id.au> wrote: > > The add_ssaaaa, sub_ddmmss, umul_ppmm and udiv_qrnnd macros originate > from GMP's longlong.h. > This was found when compiling with clang: > > arch/powerpc/math-emu/fnmsub.c:46:2: error: invalid use of a cast in a > inline asm context requiring an l-value: remove the cast or build with > -fheinous-gnu-extensions > FP_ADD_D(R, T, B); > ^~~~~~~~~~~~~~~~~ > ... > > ./arch/powerpc/include/asm/sfp-machine.h:283:27: note: expanded from > macro 'sub_ddmmss' > : "=r" ((USItype)(sh)), \ > ~~~~~~~~~~^~~ > > Segher points out: this was fixed in GCC over 16 years ago > ( https://gcc.gnu.org/r56600 ), and in GMP (where it comes from) > presumably before that. > > Update to the latest version in order to git rid of the invalid casts. Hi Joel, thanks for this patch. It's a much better fix IMO than v1. One nit below. > > The only functional change I noticed was this in udiv_qrnnd. > > __r1 = (n1) % __d1; > __q1 = (n1) / __d1; > > Becomes this: > > __q1 = (n1) / __d1; > __r1 = (n1) - __q1 * __d1; > > This is equivalent as it instead of calculating the remainder > using modulo, it uses the result of integer division to subtract the > count of 'whole' d1 from r1. I don't understand this; why was this functional change made? > > Link: https://github.com/ClangBuiltLinux/linux/issues/260 > Signed-off-by: Joel Stanley <joel@jms.id.au> > --- > v1: > https://lore.kernel.org/linuxppc-dev/20181102033713.31916-1-joel@jms.id.au/ > v2: > Instead of setting the -fheinous-gnu-extensions for clang, fix the code. > > arch/powerpc/include/asm/sfp-machine.h | 99 +++++++++----------------- > 1 file changed, 32 insertions(+), 67 deletions(-) > > diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h > index d89beaba26ff..a6353d9dd5ba 100644 > --- a/arch/powerpc/include/asm/sfp-machine.h > +++ b/arch/powerpc/include/asm/sfp-machine.h > @@ -213,30 +213,18 @@ > * respectively. 
The result is placed in HIGH_SUM and LOW_SUM. Overflow > * (i.e. carry out) is not stored anywhere, and is lost. > */ > -#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ > +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ > do { \ > if (__builtin_constant_p (bh) && (bh) == 0) \ > - __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ > - : "=r" ((USItype)(sh)), \ > - "=&r" ((USItype)(sl)) \ > - : "%r" ((USItype)(ah)), \ > - "%r" ((USItype)(al)), \ > - "rI" ((USItype)(bl))); \ > - else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ > - __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ > - : "=r" ((USItype)(sh)), \ > - "=&r" ((USItype)(sl)) \ > - : "%r" ((USItype)(ah)), \ > - "%r" ((USItype)(al)), \ > - "rI" ((USItype)(bl))); \ > + __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ > + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \ > + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ > + __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ > + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \ > else \ > - __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ > - : "=r" ((USItype)(sh)), \ > - "=&r" ((USItype)(sl)) \ > - : "%r" ((USItype)(ah)), \ > - "r" ((USItype)(bh)), \ > - "%r" ((USItype)(al)), \ > - "rI" ((USItype)(bl))); \ > + __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ > + : "=r" (sh), "=&r" (sl) \ > + : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ > } while (0) > > /* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to > @@ -248,44 +236,24 @@ > * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, > * and is lost. 
> */ > -#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ > +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ > do { \ > if (__builtin_constant_p (ah) && (ah) == 0) \ > - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ > - : "=r" ((USItype)(sh)), \ > - "=&r" ((USItype)(sl)) \ > - : "r" ((USItype)(bh)), \ > - "rI" ((USItype)(al)), \ > - "r" ((USItype)(bl))); \ > - else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ > - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ > - : "=r" ((USItype)(sh)), \ > - "=&r" ((USItype)(sl)) \ > - : "r" ((USItype)(bh)), \ > - "rI" ((USItype)(al)), \ > - "r" ((USItype)(bl))); \ > + __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ > + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ > + else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ > + __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ > + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ > else if (__builtin_constant_p (bh) && (bh) == 0) \ > - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ > - : "=r" ((USItype)(sh)), \ > - "=&r" ((USItype)(sl)) \ > - : "r" ((USItype)(ah)), \ > - "rI" ((USItype)(al)), \ > - "r" ((USItype)(bl))); \ > - else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ > - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ > - : "=r" ((USItype)(sh)), \ > - "=&r" ((USItype)(sl)) \ > - : "r" ((USItype)(ah)), \ > - "rI" ((USItype)(al)), \ > - "r" ((USItype)(bl))); \ > + __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ > + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ > + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ > + __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ > + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ > else \ > - __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ > - : "=r" ((USItype)(sh)), \ > - "=&r" ((USItype)(sl)) \ > - : "r" ((USItype)(ah)), \ > - "r" ((USItype)(bh)), \ > - "rI" ((USItype)(al)), \ > - "r" 
((USItype)(bl))); \ > + __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ > + : "=r" (sh), "=&r" (sl) \ > + : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ > } while (0) > > /* asm fragments for mul and div */ > @@ -294,13 +262,10 @@ > * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype > * word product in HIGH_PROD and LOW_PROD. > */ > -#define umul_ppmm(ph, pl, m0, m1) \ > +#define umul_ppmm(ph, pl, m0, m1) \ > do { \ > USItype __m0 = (m0), __m1 = (m1); \ > - __asm__ ("mulhwu %0,%1,%2" \ > - : "=r" ((USItype)(ph)) \ > - : "%r" (__m0), \ > - "r" (__m1)); \ > + __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ > (pl) = __m0 * __m1; \ > } while (0) > > @@ -312,28 +277,28 @@ > * significant bit of DENOMINATOR must be 1, then the pre-processor symbol > * UDIV_NEEDS_NORMALIZATION is defined to 1. > */ > -#define udiv_qrnnd(q, r, n1, n0, d) \ > +#define udiv_qrnnd(q, r, n1, n0, d) \ > do { \ > UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ > __d1 = __ll_highpart (d); \ > __d0 = __ll_lowpart (d); \ > \ > - __r1 = (n1) % __d1; \ > __q1 = (n1) / __d1; \ > - __m = (UWtype) __q1 * __d0; \ > + __r1 = (n1) - __q1 * __d1; \ > + __m = __q1 * __d0; \ > __r1 = __r1 * __ll_B | __ll_highpart (n0); \ > if (__r1 < __m) \ > { \ > __q1--, __r1 += (d); \ > - if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */ \ > + if (__r1 >= (d)) /* i.e. 
we didn't get carry when adding to __r1 */\ > if (__r1 < __m) \ > __q1--, __r1 += (d); \ > } \ > __r1 -= __m; \ > \ > - __r0 = __r1 % __d1; \ > __q0 = __r1 / __d1; \ > - __m = (UWtype) __q0 * __d0; \ > + __r0 = __r1 - __q0 * __d1; \ > + __m = __q0 * __d0; \ > __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ > if (__r0 < __m) \ > { \ > @@ -344,7 +309,7 @@ > } \ > __r0 -= __m; \ > \ > - (q) = (UWtype) __q1 * __ll_B | __q0; \ > + (q) = __q1 * __ll_B | __q0; \ > (r) = __r0; \ > } while (0) This appears to now differ from the upstream source: https://github.com/gcc-mirror/gcc/blob/f7289f563a5e447ac21a5901ed75aad0dbd37732/include/longlong.h#L1679 If we're going to borrow implementations from GCC, let's borrow the same implementation. Otherwise it's hard to have confidence in this part of the patch.
On Tue, 20 Nov 2018 at 05:24, Nick Desaulniers <ndesaulniers@google.com> wrote: > > > > The only functional change I noticed was this in udiv_qrnnd. > > > > __r1 = (n1) % __d1; > > __q1 = (n1) / __d1; > > > > Becomes this: > > > > __q1 = (n1) / __d1; > > __r1 = (n1) - __q1 * __d1; > > > > This is equivalent as it instead of calculating the remainder > > using modulo, it uses the result of integer division to subtract the > > count of 'whole' d1 from r1. > > I don't understand this; why was this functional change made? I couldn't see why. It pre-dates GMP's mecurial history that . \ > > - (q) = (UWtype) __q1 * __ll_B | __q0; \ > > + (q) = __q1 * __ll_B | __q0; \ > > (r) = __r0; \ > > } while (0) > > This appears to now differ from the upstream source: > https://github.com/gcc-mirror/gcc/blob/f7289f563a5e447ac21a5901ed75aad0dbd37732/include/longlong.h#L1679 AIUI the upstream source is GMP: https://gmplib.org/repo/gmp/file/tip/longlong.h > If we're going to borrow implementations from GCC, let's borrow the > same implementation. Otherwise it's hard to have confidence in this > part of the patch. I agree we should use the upstream source. Segher, which tree contains the One True Upstream for longlong.h? Cheers, Joel
On Tue, Nov 20, 2018 at 09:45:33AM +1030, Joel Stanley wrote: > On Tue, 20 Nov 2018 at 05:24, Nick Desaulniers <ndesaulniers@google.com> wrote: > > > > > > The only functional change I noticed was this in udiv_qrnnd. > > > > > > __r1 = (n1) % __d1; > > > __q1 = (n1) / __d1; > > > > > > Becomes this: > > > > > > __q1 = (n1) / __d1; > > > __r1 = (n1) - __q1 * __d1; > > > > > > This is equivalent as it instead of calculating the remainder > > > using modulo, it uses the result of integer division to subtract the > > > count of 'whole' d1 from r1. > > > > I don't understand this; why was this functional change made? > > I couldn't see why. It pre-dates GMP's mecurial history that . Perhaps it is to make it more likely only a single divide machine instruction results. Maybe for -O0, maybe for older GCC, maybe for GCC targets that do not get this right. It of course means exactly the same. > AIUI the upstream source is GMP: > > https://gmplib.org/repo/gmp/file/tip/longlong.h > > > If we're going to borrow implementations from GCC, let's borrow the > > same implementation. Otherwise it's hard to have confidence in this > > part of the patch. > > I agree we should use the upstream source. > > Segher, which tree contains the One True Upstream for longlong.h? You should probably get your updates from the same place as was used to get the file in the first place. Or, don't worry so much about it, how often is this updated? Once in ten years, or twenty? If you do not want to include bigger (maybe irrelevant) changes from upstream, you have already forked, effectively. Segher
On Tue, Nov 20, 2018 at 10:20 AM Segher Boessenkool <segher@kernel.crashing.org> wrote: > > On Tue, Nov 20, 2018 at 09:45:33AM +1030, Joel Stanley wrote: > > On Tue, 20 Nov 2018 at 05:24, Nick Desaulniers <ndesaulniers@google.com> wrote: > > > > > > > > The only functional change I noticed was this in udiv_qrnnd. > > AIUI the upstream source is GMP: > > > > https://gmplib.org/repo/gmp/file/tip/longlong.h > > > > > If we're going to borrow implementations from GCC, let's borrow the > > > same implementation. Otherwise it's hard to have confidence in this > > > part of the patch. > > > > I agree we should use the upstream source. > > > > Segher, which tree contains the One True Upstream for longlong.h? > > You should probably get your updates from the same place as was used to > get the file in the first place. So sounds like GMP is the source to take then. Joel, would you mind sending a v3 with just the commit message updated to reflect the fact that GCC and GMP differ in implementations, and this is taken from GMP, maybe with some links? I think that will provide more context for future travelers. I'd be happy to add my reviewed-by tag to that.
On Tue, Nov 20, 2018 at 10:45:19AM -0800, Nick Desaulniers wrote: > On Tue, Nov 20, 2018 at 10:20 AM Segher Boessenkool > <segher@kernel.crashing.org> wrote: > > > Segher, which tree contains the One True Upstream for longlong.h? > > > > You should probably get your updates from the same place as was used to > > get the file in the first place. > > So sounds like GMP is the source to take then. I'm pretty sure the kernel got it from GCC. How much does the kernel one differ from current GCC? FWIW, the GCC version still uses this "abhorrent GCC extension" (or what were those words) for most targets, just not for powerpc. Segher
On Tue, Nov 20, 2018 at 12:15 PM Segher Boessenkool <segher@kernel.crashing.org> wrote: > > On Tue, Nov 20, 2018 at 10:45:19AM -0800, Nick Desaulniers wrote: > > On Tue, Nov 20, 2018 at 10:20 AM Segher Boessenkool > > <segher@kernel.crashing.org> wrote: > > > > Segher, which tree contains the One True Upstream for longlong.h? > > > > > > You should probably get your updates from the same place as was used to > > > get the file in the first place. > > > > So sounds like GMP is the source to take then. > > I'm pretty sure the kernel got it from GCC. > How much does the kernel > one differ from current GCC? Compare udiv_qrnnd implementations from: gcc: https://github.com/gcc-mirror/gcc/blob/f7289f563a5e447ac21a5901ed75aad0dbd37732/include/longlong.h#L1679 gmp: https://gmplib.org/repo/gmp/file/tip/longlong.h#l2063 The kernel currently is using the GCC impl, which Joel's patch here changes it to the GMP impl. If you prefer the kernel to keep the kernel keep the GCC derivative, then Joel can drop the change to udiv_qrnnd() as it does not contain inline assembly relevant to the original issue.
diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h index d89beaba26ff..a6353d9dd5ba 100644 --- a/arch/powerpc/include/asm/sfp-machine.h +++ b/arch/powerpc/include/asm/sfp-machine.h @@ -213,30 +213,18 @@ * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow * (i.e. carry out) is not stored anywhere, and is lost. */ -#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (bh) && (bh) == 0) \ - __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "%r" ((USItype)(ah)), \ - "%r" ((USItype)(al)), \ - "rI" ((USItype)(bl))); \ - else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ - __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "%r" ((USItype)(ah)), \ - "%r" ((USItype)(al)), \ - "rI" ((USItype)(bl))); \ + __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \ else \ - __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "%r" ((USItype)(ah)), \ - "r" ((USItype)(bh)), \ - "%r" ((USItype)(al)), \ - "rI" ((USItype)(bl))); \ + __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ } while (0) /* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to @@ -248,44 +236,24 @@ * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, * and is lost. 
*/ -#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (ah) && (ah) == 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(bh)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ - else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(bh)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ else if (__builtin_constant_p (bh) && (bh) == 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(ah)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ - else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(ah)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ + __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ else \ - __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(ah)), \ - "r" ((USItype)(bh)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ + __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), 
"r" (bh), "rI" (al), "r" (bl)); \ } while (0) /* asm fragments for mul and div */ @@ -294,13 +262,10 @@ * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype * word product in HIGH_PROD and LOW_PROD. */ -#define umul_ppmm(ph, pl, m0, m1) \ +#define umul_ppmm(ph, pl, m0, m1) \ do { \ USItype __m0 = (m0), __m1 = (m1); \ - __asm__ ("mulhwu %0,%1,%2" \ - : "=r" ((USItype)(ph)) \ - : "%r" (__m0), \ - "r" (__m1)); \ + __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ (pl) = __m0 * __m1; \ } while (0) @@ -312,28 +277,28 @@ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol * UDIV_NEEDS_NORMALIZATION is defined to 1. */ -#define udiv_qrnnd(q, r, n1, n0, d) \ +#define udiv_qrnnd(q, r, n1, n0, d) \ do { \ UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ __d1 = __ll_highpart (d); \ __d0 = __ll_lowpart (d); \ \ - __r1 = (n1) % __d1; \ __q1 = (n1) / __d1; \ - __m = (UWtype) __q1 * __d0; \ + __r1 = (n1) - __q1 * __d1; \ + __m = __q1 * __d0; \ __r1 = __r1 * __ll_B | __ll_highpart (n0); \ if (__r1 < __m) \ { \ __q1--, __r1 += (d); \ - if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */ \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ if (__r1 < __m) \ __q1--, __r1 += (d); \ } \ __r1 -= __m; \ \ - __r0 = __r1 % __d1; \ __q0 = __r1 / __d1; \ - __m = (UWtype) __q0 * __d0; \ + __r0 = __r1 - __q0 * __d1; \ + __m = __q0 * __d0; \ __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ if (__r0 < __m) \ { \ @@ -344,7 +309,7 @@ } \ __r0 -= __m; \ \ - (q) = (UWtype) __q1 * __ll_B | __q0; \ + (q) = __q1 * __ll_B | __q0; \ (r) = __r0; \ } while (0)
The add_ssaaaa, sub_ddmmss, umul_ppmm and udiv_qrnnd macros originate from GMP's longlong.h. This was found when compiling with clang: arch/powerpc/math-emu/fnmsub.c:46:2: error: invalid use of a cast in a inline asm context requiring an l-value: remove the cast or build with -fheinous-gnu-extensions FP_ADD_D(R, T, B); ^~~~~~~~~~~~~~~~~ ... ./arch/powerpc/include/asm/sfp-machine.h:283:27: note: expanded from macro 'sub_ddmmss' : "=r" ((USItype)(sh)), \ ~~~~~~~~~~^~~ Segher points out: this was fixed in GCC over 16 years ago ( https://gcc.gnu.org/r56600 ), and in GMP (where it comes from) presumably before that. Update to the latest version in order to git rid of the invalid casts. The only functional change I noticed was this in udiv_qrnnd. __r1 = (n1) % __d1; __q1 = (n1) / __d1; Becomes this: __q1 = (n1) / __d1; __r1 = (n1) - __q1 * __d1; This is equivalent as it instead of calculating the remainder using modulo, it uses the result of integer division to subtract the count of 'whole' d1 from r1. Link: https://github.com/ClangBuiltLinux/linux/issues/260 Signed-off-by: Joel Stanley <joel@jms.id.au> --- v1: https://lore.kernel.org/linuxppc-dev/20181102033713.31916-1-joel@jms.id.au/ v2: Instead of setting the -fheinous-gnu-extensions for clang, fix the code. arch/powerpc/include/asm/sfp-machine.h | 99 +++++++++----------------- 1 file changed, 32 insertions(+), 67 deletions(-)