Message ID | 5A5C8F42.6040503@foss.arm.com |
---|---|
State | New |
Headers | show |
Series | [arm] PR target/83687: Fix invalid combination of VSUB + VABS into VABD | expand |
Hi all, On 15/01/18 11:23, Kyrill Tkachov wrote: > Hi all, > > In this wrong-code bug we combine a VSUB.I8 and a VABS.S8 > into a VABD.S8 instruction . This combination is not valid > for integer operands because in the VABD instruction the semantics > are that the difference is computed in notionally infinite precision > and the absolute difference is computed on that, whereas for a > VSUB.I8 + VABS.S8 sequence the VSUB operation will perform any > wrapping that's needed for the 8-bit signed type before the VABS > gets its hands on it. > > This leads to the wrong-code in the PR where the expected > sequence from the intrinsics: > VSUB + VABS of two vectors {-100, -100, -100...}, {100, 100, 100...} > gives a result of {56, 56, 56...} (-100 - 100) > > but GCC optimises it into a single > VABD of {-100, -100, -100...}, {100, 100, 100...} > which produces a result of {200, 200, 200...} > > The transformation is still valid for floating-point operands, > which is why it was added in the first place I believe (r178817) > but this patch disables it for integer operands. > The HFmode variants though only exist for TARGET_NEON_FP16INST, so > this patch adds the appropriate guards to the new mode iterator > > Bootstrapped and tested on arm-none-linux-gnueabihf. > > Committing to trunk. I've backported this patch to the GCC 7 branch after bootstrapping and testing on arm-none-linux-gnueabihf. Thanks, Kyrill > > Thanks, > Kyrill > > 2018-01-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com> > > PR target/83687 > * config/arm/iterators.md (VF): New mode iterator. > * config/arm/neon.md (neon_vabd<mode>_2): Use the above. > Remove integer-related logic from pattern. > (neon_vabd<mode>_3): Likewise. > > 2018-01-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com> > > PR target/83687 > * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: Delete integer > tests. > * gcc.target/arm/pr83687.c: New test.
On 17/01/18 11:51, Kyrill Tkachov wrote: > Hi all, > > On 15/01/18 11:23, Kyrill Tkachov wrote: >> Hi all, >> >> In this wrong-code bug we combine a VSUB.I8 and a VABS.S8 >> into a VABD.S8 instruction . This combination is not valid >> for integer operands because in the VABD instruction the semantics >> are that the difference is computed in notionally infinite precision >> and the absolute difference is computed on that, whereas for a >> VSUB.I8 + VABS.S8 sequence the VSUB operation will perform any >> wrapping that's needed for the 8-bit signed type before the VABS >> gets its hands on it. >> >> This leads to the wrong-code in the PR where the expected >> sequence from the intrinsics: >> VSUB + VABS of two vectors {-100, -100, -100...}, {100, 100, 100...} >> gives a result of {56, 56, 56...} (-100 - 100) >> >> but GCC optimises it into a single >> VABD of {-100, -100, -100...}, {100, 100, 100...} >> which produces a result of {200, 200, 200...} >> >> The transformation is still valid for floating-point operands, >> which is why it was added in the first place I believe (r178817) >> but this patch disables it for integer operands. >> The HFmode variants though only exist for TARGET_NEON_FP16INST, so >> this patch adds the appropriate guards to the new mode iterator >> >> Bootstrapped and tested on arm-none-linux-gnueabihf. >> >> Committing to trunk. > > I've backported this patch to the GCC 7 branch after > bootstrapping and testing on arm-none-linux-gnueabihf. > The GCC 6 backport required some changes as TARGET_NEON_FP16INST does not exist on that branch. Bootstrapped and tested arm-none-linux-gnueabihf on that branch. Committing to the branch. Thanks, Kyrill 2018-05-11 Kyrylo Tkachov <kyrylo.tkachov@arm.com> PR target/83687 * config/arm/neon.md (neon_vabd<mode>_2): Use VCVTF mode iterator. Remove integer-related logic from pattern. (neon_vabd<mode>_3): Likewise. 2018-05-11 Kyrylo Tkachov <kyrylo.tkachov@arm.com> PR target/83687 * gcc.target/arm/neon-combine-sub-abs-into-vabd.c: Delete integer tests. * gcc.target/arm/pr83687.c: New test. diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index ac46b04..2d50a4d 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5583,28 +5583,22 @@ if (BYTES_BIG_ENDIAN) }) (define_insn "neon_vabd<mode>_2" - [(set (match_operand:VDQ 0 "s_register_operand" "=w") - (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") - (match_operand:VDQ 2 "s_register_operand" "w"))))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (abs:VCVTF (minus:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w"))))] + "TARGET_NEON && flag_unsafe_math_optimizations" "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "type") - (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) - (const_string "neon_fp_abd_s<q>") - (const_string "neon_abd<q>")))] + [(set_attr "type" "neon_fp_abd_s<q>")] ) (define_insn "neon_vabd<mode>_3" - [(set (match_operand:VDQ 0 "s_register_operand" "=w") - (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") - (match_operand:VDQ 2 "s_register_operand" "w")] - UNSPEC_VSUB)))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (abs:VCVTF (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w")] + UNSPEC_VSUB)))] + "TARGET_NEON && flag_unsafe_math_optimizations" "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "type") - (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) - (const_string "neon_fp_abd_s<q>") - (const_string "neon_abd<q>")))] + [(set_attr "type" "neon_fp_abd_s<q>")] ) ;; Copy from core-to-neon regs, then extend, not vice-versa diff --git a/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c index fe3d78b..784714f 100644 --- a/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c +++ b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c @@ -12,31 +12,3 @@ float32x2_t f_sub_abs_to_vabd_32(float32x2_t val1, float32x2_t val2) return res; } /* { dg-final { scan-assembler "vabd\.f32" } }*/ - -#include <arm_neon.h> -int8x8_t sub_abs_to_vabd_8(int8x8_t val1, int8x8_t val2) -{ - int8x8_t sres = vsub_s8(val1, val2); - int8x8_t res = vabs_s8 (sres); - - return res; -} -/* { dg-final { scan-assembler "vabd\.s8" } }*/ - -int16x4_t sub_abs_to_vabd_16(int16x4_t val1, int16x4_t val2) -{ - int16x4_t sres = vsub_s16(val1, val2); - int16x4_t res = vabs_s16 (sres); - - return res; -} -/* { dg-final { scan-assembler "vabd\.s16" } }*/ - -int32x2_t sub_abs_to_vabd_32(int32x2_t val1, int32x2_t val2) -{ - int32x2_t sres = vsub_s32(val1, val2); - int32x2_t res = vabs_s32 (sres); - - return res; -} -/* { dg-final { scan-assembler "vabd\.s32" } }*/ diff --git a/gcc/testsuite/gcc.target/arm/pr83687.c b/gcc/testsuite/gcc.target/arm/pr83687.c new file mode 100644 index 0000000..4275413 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr83687.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +__attribute__ ((noinline)) int8_t +testFunction1 (int8_t a, int8_t b) +{ + volatile int8x16_t sub = vsubq_s8 (vdupq_n_s8 (a), vdupq_n_s8 (b)); + int8x16_t abs = vabsq_s8 (sub); + return vgetq_lane_s8 (abs, 0); +} + +__attribute__ ((noinline)) int8_t +testFunction2 (int8_t a, int8_t b) +{ + int8x16_t sub = vsubq_s8 (vdupq_n_s8 (a), vdupq_n_s8 (b)); + int8x16_t abs = vabsq_s8 (sub); + return vgetq_lane_s8 (abs, 0); +} + +int +main (void) +{ + if (testFunction1 (-100, 100) != testFunction2 (-100, 100)) + __builtin_abort (); + + return 0; +}
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 5772aa99cc92de66ef4438b76632e86325a96ef2..0b2d42399d22ba89a976e39bef6182d31173c1ef 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -119,6 +119,10 @@ (define_mode_iterator VN [V8HI V4SI V2DI]) ;; All supported vector modes (except singleton DImode). (define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF V2SF V4SF V2DI]) +;; All supported floating-point vector modes (except V2DF). +(define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST") + (V8HF "TARGET_NEON_FP16INST") V2SF V4SF]) + ;; All supported vector modes (except those with 64-bit integer elements). (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 59fb6435da8abfe46254558e8646cd4606acb4fa..6a6f5d737715e4100adee8fb7de1d6211da3d85c 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -6706,28 +6706,22 @@ (define_expand "vec_pack_trunc_<mode>" }) (define_insn "neon_vabd<mode>_2" - [(set (match_operand:VDQ 0 "s_register_operand" "=w") - (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") - (match_operand:VDQ 2 "s_register_operand" "w"))))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + [(set (match_operand:VF 0 "s_register_operand" "=w") + (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w") + (match_operand:VF 2 "s_register_operand" "w"))))] + "TARGET_NEON && flag_unsafe_math_optimizations" "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "type") - (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) - (const_string "neon_fp_abd_s<q>") - (const_string "neon_abd<q>")))] + [(set_attr "type" "neon_fp_abd_s<q>")] ) (define_insn "neon_vabd<mode>_3" - [(set (match_operand:VDQ 0 "s_register_operand" "=w") - (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") - (match_operand:VDQ 2 "s_register_operand" "w")] - UNSPEC_VSUB)))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + [(set (match_operand:VF 0 "s_register_operand" "=w") + (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w") + (match_operand:VF 2 "s_register_operand" "w")] + UNSPEC_VSUB)))] + "TARGET_NEON && flag_unsafe_math_optimizations" "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "type") - (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) - (const_string "neon_fp_abd_s<q>") - (const_string "neon_abd<q>")))] + [(set_attr "type" "neon_fp_abd_s<q>")] ) ;; Copy from core-to-neon regs, then extend, not vice-versa diff --git a/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c index fe3d78b308cde0338300785cf7cb6ca77a831e3d..784714f0e87d8cd1216af948c61cdb87319e02cd 100644 --- a/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c +++ b/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c @@ -12,31 +12,3 @@ float32x2_t f_sub_abs_to_vabd_32(float32x2_t val1, float32x2_t val2) return res; } /* { dg-final { scan-assembler "vabd\.f32" } }*/ - -#include <arm_neon.h> -int8x8_t sub_abs_to_vabd_8(int8x8_t val1, int8x8_t val2) -{ - int8x8_t sres = vsub_s8(val1, val2); - int8x8_t res = vabs_s8 (sres); - - return res; -} -/* { dg-final { scan-assembler "vabd\.s8" } }*/ - -int16x4_t sub_abs_to_vabd_16(int16x4_t val1, int16x4_t val2) -{ - int16x4_t sres = vsub_s16(val1, val2); - int16x4_t res = vabs_s16 (sres); - - return res; -} -/* { dg-final { scan-assembler "vabd\.s16" } }*/ - -int32x2_t sub_abs_to_vabd_32(int32x2_t val1, int32x2_t val2) -{ - int32x2_t sres = vsub_s32(val1, val2); - int32x2_t res = vabs_s32 (sres); - - return res; -} -/* { dg-final { scan-assembler "vabd\.s32" } }*/ diff --git a/gcc/testsuite/gcc.target/arm/pr83687.c b/gcc/testsuite/gcc.target/arm/pr83687.c new file mode 100644 index 0000000000000000000000000000000000000000..42754138660739d9fbffcd337460e26de94f736f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr83687.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include <arm_neon.h> + +__attribute__ ((noinline)) int8_t +testFunction1 (int8_t a, int8_t b) +{ + volatile int8x16_t sub = vsubq_s8 (vdupq_n_s8 (a), vdupq_n_s8 (b)); + int8x16_t abs = vabsq_s8 (sub); + return vgetq_lane_s8 (abs, 0); +} + +__attribute__ ((noinline)) int8_t +testFunction2 (int8_t a, int8_t b) +{ + int8x16_t sub = vsubq_s8 (vdupq_n_s8 (a), vdupq_n_s8 (b)); + int8x16_t abs = vabsq_s8 (sub); + return vgetq_lane_s8 (abs, 0); +} + +int +main (void) +{ + if (testFunction1 (-100, 100) != testFunction2 (-100, 100)) + __builtin_abort (); + + return 0; +}