[ARM,1/n] PR53447: optimizations of 64bit ALU operation with constant

On 8 June 2012 10:12, Carrot Wei <carrot@google.com> wrote:
> Hi
>
> In rtl expression, substract a constant c is expressed as add a value -c, so it
> is alse processed by adddi3, and I extend it more to handle a subtraction of
> 64bit constant. I created an insn pattern arm_subdi3_immediate to specifically
> represent substraction with 64bit constant while continue keeping the add rtl
> expression.
>

Sorry about the time it has taken to review this patch -Thanks for
tackling this but I'm not convinced that this patch is correct and
definitely can be more efficient.

The range of valid 64 bit constants allowed would be in my opinion are
the following- obtained by dividing the 64 bit constant into 2 32 bit
halves (upper32 and lower32 referred to as upper and lower below)

 arm_not_operand (upper) && arm_add_operand (lower) which boils down
to the valid combination of

  adds lo : adc hi - both positive constants.
  adds lo ; sbc hi  - lower positive, upper negative
  subs lo ; sbc hi - lower negative, upper negative
  subs lo ; adc hi  - lower negative, upper positive

Therefore I'd do the following -

* Don't make *arm_adddi3 a named pattern - we don't need that.
* Change the *addsi3_carryin_<optab> pattern to be something like this :

* I'd like a new const_ok_for_dimode_op function that dealt with each
of these operations, thus your plus operation with a DImode constant
would just be a check similar to what I've said above.
* You then don't need the new subdi3_immediate pattern and the split
can happen after reload. Adjust predicates and constraints
accordingly, delete it. Also please use CONST_INT_P instead of
GET_CODE (x) == CONST_INT in your patch.

regards,
Ramana

> Tested on arm qemu with both arm/thumb modes. OK for trunk?
>
> thanks
> Carrot
>
>
> 2012-06-08  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * gcc.target/arm/pr53447-1.c: New testcase.
>        * gcc.target/arm/pr53447-5.c: New testcase.
>
>
> 2012-06-08  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * config/arm/constraints.md (Dd): New constraint.
>        * config/arm/predicates.md (arm_neg_immediate_di_operand): New
>        predicate.
>        * config/arm/arm.md (adddi3): Extend it to handle constants.
>        (arm_subdi3_immediate): New insn pattern.
>        (arm_adddi3): Extend it to handle constants.
>        * config/arm/neon.md (adddi3_neon): Likewise.
>
>
> Index: testsuite/gcc.target/arm/pr53447-1.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> +++ testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> @@ -0,0 +1,8 @@
> +/* { dg-options "-O2" }  */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-final { scan-assembler-not "mov" } } */
> +
> +void t0p(long long * p)
> +{
> +  *p += 0x100000001;
> +}
> Index: testsuite/gcc.target/arm/pr53447-5.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr53447-5.c        (revision 0)
> +++ testsuite/gcc.target/arm/pr53447-5.c        (revision 0)
> @@ -0,0 +1,8 @@
> +/* { dg-options "-O2" }  */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-final { scan-assembler-not "mov" } } */
> +
> +void t0p(long long * p)
> +{
> +  *p -= 0x100000008;
> +}
> Index: config/arm/neon.md
> ===================================================================
> --- config/arm/neon.md  (revision 187751)
> +++ config/arm/neon.md  (working copy)
> @@ -588,9 +588,9 @@
>  )
>
>  (define_insn "adddi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
> -        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
> -                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
> +  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
> +        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
> +                 (match_operand:DI 2 "arm_di_operand"     "w,r,0,w,r,Di,Di")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_NEON"
>  {
> @@ -600,13 +600,16 @@
>     case 3: return "vadd.i64\t%P0, %P1, %P2";
>     case 1: return "#";
>     case 2: return "#";
> +    case 4: return "#";
> +    case 5: return "#";
> +    case 6: return "#";
>     default: gcc_unreachable ();
>     }
>  }
> -  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
> -   (set_attr "conds" "*,clob,clob,*")
> -   (set_attr "length" "*,8,8,*")
> -   (set_attr "arch" "nota8,*,*,onlya8")]
> +  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
> +   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
> +   (set_attr "length" "*,8,8,*,8,8,8")
> +   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
>  )
>
>  (define_insn "*sub<mode>3_neon"
> Index: config/arm/constraints.md
> ===================================================================
> --- config/arm/constraints.md   (revision 187751)
> +++ config/arm/constraints.md   (working copy)
> @@ -29,7 +29,7 @@
>  ;; in Thumb-1 state: I, J, K, L, M, N, O
>
>  ;; The following multi-letter normal constraints have been used:
> -;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
> +;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
>  ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
>  ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
>
> @@ -251,6 +251,13 @@
>       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
>                   && !(optimize_size || arm_ld_sched)")))
>
> +(define_constraint "Dd"
> + "@internal
> +  In ARM/Thumb-2 state a const_int whose negative value satisfy constraint
> +  Di."
> + (and (match_code "const_int")
> +      (match_test "TARGET_32BIT && arm_const_double_by_immediates
> (GEN_INT (-ival))")))
> +
>  (define_constraint "Di"
>  "@internal
>   In ARM/Thumb-2 state a const_int or const_double where both the high
> Index: config/arm/predicates.md
> ===================================================================
> --- config/arm/predicates.md    (revision 187751)
> +++ config/arm/predicates.md    (working copy)
> @@ -117,6 +117,10 @@
>   (and (match_code "const_int,const_double")
>        (match_test "arm_const_double_by_immediates (op)")))
>
> +(define_predicate "arm_neg_immediate_di_operand"
> +  (and (match_code "const_int")
> +       (match_test "arm_const_double_by_immediates (GEN_INT (-INTVAL (op)))")))
> +
>  (define_predicate "arm_neg_immediate_operand"
>   (and (match_code "const_int")
>        (match_test "const_ok_for_arm (-INTVAL (op))")))
> Index: config/arm/arm.md
> ===================================================================
> --- config/arm/arm.md   (revision 187751)
> +++ config/arm/arm.md   (working copy)
> @@ -574,10 +574,29 @@
>  [(parallel
>    [(set (match_operand:DI           0 "s_register_operand" "")
>          (plus:DI (match_operand:DI 1 "s_register_operand" "")
> -                  (match_operand:DI 2 "s_register_operand" "")))
> +                  (match_operand:DI 2 "reg_or_int_operand" "")))
>     (clobber (reg:CC CC_REGNUM))])]
>   "TARGET_EITHER"
>   "
> +  if (GET_CODE (operands[2]) == CONST_INT)
> +    {
> +      rtx neg_val = GEN_INT (-INTVAL (operands[2]));
> +      if (TARGET_32BIT && arm_const_double_by_immediates (operands[2]))
> +       {
> +         emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
> +         DONE;
> +       }
> +      else if (TARGET_32BIT && arm_const_double_by_immediates (neg_val))
> +       {
> +         emit_insn (gen_arm_subdi3_immediate (operands[0],
> +                                              operands[1],
> +                                              operands[2]));
> +         DONE;
> +       }
> +      else
> +       operands[2] = force_reg (DImode, operands[2]);
> +    }
> +
>   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
>     {
>       if (!cirrus_fp_register (operands[0], DImode))
> @@ -609,10 +628,10 @@
>   [(set_attr "length" "4")]
>  )
>
> -(define_insn_and_split "*arm_adddi3"
> -  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
> -       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
> -                (match_operand:DI 2 "s_register_operand" "r,  0")))
> +(define_insn_and_split "arm_adddi3"
> +  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
> +       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
> +                (match_operand:DI 2 "arm_di_operand"     "r,  0, r, Di,Di")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
>   "#"
> @@ -630,8 +649,17 @@
>     operands[0] = gen_lowpart (SImode, operands[0]);
>     operands[4] = gen_highpart (SImode, operands[1]);
>     operands[1] = gen_lowpart (SImode, operands[1]);
> -    operands[5] = gen_highpart (SImode, operands[2]);
> -    operands[2] = gen_lowpart (SImode, operands[2]);
> +    if (GET_CODE (operands[2]) == CONST_INT)
> +      {
> +       HOST_WIDE_INT v = INTVAL (operands[2]);
> +       operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
> +       operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
> +      }
> +    else
> +      {
> +       operands[5] = gen_highpart (SImode, operands[2]);
> +       operands[2] = gen_lowpart (SImode, operands[2]);
> +      }
>   }"
>   [(set_attr "conds" "clob")
>    (set_attr "length" "8")]
> @@ -1122,6 +1150,25 @@
>    (set_attr "length" "8")]
>  )
>
> +(define_insn "arm_subdi3_immediate"
> +  [(set (match_operand:DI          0 "s_register_operand"           "=&r,&r")
> +        (plus:DI (match_operand:DI 1 "s_register_operand"           "0, r")
> +                 (match_operand:DI 2 "arm_neg_immediate_di_operand" "Dd,Dd")))
> +   (clobber (reg:CC CC_REGNUM))]
> +  "TARGET_32BIT"
> +  "*
> +  {
> +    HOST_WIDE_INT v = -INTVAL (operands[2]);
> +    operands[3] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
> +    operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
> +    output_asm_insn (\"subs\\t%Q0, %Q1, %2\", operands);
> +    output_asm_insn (\"sbc\\t%R0, %R1, %3\", operands);
> +    return \"\";
> +  }"
> +  [(set_attr "conds" "clob")
> +   (set_attr "length" "8")]
> +)
> +
>  (define_insn "*thumb_subdi3"
>   [(set (match_operand:DI           0 "register_operand" "=l")
>        (minus:DI (match_operand:DI 1 "register_operand"  "0")
>
>
>
> On Wed, Jun 6, 2012 at 7:16 PM, Carrot Wei <carrot@google.com> wrote:
>> In the original patch, if "add r0, c" is not possible, but "sub r0,
>> -c" is possible, it will use the sub instruction. Although they
>> generate same result, but they may generate different CF flag, and
>> cause subsequent adc to compute out wrong result. So I updated the
>> patch to avoid using sub instruction.
>>
>> Tested on arm qemu with both arm/thumb mode.
>>
>> thanks
>> Carrot
>>
>>
>> 2012-06-06  Wei Guozhi  <carrot@google.com>
>>
>>        PR target/53447
>>        * gcc.target/arm/pr53447-1.c: New testcase.
>>
>>
>> 2012-06-06  Wei Guozhi  <carrot@google.com>
>>
>>        PR target/53447
>>        * config/arm/arm.md (adddi3): Extend it to handle constants.
>>        (arm_adddi3): Likewise.
>>        * config/arm/neon.md (adddi3_neon): Likewise.
>>
>>
>> Index: testsuite/gcc.target/arm/pr53447-1.c
>> ===================================================================
>> --- testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
>> +++ testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
>> @@ -0,0 +1,8 @@
>> +/* { dg-options "-O2" }  */
>> +/* { dg-require-effective-target arm32 } */
>> +/* { dg-final { scan-assembler-not "mov" } } */
>> +
>> +void t0p(long long * p)
>> +{
>> +  *p += 0x100000001;
>> +}
>> Index: config/arm/neon.md
>> ===================================================================
>> --- config/arm/neon.md  (revision 187751)
>> +++ config/arm/neon.md  (working copy)
>> @@ -588,9 +588,9 @@
>>  )
>>
>>  (define_insn "adddi3_neon"
>> -  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
>> -        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
>> -                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
>> +  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
>> +        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
>> +                 (match_operand:DI 2 "reg_or_int_operand" "w,r,0,w,r,Di,Di")))
>>    (clobber (reg:CC CC_REGNUM))]
>>   "TARGET_NEON"
>>  {
>> @@ -600,13 +600,16 @@
>>     case 3: return "vadd.i64\t%P0, %P1, %P2";
>>     case 1: return "#";
>>     case 2: return "#";
>> +    case 4: return "#";
>> +    case 5: return "#";
>> +    case 6: return "#";
>>     default: gcc_unreachable ();
>>     }
>>  }
>> -  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
>> -   (set_attr "conds" "*,clob,clob,*")
>> -   (set_attr "length" "*,8,8,*")
>> -   (set_attr "arch" "nota8,*,*,onlya8")]
>> +  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
>> +   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
>> +   (set_attr "length" "*,8,8,*,8,8,8")
>> +   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
>>  )
>>
>>  (define_insn "*sub<mode>3_neon"
>> Index: config/arm/arm.md
>> ===================================================================
>> --- config/arm/arm.md   (revision 187751)
>> +++ config/arm/arm.md   (working copy)
>> @@ -574,10 +574,21 @@
>>  [(parallel
>>    [(set (match_operand:DI           0 "s_register_operand" "")
>>          (plus:DI (match_operand:DI 1 "s_register_operand" "")
>> -                  (match_operand:DI 2 "s_register_operand" "")))
>> +                  (match_operand:DI 2 "reg_or_int_operand" "")))
>>     (clobber (reg:CC CC_REGNUM))])]
>>   "TARGET_EITHER"
>>   "
>> +  if (GET_CODE (operands[2]) == CONST_INT)
>> +    {
>> +      if (TARGET_32BIT && arm_const_double_by_immediates (operands[2]))
>> +       {
>> +         emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
>> +         DONE;
>> +       }
>> +      else
>> +       operands[2] = force_reg (DImode, operands[2]);
>> +    }
>> +
>>   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
>>     {
>>       if (!cirrus_fp_register (operands[0], DImode))
>> @@ -609,10 +620,10 @@
>>   [(set_attr "length" "4")]
>>  )
>>
>> -(define_insn_and_split "*arm_adddi3"
>> -  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
>> -       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
>> -                (match_operand:DI 2 "s_register_operand" "r,  0")))
>> +(define_insn_and_split "arm_adddi3"
>> +  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
>> +       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
>> +                (match_operand:DI 2 "reg_or_int_operand" "r,  0, r, Di,Di")))
>>    (clobber (reg:CC CC_REGNUM))]
>>   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
>>   "#"
>> @@ -630,8 +641,17 @@
>>     operands[0] = gen_lowpart (SImode, operands[0]);
>>     operands[4] = gen_highpart (SImode, operands[1]);
>>     operands[1] = gen_lowpart (SImode, operands[1]);
>> -    operands[5] = gen_highpart (SImode, operands[2]);
>> -    operands[2] = gen_lowpart (SImode, operands[2]);
>> +    if (GET_CODE (operands[2]) == CONST_INT)
>> +      {
>> +       HOST_WIDE_INT v = INTVAL (operands[2]);
>> +       operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
>> +       operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
>> +      }
>> +    else
>> +      {
>> +       operands[5] = gen_highpart (SImode, operands[2]);
>> +       operands[2] = gen_lowpart (SImode, operands[2]);
>> +      }
>>   }"
>>   [(set_attr "conds" "clob")
>>    (set_attr "length" "8")]
>>

[ARM,1/n] PR53447: optimizations of 64bit ALU operation with constant

Commit Message

Comments

Patch