diff mbox

[3/3,AArch64] Improve zero extend

Message ID HE1PR0801MB1482CBB48F683DD49784CD3283300@HE1PR0801MB1482.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Wilco Dijkstra July 19, 2016, 3:32 p.m. UTC
On AArch64 the UXTB and UXTH instructions are aliases of UBFM,
which does a shift as part of its operation. An AND immediate is a
simpler operation, and might be faster on some implementations, so it is
better to emit this instead of UBFM.

Benchmarking showed no difference on implementations where UBFM has
the same performance as AND, and minor speedups across several
benchmarks on an implementation where UBFM is slower than AND.

Bootstrapped and tested on aarch64-none-elf.

2016-07-19  Kristina Martsenko  <kristina.martsenko@arm.com>
2016-07-19  Wilco Dijkstra  <wdijkstr@arm.com>

        * config/aarch64/aarch64.md
        (zero_extend<SHORT:mode><GPI:mode>2_aarch64): Change output
        statement and type.
        (<optab>qihi2_aarch64): Likewise, and split into two.
        (extendqihi2_aarch64): New.
        (zero_extendqihi2_aarch64): New.
        * config/aarch64/iterators.md (ldrxt): Remove.
        * config/aarch64/aarch64.c (aarch64_rtx_costs): Change cost of
        uxtb/uxth.
---

Comments

Richard Earnshaw (lists) July 20, 2016, 9:37 a.m. UTC | #1
On 19/07/16 16:32, Wilco Dijkstra wrote:
> On AArch64 the UXTB and UXTH instructions are aliases of UBFM,
> which does a shift as part of its operation. An AND immediate is a
> simpler operation, and might be faster on some implementations, so it is
> better to emit this instead of UBFM.
> 
> Benchmarking showed no difference on implementations where UBFM has
> the same performance as AND, and minor speedups across several
> benchmarks on an implementation where UBFM is slower than AND.
> 
> Bootstrapped and tested on aarch64-none-elf.
> 
> 2016-07-19  Kristina Martsenko  <kristina.martsenko@arm.com>
> 2016-07-19  Wilco Dijkstra  <wdijkstr@arm.com>
> 
>         * config/aarch64/aarch64.md
>         (zero_extend<SHORT:mode><GPI:mode>2_aarch64): Change output
>         statement and type.
>         (<optab>qihi2_aarch64): Likewise, and split into two.
>         (extendqihi2_aarch64): New.
>         (zero_extendqihi2_aarch64): New.
>         * config/aarch64/iterators.md (ldrxt): Remove.
>         * config/aarch64/aarch64.c (aarch64_rtx_costs): Change cost of
>         uxtb/uxth.

This looks sensible to me, but please wait 24 hours before committing,
just to allow the other folk with different implementations to comment
if they wish.

R.

> ---
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index c7249e8e98905bea4879bb2e2ee81d51a1004faa..e98e41521bfa8f807248b0147843de9e1f3447e3 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -6886,8 +6886,8 @@ cost_plus:
>  	    }
>  	  else
>  	    {
> -	      /* UXTB/UXTH.  */
> -	      *cost += extra_cost->alu.extend;
> +	      /* We generate an AND instead of UXTB/UXTH.  */
> +	      *cost += extra_cost->alu.logical;
>  	    }
>  	}
>        return false;
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 64f9ca1c4d1bec64cef769c9dbef9e4b5b00ba9e..5e8b1a815515eabc7e69c75574c2c300f50a6fe4 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1580,10 +1580,10 @@
>          (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))]
>    ""
>    "@
> -   uxt<SHORT:size>\t%<GPI:w>0, %w1
> +   and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask>
>     ldr<SHORT:size>\t%w0, %1
>     ldr\t%<SHORT:size>0, %1"
> -  [(set_attr "type" "extend,load1,load1")]
> +  [(set_attr "type" "logic_imm,load1,load1")]
>  )
>  
>  (define_expand "<optab>qihi2"
> @@ -1592,16 +1592,26 @@
>    ""
>  )
>  
> -(define_insn "*<optab>qihi2_aarch64"
> +(define_insn "*extendqihi2_aarch64"
>    [(set (match_operand:HI 0 "register_operand" "=r,r")
> -        (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
> +	(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
>    ""
>    "@
> -   <su>xtb\t%w0, %w1
> -   <ldrxt>b\t%w0, %1"
> +   sxtb\t%w0, %w1
> +   ldrsb\t%w0, %1"
>    [(set_attr "type" "extend,load1")]
>  )
>  
> +(define_insn "*zero_extendqihi2_aarch64"
> +  [(set (match_operand:HI 0 "register_operand" "=r,r")
> +	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
> +  ""
> +  "@
> +   and\t%w0, %w1, 255
> +   ldrb\t%w0, %1"
> +  [(set_attr "type" "logic_imm,load1")]
> +)
> +
>  ;; -------------------------------------------------------------------
>  ;; Simple arithmetic
>  ;; -------------------------------------------------------------------
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index e8fbb1281dec2e8f37f58ef2ced792dd62e3b5aa..ef48ffda6f98a2d4aa29daaca206fef2bafcda48 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -888,9 +888,6 @@
>  ;; Similar, but when not(op)
>  (define_code_attr nlogical [(and "bic") (ior "orn") (xor "eon")])
>  
> -;; Sign- or zero-extending load
> -(define_code_attr ldrxt [(sign_extend "ldrs") (zero_extend "ldr")])
> -
>  ;; Sign- or zero-extending data-op
>  (define_code_attr su [(sign_extend "s") (zero_extend "u")
>  		      (sign_extract "s") (zero_extract "u")
>
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c7249e8e98905bea4879bb2e2ee81d51a1004faa..e98e41521bfa8f807248b0147843de9e1f3447e3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6886,8 +6886,8 @@  cost_plus:
 	    }
 	  else
 	    {
-	      /* UXTB/UXTH.  */
-	      *cost += extra_cost->alu.extend;
+	      /* We generate an AND instead of UXTB/UXTH.  */
+	      *cost += extra_cost->alu.logical;
 	    }
 	}
       return false;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 64f9ca1c4d1bec64cef769c9dbef9e4b5b00ba9e..5e8b1a815515eabc7e69c75574c2c300f50a6fe4 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1580,10 +1580,10 @@ 
         (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))]
   ""
   "@
-   uxt<SHORT:size>\t%<GPI:w>0, %w1
+   and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask>
    ldr<SHORT:size>\t%w0, %1
    ldr\t%<SHORT:size>0, %1"
-  [(set_attr "type" "extend,load1,load1")]
+  [(set_attr "type" "logic_imm,load1,load1")]
 )
 
 (define_expand "<optab>qihi2"
@@ -1592,16 +1592,26 @@ 
   ""
 )
 
-(define_insn "*<optab>qihi2_aarch64"
+(define_insn "*extendqihi2_aarch64"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
-        (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+	(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
   ""
   "@
-   <su>xtb\t%w0, %w1
-   <ldrxt>b\t%w0, %1"
+   sxtb\t%w0, %w1
+   ldrsb\t%w0, %1"
   [(set_attr "type" "extend,load1")]
 )
 
+(define_insn "*zero_extendqihi2_aarch64"
+  [(set (match_operand:HI 0 "register_operand" "=r,r")
+	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+  ""
+  "@
+   and\t%w0, %w1, 255
+   ldrb\t%w0, %1"
+  [(set_attr "type" "logic_imm,load1")]
+)
+
 ;; -------------------------------------------------------------------
 ;; Simple arithmetic
 ;; -------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e8fbb1281dec2e8f37f58ef2ced792dd62e3b5aa..ef48ffda6f98a2d4aa29daaca206fef2bafcda48 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -888,9 +888,6 @@ 
 ;; Similar, but when not(op)
 (define_code_attr nlogical [(and "bic") (ior "orn") (xor "eon")])
 
-;; Sign- or zero-extending load
-(define_code_attr ldrxt [(sign_extend "ldrs") (zero_extend "ldr")])
-
 ;; Sign- or zero-extending data-op
 (define_code_attr su [(sign_extend "s") (zero_extend "u")
 		      (sign_extract "s") (zero_extract "u")