diff mbox

[ARM] Tie operand 1 to operand 0 in AESMC pattern when fusing AES/AESMC

Message ID 573EE122.9030002@foss.arm.com
State New
Headers show

Commit Message

Kyrill Tkachov May 20, 2016, 10:04 a.m. UTC
Hi all,

The recent -frename-registers change exposed a deficiency in the way we fuse AESE/AESMC instruction
pairs in the arm backend.

Basically we want to enforce:
     AESE Vn, _
     AESMC Vn, Vn

to enable the fusion, but regrename comes along and renames the output Vn register in AESMC to something
else, killing the fusion in the hardware.

The solution in this patch is to add an alternative that ties the input and output registers in the AESMC pattern
and enable that alternative when the fusion is enabled.

With this patch I've confirmed that the preferred register sequence above is kept even with -frename-registers
when tuning for a CPU that enables the fusion, and that regrename still breaks the chain otherwise.  I have also
seen the appropriate improvement in a proprietary benchmark (that I cannot name) that exercises this sequence.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Kyrill


2016-05-20  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/arm/arm.c (arm_fusion_enabled_p): New function.
     * config/arm/arm-protos.h (arm_fusion_enabled_p): Declare prototype.
     * config/arm/crypto.md (crypto_<crypto_pattern>, CRYPTO_UNARY):
     Add "=w,0" alternative.  Enable it when AES/AESMC fusion is enabled.
diff mbox

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index cf221d6793eaf0959f2713fe0903a5d8602ec2f4..253f14be5c8266a8b305988d0e145e4b4742f256 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -131,6 +131,7 @@  extern int arm_const_double_inline_cost (rtx);
 extern bool arm_const_double_by_parts (rtx);
 extern bool arm_const_double_by_immediates (rtx);
 extern void arm_emit_call_insn (rtx, rtx, bool);
+extern bool arm_fusion_enabled_p (unsigned int);
 extern const char *output_call (rtx *);
 void arm_emit_movpair (rtx, rtx);
 extern const char *output_mov_long_double_arm_from_arm (rtx *);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e2dc592de1974abf3fc03c8a7908dd204512f936..2cc7f7b452a62f898346a51ca7ede0d19bcfcfad 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -29642,6 +29642,13 @@  aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
   return false;
 }
 
+/* Return true iff the current tuning enables the fusion selected by OP.  */
+bool
+arm_fusion_enabled_p (unsigned int op)
+{
+  return (current_tune->fusible_ops & op) != 0;
+}
+
 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
 
 static unsigned HOST_WIDE_INT
diff --git a/gcc/config/arm/crypto.md b/gcc/config/arm/crypto.md
index c6f17270b1dbaf6dc43eb1e9b8a182dbb0f5a1e1..0f510f069408471fcbf6751f161e984f39929813 100644
--- a/gcc/config/arm/crypto.md
+++ b/gcc/config/arm/crypto.md
@@ -18,14 +18,27 @@ 
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
+
+;; When AES/AESMC fusion is enabled we want the register allocation to
+;; look like:
+;;    AESE Vn, _
+;;    AESMC Vn, Vn
+;; So prefer to tie operand 1 to operand 0 when fusing.
+
 (define_insn "crypto_<crypto_pattern>"
-  [(set (match_operand:<crypto_mode> 0 "register_operand" "=w")
+  [(set (match_operand:<crypto_mode> 0 "register_operand" "=w,w")
         (unspec:<crypto_mode> [(match_operand:<crypto_mode> 1
-                       "register_operand" "w")]
+                       "register_operand" "0,w")]
          CRYPTO_UNARY))]
   "TARGET_CRYPTO"
   "<crypto_pattern>.<crypto_size_sfx>\\t%q0, %q1"
-  [(set_attr "type" "<crypto_type>")]
+  [(set_attr "type" "<crypto_type>")
+   (set_attr_alternative "enabled"	; One entry per alternative.
+     [(if_then_else (match_test	; Alt 0 (operand 1 tied to 0).
+		       "arm_fusion_enabled_p (tune_params::FUSE_AES_AESMC)")
+		     (const_string "yes")
+		     (const_string "no"))
+      (const_string "yes")])]		; Alt 1 (untied) is always on.
 )
 
 (define_insn "crypto_<crypto_pattern>"