@@ -414,6 +414,29 @@ (define_mode_attr VM3_char [(V2DI "d")
;; VSX moves
+;; TImode memory to memory move optimization on LE with p8vector
+;; Matches a TImode set whose destination and source are both memory
+;; operands accepted by indexed_or_indirect_operand (constraint Z), and
+;; splits it into a vsx_ld_elemrev_v2di load into a fresh V2DImode pseudo
+;; followed by a vsx_st_elemrev_v2di store of that pseudo.
+(define_insn_and_split "*vsx_le_mem_to_mem_mov_ti"
+ [(set (match_operand:TI 0 "indexed_or_indirect_operand" "=Z")
+ (match_operand:TI 1 "indexed_or_indirect_operand" "Z"))]
+ ;; Insn condition: little-endian, VSX enabled, P9 vector support not
+ ;; enabled, and neither memory operand volatile.  can_create_pseudo_p ()
+ ;; is required because the split below allocates a new pseudo register.
+ "!BYTES_BIG_ENDIAN
+ && TARGET_VSX
+ && !TARGET_P9_VECTOR
+ && !MEM_VOLATILE_P (operands[0])
+ && !MEM_VOLATILE_P (operands[1])
+ && can_create_pseudo_p ()"
+ ;; "#" output template: there is no direct assembly for this insn, so it
+ ;; must be split.
+ "#"
+ ;; Split condition: nothing beyond the insn condition above.
+ "&& 1"
+ ;; Placeholder replacement pattern; the preparation statements emit the
+ ;; real insns themselves and finish with DONE.
+ [(const_int 0)]
+{
+ /* Temporary vector register that carries the 16 bytes between the load
+    and the store.  */
+ rtx tmp = gen_reg_rtx (V2DImode);
+ /* Re-view the TImode source memory as V2DImode at offset 0 and load it.  */
+ rtx src = adjust_address (operands[1], V2DImode, 0);
+ emit_insn (gen_vsx_ld_elemrev_v2di (tmp, src));
+ /* Re-view the TImode destination memory as V2DImode at offset 0 and
+    store the temporary into it.  */
+ rtx dest = adjust_address (operands[0], V2DImode, 0);
+ emit_insn (gen_vsx_st_elemrev_v2di (dest, tmp));
+ DONE;
+}
+ ;; NOTE(review): the split emits two insns; confirm that a length of 16
+ ;; is the intended (conservative) size estimate for the unsplit insn.
+ [(set_attr "length" "16")])
+
;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { has_arch_pwr8 } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mvsx -O2" } */
+
+/* Ensure 16-byte by pieces move is enabled. */
+
+/* Copy exactly 16 bytes from the memory at S2 to the memory at S1.
+   Both addresses are only known at run time.  Presumably this is expected
+   to contribute one of the two vector loads and one of the two vector
+   stores counted by the dg-final scans -- confirm.  */
+void move1 (void *s1, void *s2)
+{
+ __builtin_memcpy (s1, s2, 16);
+}
+
+/* Copy 16 bytes from a string literal to the memory at S1.  The literal
+   holds exactly 16 characters, so the copy takes all of them and omits
+   the terminating NUL.  Presumably this is expected to contribute the
+   other vector load and store counted by the dg-final scans -- confirm.  */
+void move2 (void *s1)
+{
+ __builtin_memcpy (s1, "0123456789012345", 16);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mp?lxv\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvd2x\M|\mstxv\M} 2 } } */