===================================================================
@@ -1257,3 +1257,205 @@ (define_peephole2
"
operands[2] = GEN_INT (32 - INTVAL (operands[2]));
")
+
+(define_insn "*thumb2_ldrd"
+ [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+ (mem:SI (plus:SI
+ (match_operand:SI 2 "s_register_operand" "rk")
+ (match_operand:SI 3 "const_int_operand" "Py"))))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (mem:SI (plus:SI (match_dup 2)
+ (match_operand:SI 4 "const_int_operand" "Py"))))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], operands[3], operands[4], 1)"
+ "*
+ {
+ HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+ HOST_WIDE_INT offset2 = INTVAL (operands[4]);
+ if (offset1 > offset2)
+ {
+ /* Swap the operands so that memory [base+offset1] is loaded into
+ operands[0]. */
+ rtx tmp = operands[0];
+ operands[0] = operands[1];
+ operands[1] = tmp;
+ tmp = operands[3];
+ operands[3] = operands[4];
+ operands[4] = tmp;
+ offset1 = INTVAL (operands[3]);
+ offset2 = INTVAL (operands[4]);
+ }
+ if (fix_cm3_ldrd && (operands[2] == operands[0]))
+ {
+ if (offset1 <= -256)
+ {
+ output_asm_insn (\"sub\\t%2, %2, %n3\", operands);
+ output_asm_insn (\"ldr\\t%1, [%2, #4]\", operands);
+ output_asm_insn (\"ldr\\t%0, [%2]\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"ldr\\t%1, [%2, %4]\", operands);
+ output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
+ }
+ return \"\";
+ }
+ else
+ return \"ldrd\\t%0, %1, [%2, %3]\";
+ }"
+)
+
+(define_insn "*thumb2_ldrd_reg1"
+ [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+ (mem:SI (match_operand:SI 2 "s_register_operand" "rk")))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (mem:SI (plus:SI (match_dup 2)
+ (match_operand:SI 3 "const_int_operand" "Py"))))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], 0, operands[3], 1)"
+ "*
+ {
+ HOST_WIDE_INT offset2 = INTVAL (operands[3]);
+ if (offset2 == 4)
+ {
+ if (fix_cm3_ldrd && (operands[2] == operands[0]))
+ {
+ output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands);
+ output_asm_insn (\"ldr\\t%0, [%2]\", operands);
+ return \"\";
+ }
+ return \"ldrd\\t%0, %1, [%2]\";
+ }
+ else
+ {
+ if (fix_cm3_ldrd && (operands[2] == operands[1]))
+ {
+ output_asm_insn (\"ldr\\t%0, [%2]\", operands);
+ output_asm_insn (\"ldr\\t%1, [%2, %3]\", operands);
+ return \"\";
+ }
+ return \"ldrd\\t%1, %0, [%2, %3]\";
+ }
+ }"
+)
+
+(define_insn "*thumb2_ldrd_reg2"
+ [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+ (mem:SI (plus:SI
+ (match_operand:SI 2 "s_register_operand" "rk")
+ (match_operand:SI 3 "const_int_operand" "Py"))))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (mem:SI (match_dup 2)))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], operands[3], 0, 1)"
+ "*
+ {
+ HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+ if (offset1 == -4)
+ {
+ if (fix_cm3_ldrd && (operands[2] == operands[0]))
+ {
+ output_asm_insn (\"ldr\\t%1, [%2]\", operands);
+ output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
+ return \"\";
+ }
+ return \"ldrd\\t%0, %1, [%2, %3]\";
+ }
+ else
+ {
+ if (fix_cm3_ldrd && (operands[2] == operands[1]))
+ {
+ output_asm_insn (\"ldr\\t%0, [%2, %3]\", operands);
+ output_asm_insn (\"ldr\\t%1, [%2]\", operands);
+ return \"\";
+ }
+ return \"ldrd\\t%1, %0, [%2]\";
+ }
+ }"
+)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))]
+ "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1],
+ operands[2], operands[3], 1)"
+ [(parallel [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))])]
+ ""
+)
+
+(define_insn "*thumb2_strd"
+ [(parallel [(set (mem:SI
+ (plus:SI (match_operand:SI 2 "s_register_operand" "rk")
+ (match_operand:SI 3 "const_int_operand" "Py")))
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 2)
+ (match_operand:SI 4 "const_int_operand" "Py")))
+ (match_operand:SI 1 "s_register_operand" ""))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], operands[3], operands[4], 0)"
+ "*
+ {
+ HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+ HOST_WIDE_INT offset2 = INTVAL (operands[4]);
+ if (offset1 < offset2 )
+ return \"strd\\t%0, %1, [%2, %3]\";
+ else
+ return \"strd\\t%1, %0, [%2, %4]\";
+ }"
+)
+
+(define_insn "*thumb2_strd_reg1"
+ [(parallel [(set (mem:SI (match_operand:SI 2 "s_register_operand" "rk"))
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 2)
+ (match_operand:SI 3 "const_int_operand" "Py")))
+ (match_operand:SI 1 "s_register_operand" ""))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], 0, operands[3], 0)"
+ "*
+ {
+ HOST_WIDE_INT offset2 = INTVAL (operands[3]);
+ if (offset2 == 4)
+ return \"strd\\t%0, %1, [%2]\";
+ else
+ return \"strd\\t%1, %0, [%2, %3]\";
+ }"
+)
+
+(define_insn "*thumb2_strd_reg2"
+ [(parallel [(set (mem:SI (plus:SI
+ (match_operand:SI 2 "s_register_operand" "rk")
+ (match_operand:SI 3 "const_int_operand" "Py")))
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (mem:SI (match_dup 2))
+ (match_operand:SI 1 "s_register_operand" ""))])]
+ "TARGET_THUMB2 && thumb2_check_ldrd_operands (operands[0], operands[1],
+ operands[2], operands[3], 0, 0)"
+ "*
+ {
+ HOST_WIDE_INT offset1 = INTVAL (operands[3]);
+ if (offset1 == -4)
+ return \"strd\\t%0, %1, [%2, %3]\";
+ else
+ return \"strd\\t%1, %0, [%2]\";
+ }"
+)
+
+(define_peephole2
+ [(set (match_operand:SI 2 "memory_operand" "")
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_operand:SI 1 "s_register_operand" ""))]
+ "TARGET_THUMB2 && thumb2_legitimate_ldrd_p (operands[0], operands[1],
+ operands[2], operands[3], 0)"
+ [(parallel [(set (match_operand:SI 2 "memory_operand" "")
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_operand:SI 1 "s_register_operand" ""))])]
+ ""
+)
===================================================================
@@ -22959,4 +22959,85 @@ arm_expand_sync (enum machine_mode mode,
}
}
+/* Check the legality of operands in an ldrd/strd instruction. */
+bool
+thumb2_check_ldrd_operands (rtx reg1, rtx reg2, rtx base,
+ rtx off1, rtx off2, bool ldrd)
+{
+ HOST_WIDE_INT offset1 = 0;
+ HOST_WIDE_INT offset2 = 0;
+
+ if (off1 != NULL)
+ offset1 = INTVAL (off1);
+ if (off2 != NULL)
+ offset2 = INTVAL (off2);
+
+ if (ldrd && (reg1 == reg2))
+ return false;
+
+ if ((offset1 + 4) == offset2)
+ return true;
+ if ((offset2 + 4) == offset1)
+ return true;
+
+ return false;
+}
+
+/* Check if the two memory accesses can be merged to an ldrd/strd instruction.
+ That is they use the same base register, and the gap between constant
+ offsets should be 4. */
+bool
+thumb2_legitimate_ldrd_p (rtx reg1, rtx reg2, rtx mem1, rtx mem2, bool ldrd)
+{
+ rtx base1, base2, op1;
+ rtx addr1 = XEXP (mem1, 0);
+ rtx addr2 = XEXP (mem2, 0);
+ HOST_WIDE_INT offset1 = 0;
+ HOST_WIDE_INT offset2 = 0;
+
+ if (REG_P (addr1))
+ base1 = addr1;
+ else if (GET_CODE (addr1) == PLUS)
+ {
+ base1 = XEXP (addr1, 0);
+ op1 = XEXP (addr1, 1);
+ if (!REG_P (base1) || (GET_CODE (op1) != CONST_INT))
+ return false;
+ offset1 = INTVAL (op1);
+ }
+ else
+ return false;
+
+ if (REG_P (addr2))
+ base2 = addr2;
+ else if (GET_CODE (addr2) == PLUS)
+ {
+ base2 = XEXP (addr2, 0);
+ op1 = XEXP (addr2, 1);
+ if (!REG_P (base2) || (GET_CODE (op1) != CONST_INT))
+ return false;
+ offset2 = INTVAL (op1);
+ }
+ else
+ return false;
+
+ if (base1 != base2)
+ return false;
+
+ if ((offset1 > 1024) || (offset1 < -1020) || ((offset1 & 3) != 0))
+ return false;
+ if ((offset2 > 1024) || (offset2 < -1020) || ((offset2 & 3) != 0))
+ return false;
+
+ if (ldrd && ((reg1 == reg2) || (reg1 == base1)))
+ return false;
+
+ if ((offset1 + 4) == offset2)
+ return true;
+ if ((offset2 + 4) == offset1)
+ return true;
+
+ return false;
+}
+
#include "gt-arm.h"
===================================================================
@@ -149,7 +149,8 @@ extern void arm_expand_sync (enum machin
extern const char *arm_output_memory_barrier (rtx *);
extern const char *arm_output_sync_insn (rtx, rtx *);
extern unsigned int arm_sync_loop_insns (rtx , rtx *);
-
+extern bool thumb2_check_ldrd_operands (rtx, rtx, rtx, rtx, rtx, bool);
+extern bool thumb2_legitimate_ldrd_p (rtx, rtx, rtx, rtx, bool);
extern bool arm_output_addr_const_extra (FILE *, rtx);
#if defined TREE_CODE
===================================================================
@@ -852,7 +852,7 @@ (define_insn "*ldm2_ia"
(set (match_operand:SI 2 "arm_hard_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int 4))))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"ldm%(ia%)\t%3, {%1, %2}"
[(set_attr "type" "load2")
(set_attr "predicable" "yes")])
@@ -901,7 +901,7 @@ (define_insn "*stm2_ia"
(match_operand:SI 1 "arm_hard_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
(match_operand:SI 2 "arm_hard_register_operand" ""))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"stm%(ia%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
(set_attr "predicable" "yes")])
@@ -1041,7 +1041,7 @@ (define_insn "*ldm2_db"
(set (match_operand:SI 2 "arm_hard_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int -4))))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"ldm%(db%)\t%3, {%1, %2}"
[(set_attr "type" "load2")
(set_attr "predicable" "yes")])
@@ -1067,7 +1067,7 @@ (define_insn "*stm2_db"
(match_operand:SI 1 "arm_hard_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
(match_operand:SI 2 "arm_hard_register_operand" ""))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"stm%(db%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
(set_attr "predicable" "yes")])
===================================================================
@@ -31,7 +31,7 @@
;; The following multi-letter normal constraints have been used:
;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di
;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
+;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px, Py
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -189,6 +189,13 @@ (define_constraint "Px"
(and (match_code "const_int")
(match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))
+(define_constraint "Py"
+ "@internal In Thumb-2 state a constant that is a multiple of 4 in the
+ range -1020 to 1024"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1024
+ && (ival & 3) == 0")))
+
(define_constraint "G"
"In ARM/Thumb-2 state a valid FPA immediate constant."
(and (match_code "const_double")
===================================================================
@@ -1,9 +1,9 @@
-/* { dg-options "-Os" } */
+/* { dg-options "-O2" } */
/* { dg-do compile } */
int bar(int* p)
{
- int x = p[0] + p[1];
+ int x = p[0] + p[1] + p[2];
return x;
}
===================================================================
@@ -5,6 +5,7 @@ void foo(int* p)
{
p[0] = 1;
p[1] = 0;
+ p[2] = 2;
}
/* { dg-final { scan-assembler "stm" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-final { scan-assembler "ldrd" } } */
+/* { dg-final { scan-assembler "strd" } } */
+
+struct S
+{
+ void* p1;
+ void* p2;
+ void* p3;
+ void* p4;
+};
+
+extern printf(char*, ...);
+
+void foo1(struct S* fp, struct S* otherSaveArea)
+{
+ struct S* saveA = fp - 1;
+ printf("StackSaveArea for fp %p [%p/%p]:\n", fp, saveA, otherSaveArea);
+ printf("prevFrame=%p savedPc=%p meth=%p curPc=%p fp[0]=0x%08x\n",
+ saveA->p1, saveA->p2, saveA->p3, saveA->p4, *(unsigned int*)fp);
+}