* doc/tm.texi.in: Add hook TARGET_CANNOT_RELOAD_MEM_ADDRESS.
* doc/tm.texi: Regenerate.
* doc/invoke.texi: Document ARM -munaligned-access.
* targhooks.c (default_cannot_reload_mem_address): New.
* targhooks.h (default_cannot_reload_mem_address): Declare.
* target.def (cannot_reload_mem_address): New hook.
* reload.c (find_reloads): Don't try to reload memory address
if targetm.cannot_reload_mem_address returns true.
* config/arm/arm.c (arm_cannot_reload_mem_address): Declare.
(arm_emit): Declare.
(TARGET_CANNOT_RELOAD_MEM_ADDRESS): Define.
(arm_option_override): Handle -munaligned-access option.
(neon_struct_mem_operand): Return FALSE if the operand is not
word aligned.
(adjacent_mem_locations): Return 0 if one of the memory
locations is not word aligned.
(load_multiple_sequence): Don't use a load-multiple instruction
if the memory references are not word aligned.
(store_multiple_sequence): Likewise.
(arm_reorg): Mark minipool constant references as word aligned.
(output_move_double_1): New.
(output_move_double): Use output_move_double_1.
(move_double_length): New.
(neon_emit_pair_result_insn): Set memory alignment.
(arm_file_start): Output Tag_CPU_unaligned_access.
(arm_cannot_reload_mem_address): New.
* config/arm/arm.h (STRICT_ALIGNMENT): Depend on
unaligned_access.
(SLOW_UNALIGNED_ACCESS): Remove.
* config/arm/arm-protos.h (move_double_length): Declare.
* config/arm/arm.opt (munaligned-access): New option.
* config/arm/vfp.md (arm_movdi_vfp): Call move_double_length
to calculate length attribute for alternatives using
output_move_double.
(thumb2_movdi_vfp): Likewise.
(movdf_vfp): Likewise.
(thumb2_movdf_vfp): Likewise.
* config/arm/neon.md (neon_mov<mode>): Likewise.
* config/arm/iwmmxt.md (iwmmxt_arm_movdi): Likewise.
(mov<mode>_internal): Likewise.
* config/arm/arm.md (arm_movdi): Likewise.
(movdf_soft_insn): Likewise.
* config/arm/fpa.md (movdf_fpa): Likewise.
(thumb2_movdf_fpa): Likewise.
* config/arm/cirrus.md (cirrus_arm_movdi): Likewise.
(cirrus_movdf_hard_insn): Likewise.
(cirrus_thumb2_movdi): Likewise.
(thumb2_cirrus_movdf_hard_insn): Likewise.
testsuite/
* gcc.target/arm/unaligned.c: New test.
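
For illustration only (this note is not part of the patch): with the new
option, a packed-struct word access such as

  struct bar { char c; int i; } __attribute__((packed));

  int
  foo (struct bar *p)
  {
    return p->i;
  }

is expected to compile to a single (unaligned) ldr when -munaligned-access
is in effect, as checked by the new gcc.target/arm/unaligned.c test below,
whereas with unaligned access disabled STRICT_ALIGNMENT stays nonzero and
the access is typically expanded into narrower aligned loads.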
===================================================================
@@ -2763,6 +2763,10 @@ intermediate storage. This case often o
general registers.
@end defmac
+@deftypefn {Target Hook} bool TARGET_CANNOT_RELOAD_MEM_ADDRESS (rtx @var{mem}, const char *@var{p})
+A target hook that returns true if @var{mem} cannot be reloaded by reloading its address to satisfy the constraint pointed to by @var{p}.
+@end deftypefn
+
@defmac SECONDARY_MEMORY_NEEDED (@var{class1}, @var{class2}, @var{m})
Certain machines have the property that some registers cannot be copied
to some other registers without using memory. Define this macro on
===================================================================
@@ -2751,6 +2751,8 @@ intermediate storage. This case often o
general registers.
@end defmac
+@hook TARGET_CANNOT_RELOAD_MEM_ADDRESS
+
@defmac SECONDARY_MEMORY_NEEDED (@var{class1}, @var{class2}, @var{m})
Certain machines have the property that some registers cannot be copied
to some other registers without using memory. Define this macro on
===================================================================
@@ -474,7 +474,8 @@ Objective-C and Objective-C++ Dialects}.
-mcaller-super-interworking -mcallee-super-interworking @gol
-mtp=@var{name} @gol
-mword-relocations @gol
--mfix-cortex-m3-ldrd}
+-mfix-cortex-m3-ldrd @gol
+-munaligned-access}
@emph{AVR Options}
@gccoptlist{-mmcu=@var{mcu} -mno-interrupts @gol
@@ -10376,6 +10377,12 @@ with overlapping destination and base re
generating these instructions. This option is enabled by default when
@option{-mcpu=cortex-m3} is specified.
+@item -munaligned-access
+@opindex munaligned-access
+Enable unaligned access to data memory.  This option is enabled by default
+for all ARMv6 architecture-based processors and for processors based on
+the ARMv7-A, ARMv7-R, and ARMv7-M architectures.  It is disabled by default
+for all pre-ARMv6 architecture-based processors and for processors based on
+the ARMv6-M architecture.
@end table
@node AVR Options
===================================================================
@@ -955,6 +955,13 @@ default_secondary_reload (bool in_p ATTR
}
bool
+default_cannot_reload_mem_address (rtx mem ATTRIBUTE_UNUSED,
+ const char *p ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
+bool
default_handle_c_option (size_t code ATTRIBUTE_UNUSED,
const char *arg ATTRIBUTE_UNUSED,
int value ATTRIBUTE_UNUSED)
===================================================================
@@ -130,6 +130,7 @@ extern const reg_class_t *default_ira_co
extern reg_class_t default_secondary_reload (bool, rtx, reg_class_t,
enum machine_mode,
secondary_reload_info *);
+extern bool default_cannot_reload_mem_address (rtx, const char *);
extern void default_target_option_override (void);
extern void hook_void_bitmap (bitmap);
extern bool default_handle_c_option (size_t, const char *, int);
===================================================================
@@ -2243,6 +2243,14 @@ DEFHOOK
secondary_reload_info *sri),
default_secondary_reload)
+DEFHOOK
+(cannot_reload_mem_address,
+ "A target hook that return true if @var{mem} cannot be reloaded\
+ by reloading its address to satisfy the constraint pointed by @var{p}.",
+ bool,
+ (rtx mem, const char *p),
+ default_cannot_reload_mem_address)
+
/* Given an rtx X being reloaded into a reg required to be in class CLASS,
return the class of reg to actually use. */
DEFHOOK
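
(Illustrative note, not part of the patch: a hypothetical backend could
implement the new hook roughly as follows; the constraint letter checked
here is only an example, the real ARM implementation is
arm_cannot_reload_mem_address further down.)

  static bool
  example_cannot_reload_mem_address (rtx mem, const char *p)
  {
    /* Refuse to reload the address of an under-aligned MEM matched
       against a hypothetical alignment-sensitive constraint 'U...'.  */
    return MEM_ALIGN (mem) < BITS_PER_WORD && p[0] == 'U';
  }

  #undef  TARGET_CANNOT_RELOAD_MEM_ADDRESS
  #define TARGET_CANNOT_RELOAD_MEM_ADDRESS example_cannot_reload_mem_address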
===================================================================
@@ -3308,6 +3308,12 @@ find_reloads (rtx insn, int replace, int
&& offsettable_memref_p (reg_equiv_mem[REGNO (operand)]))
|| (reg_equiv_address[REGNO (operand)] != 0))))
win = 1;
+ else if (MEM_P (operand)
+ && targetm.cannot_reload_mem_address (operand, p))
+ {
+ badop = 1;
+ break;
+ }
if (CONST_POOL_OK_P (operand)
|| MEM_P (operand))
badop = 0;
@@ -3420,7 +3426,12 @@ find_reloads (rtx insn, int replace, int
&& EXTRA_CONSTRAINT_STR (reg_equiv_mem[REGNO (operand)], c, p))
|| (reg_equiv_address[REGNO (operand)] != 0)))
win = 1;
-
+ else if (MEM_P (operand)
+ && targetm.cannot_reload_mem_address (operand, p))
+ {
+ badop = 1;
+ break;
+ }
/* If we didn't already win, we can reload
constants via force_const_mem, and other
MEMs by reloading the address like for 'o'. */
===================================================================
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -munaligned-access" } */
+
+struct bar
+{
+ char c;
+ int i;
+ short s;
+ char c2;
+} __attribute__((packed));
+
+int
+foo (struct bar *p)
+{
+ return p->i;
+}
+
+/* { dg-final { scan-assembler-times "ldr" 1 } } */
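
(Illustrative note, not part of the patch: a store-side companion test
could exercise the unaligned str path with the same struct, e.g.

  void
  set (struct bar *p, int v)
  {
    p->i = v;
  }

The corresponding scan-assembler pattern would need to be stricter than
plain "str", since that string also matches strb and strh.)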
===================================================================
@@ -250,6 +250,8 @@ static bool arm_builtin_support_vector_m
bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
+static bool arm_cannot_reload_mem_address (rtx, const char *);
+static void arm_emit (int, const char *, rtx *);
/* Table of machine attributes. */
@@ -591,6 +593,10 @@ static const struct default_options arm_
#define TARGET_PREFERRED_RENAME_CLASS \
arm_preferred_rename_class
+#undef TARGET_CANNOT_RELOAD_MEM_ADDRESS
+#define TARGET_CANNOT_RELOAD_MEM_ADDRESS \
+ arm_cannot_reload_mem_address
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling. */
@@ -1965,6 +1971,22 @@ arm_option_override (void)
fix_cm3_ldrd = 0;
}
+ /* Enable -munaligned-access by default for
+ - all ARMv6 architecture-based processors
+ - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
+
+ Disable -munaligned-access by default for
+ - all pre-ARMv6 architecture-based processors
+ - ARMv6-M architecture-based processors. */
+
+ if (unaligned_access == 2)
+ {
+ if (arm_arch6 && (arm_arch_notm || arm_arch7))
+ unaligned_access = 1;
+ else
+ unaligned_access = 0;
+ }
+
if (TARGET_THUMB1 && flag_schedule_insns)
{
/* Don't warn since it's on by default in -O2. */
@@ -9041,6 +9063,9 @@ neon_struct_mem_operand (rtx op)
if (GET_CODE (op) != MEM)
return FALSE;
+ if (MEM_ALIGN (op) < 32)
+ return FALSE;
+
ind = XEXP (op, 0);
if (reload_completed
@@ -9409,6 +9434,10 @@ adjacent_mem_locations (rtx a, rtx b)
if (volatile_refs_p (a) || volatile_refs_p (b))
return 0;
+ /* Adjacent memory locations are only useful when they are word aligned. */
+ if (MEM_ALIGN (a) < 32 || MEM_ALIGN (b) < 32)
+ return 0;
+
if ((GET_CODE (XEXP (a, 0)) == REG
|| (GET_CODE (XEXP (a, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
@@ -9632,6 +9661,11 @@ load_multiple_sequence (rtx *operands, i
if (MEM_VOLATILE_P (operands[nops + i]))
return 0;
+ /* Load-multiple instructions require the memory references to
+ be word aligned. */
+ if (MEM_ALIGN (operands[nops + i]) < 32)
+ return 0;
+
offset = const0_rtx;
if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
@@ -9782,6 +9816,11 @@ store_multiple_sequence (rtx *operands,
if (MEM_VOLATILE_P (operands[nops + i]))
return 0;
+ /* Store-multiple instructions require the memory references to
+ be word aligned. */
+ if (MEM_ALIGN (operands[nops + i]) < 32)
+ return 0;
+
offset = const0_rtx;
if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
@@ -12448,6 +12487,8 @@ arm_reorg (void)
minipool_vector_label),
this_fix->minipool->offset);
*this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
+ /* All constants in the minipool are aligned to BITS_PER_WORD. */
+ set_mem_align (*this_fix->loc, BITS_PER_WORD);
}
dump_minipool (last_barrier->insn);
@@ -12907,14 +12948,27 @@ output_mov_double_arm_from_fpa (rtx *ope
return "";
}
+#define output_asm_insn(pattern, operands) \
+ do \
+ { \
+ if (lengthp == NULL) \
+ arm_emit (0, pattern, operands); \
+ else \
+ length += 4; \
+ } \
+ while (0)
+
/* Output a move between double words. It must be REG<-MEM
or MEM<-REG. */
-const char *
-output_move_double (rtx *operands)
+static const char *
+output_move_double_1 (rtx *operands, unsigned int *lengthp)
{
enum rtx_code code0 = GET_CODE (operands[0]);
enum rtx_code code1 = GET_CODE (operands[1]);
rtx otherops[3];
+ bool unaligned;
+ bool use_ldrd;
+ unsigned int length = 0;
if (code0 == REG)
{
@@ -12924,11 +12978,30 @@ output_move_double (rtx *operands)
gcc_assert (code1 == MEM); /* Constraints should ensure this. */
+ unaligned = MEM_ALIGN (operands[1]) < 32 ? true : false;
+ use_ldrd = (TARGET_LDRD && (reg0 & 1) == 0) ? true : false;
+
switch (GET_CODE (XEXP (operands[1], 0)))
{
case REG:
- if (TARGET_LDRD
- && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
+ if (unaligned)
+ {
+ otherops[1] = adjust_address (operands[1], SImode, 4);
+
+ /* Take care of overlapping base/data reg. */
+ if (reg_mentioned_p (operands[0], operands[1]))
+ {
+ output_asm_insn ("ldr%?\t%0, %1", otherops);
+ output_asm_insn ("ldr%?\t%0, [%m1]", operands);
+ }
+ else
+ {
+ output_asm_insn ("ldr%?\t%0, [%m1]", operands);
+ output_asm_insn ("ldr%?\t%0, %1", otherops);
+ }
+ }
+ else if (use_ldrd
+ && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
else
output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
@@ -12936,18 +13009,34 @@ output_move_double (rtx *operands)
case PRE_INC:
gcc_assert (TARGET_LDRD);
- output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
+ if (use_ldrd && !unaligned)
+ output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
+ else
+ {
+ output_asm_insn ("ldr%?\t%0, [%m1, #8]!", operands);
+ output_asm_insn ("ldr%?\t%H0, [%m1, #4]", operands);
+ }
break;
case PRE_DEC:
- if (TARGET_LDRD)
+ if (unaligned)
+ {
+ output_asm_insn ("ldr%?\t%0, [%m1, #-8]!", operands);
+ output_asm_insn ("ldr%?\t%H0, [%m1, #4]", operands);
+ }
+ else if (use_ldrd)
output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
else
output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
break;
case POST_INC:
- if (TARGET_LDRD)
+ if (unaligned)
+ {
+ output_asm_insn ("ldr%?\t%0, [%m1], #8", operands);
+ output_asm_insn ("ldr%?\t%H0, [%m1, #-4]", operands);
+ }
+ else if (use_ldrd)
output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
else
output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
@@ -12955,12 +13044,18 @@ output_move_double (rtx *operands)
case POST_DEC:
gcc_assert (TARGET_LDRD);
- output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
+ if (use_ldrd && !unaligned)
+ output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
+ else
+ {
+ output_asm_insn ("ldr%?\t%0, [%m1], #-8", operands);
+ output_asm_insn ("ldr%?\t%H0, [%m1, #4]", operands);
+ }
break;
case PRE_MODIFY:
case POST_MODIFY:
- /* Autoicrement addressing modes should never have overlapping
+ /* Autoincrement addressing modes should never have overlapping
base and destination registers, and overlapping index registers
are already prohibited, so this doesn't need to worry about
fix_cm3_ldrd. */
@@ -12974,17 +13069,24 @@ output_move_double (rtx *operands)
{
/* Registers overlap so split out the increment. */
output_asm_insn ("add%?\t%1, %1, %2", otherops);
- output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
+ if (unaligned)
+ {
+ output_asm_insn ("ldr%?\t%0, [%1]", otherops);
+ output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
+ }
+ else
+ output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
}
else
{
/* Use a single insn if we can.
FIXME: IWMMXT allows offsets larger than ldrd can
handle, fix these up with a pair of ldr. */
- if (TARGET_THUMB2
- || GET_CODE (otherops[2]) != CONST_INT
- || (INTVAL (otherops[2]) > -256
- && INTVAL (otherops[2]) < 256))
+ if ((TARGET_THUMB2
+ || GET_CODE (otherops[2]) != CONST_INT
+ || (INTVAL (otherops[2]) > -256
+ && INTVAL (otherops[2]) < 256))
+ && !unaligned)
output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
else
{
@@ -12998,10 +13100,11 @@ output_move_double (rtx *operands)
/* Use a single insn if we can.
FIXME: IWMMXT allows offsets larger than ldrd can handle,
fix these up with a pair of ldr. */
- if (TARGET_THUMB2
- || GET_CODE (otherops[2]) != CONST_INT
- || (INTVAL (otherops[2]) > -256
- && INTVAL (otherops[2]) < 256))
+ if ((TARGET_THUMB2
+ || GET_CODE (otherops[2]) != CONST_INT
+ || (INTVAL (otherops[2]) > -256
+ && INTVAL (otherops[2]) < 256))
+ && !unaligned)
output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
else
{
@@ -13021,7 +13124,12 @@ output_move_double (rtx *operands)
otherops[1] = operands[1];
output_asm_insn ("adr%?\t%0, %1", otherops);
operands[1] = otherops[0];
- if (TARGET_LDRD)
+ if (unaligned)
+ {
+ output_asm_insn ("ldr%?\t%0, [%1]", operands);
+ output_asm_insn ("ldr%?\t%H0, [%1, #4]", operands);
+ }
+ else if (use_ldrd)
output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
else
output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
@@ -13029,8 +13137,9 @@ output_move_double (rtx *operands)
/* ??? This needs checking for thumb2. */
default:
- if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
- GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
+ if (!unaligned
+ && arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
+ GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
{
otherops[0] = operands[0];
otherops[1] = XEXP (XEXP (operands[1], 0), 0);
@@ -13038,28 +13147,28 @@ output_move_double (rtx *operands)
if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
{
- if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
+ if (GET_CODE (otherops[2]) == CONST_INT && !use_ldrd)
{
switch ((int) INTVAL (otherops[2]))
{
case -8:
output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
- return "";
+ goto done;
case -4:
if (TARGET_THUMB2)
break;
output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
- return "";
+ goto done;
case 4:
if (TARGET_THUMB2)
break;
output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
- return "";
+ goto done;
}
}
otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
operands[1] = otherops[0];
- if (TARGET_LDRD
+ if (use_ldrd
&& (GET_CODE (otherops[2]) == REG
|| TARGET_THUMB2
|| (GET_CODE (otherops[2]) == CONST_INT
@@ -13089,7 +13198,7 @@ output_move_double (rtx *operands)
otherops[0] = operands[0];
output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
}
- return "";
+ goto done;
}
if (GET_CODE (otherops[2]) == CONST_INT)
@@ -13105,38 +13214,105 @@ output_move_double (rtx *operands)
else
output_asm_insn ("sub%?\t%0, %1, %2", otherops);
- if (TARGET_LDRD)
- return "ldr%(d%)\t%0, [%1]";
+ if (use_ldrd)
+ {
+ output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
+ goto done;
+ }
- return "ldm%(ia%)\t%1, %M0";
+ output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
+ goto done;
}
else
{
- otherops[1] = adjust_address (operands[1], SImode, 4);
- /* Take care of overlapping base/data reg. */
- if (reg_mentioned_p (operands[0], operands[1]))
+ /* When the address is (REG + REG), we cannot use
+ adjust_address to add offset 4. */
+ if (GET_CODE (XEXP (operands[1], 0)) == PLUS
+ && REG_P (XEXP (XEXP (operands[1], 0), 1)))
{
- output_asm_insn ("ldr%?\t%0, %1", otherops);
- output_asm_insn ("ldr%?\t%0, %1", operands);
+ rtx reg_base = XEXP (XEXP (operands[1], 0), 0);
+ rtx reg_index = XEXP (XEXP (operands[1], 0), 1);
+
+ /* If REG0 is neither the base register nor the index
+ register, the low word can be loaded into it first; it will
+ not be clobbered when the base register is modified below. */
+ if (reg0 != REGNO (reg_base) && reg0 != REGNO (reg_index))
+ {
+ output_asm_insn ("ldr%?\t%0, %1", operands);
+ otherops[1] = reg_base;
+ if (REGNO (reg_base) != REGNO (reg_index))
+ output_asm_insn ("add%?\t%1, %1, #4", otherops);
+ else
+ output_asm_insn ("add%?\t%1, %1, #2", otherops);
+ output_asm_insn ("ldr%?\t%H0, %1", operands);
+ /* The base register only needs to be restored when it has
+ not been clobbered by the high-word load. */
+ if (REGNO (reg_base) != reg0 + 1)
+ {
+ if (REGNO (reg_base) != REGNO (reg_index))
+ output_asm_insn ("add%?\t%1, %1, #-4", otherops);
+ else
+ output_asm_insn ("add%?\t%1, %1, #-2", otherops);
+ }
+ }
+ /* If REG0 is the same as the base register or the index
+ register, it can be used to hold the sum of the base
+ register and the index register. */
+ else
+ {
+ if (reg0 == REGNO (reg_base))
+ {
+ otherops[0] = reg_base;
+ otherops[1] = reg_index;
+ }
+ else
+ {
+ otherops[0] = reg_index;
+ otherops[1] = reg_base;
+ }
+ output_asm_insn ("add%?\t%0, %0, %1", otherops);
+ output_asm_insn ("ldr%?\t%H0, [%0, 4]", otherops);
+ output_asm_insn ("ldr%?\t%0, [%0]", otherops);
+ }
}
else
{
- output_asm_insn ("ldr%?\t%0, %1", operands);
- output_asm_insn ("ldr%?\t%0, %1", otherops);
+ otherops[1] = adjust_address (operands[1], SImode, 4);
+ /* Take care of overlapping base/data reg. */
+ if (reg_mentioned_p (operands[0], operands[1]))
+ {
+ output_asm_insn ("ldr%?\t%0, %1", otherops);
+ output_asm_insn ("ldr%?\t%0, %1", operands);
+ }
+ else
+ {
+ output_asm_insn ("ldr%?\t%0, %1", operands);
+ output_asm_insn ("ldr%?\t%0, %1", otherops);
+ }
}
}
}
}
else
{
+ unsigned int reg1 = REGNO (operands[1]);
+
/* Constraints should ensure this. */
gcc_assert (code0 == MEM && code1 == REG);
gcc_assert (REGNO (operands[1]) != IP_REGNUM);
+ unaligned = MEM_ALIGN (operands[0]) < 32 ? true : false;
+ use_ldrd = (TARGET_LDRD && (reg1 & 1) == 0) ? true : false;
+
switch (GET_CODE (XEXP (operands[0], 0)))
{
case REG:
- if (TARGET_LDRD)
+ if (unaligned)
+ {
+ output_asm_insn ("str%?\t%1, [%m0]", operands);
+ output_asm_insn ("str%?\t%H1, [%m0, #4]", operands);
+ }
+ else if (use_ldrd)
output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
else
output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
@@ -13144,18 +13320,34 @@ output_move_double (rtx *operands)
case PRE_INC:
gcc_assert (TARGET_LDRD);
- output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
+ if (use_ldrd && !unaligned)
+ output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
+ else
+ {
+ output_asm_insn ("str%?\t%1, [%m0, #8]!", operands);
+ output_asm_insn ("str%?\t%H1, [%m0, #4]", operands);
+ }
break;
case PRE_DEC:
- if (TARGET_LDRD)
+ if (unaligned)
+ {
+ output_asm_insn ("str%?\t%1, [%m0, #-8]!", operands);
+ output_asm_insn ("str%?\t%H1, [%m0, #4]", operands);
+ }
+ else if (use_ldrd)
output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
else
output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
break;
case POST_INC:
- if (TARGET_LDRD)
+ if (unaligned)
+ {
+ output_asm_insn ("str%?\t%1, [%m0], #8", operands);
+ output_asm_insn ("str%?\t%H1, [%m0, #-4]", operands);
+ }
+ else if (use_ldrd)
output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
else
output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
@@ -13163,7 +13355,13 @@ output_move_double (rtx *operands)
case POST_DEC:
gcc_assert (TARGET_LDRD);
- output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
+ if (use_ldrd && !unaligned)
+ output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
+ else
+ {
+ output_asm_insn ("str%?\t%1, [%m0], #-8", operands);
+ output_asm_insn ("str%?\t%H1, [%m0, #4]", operands);
+ }
break;
case PRE_MODIFY:
@@ -13174,10 +13372,11 @@ output_move_double (rtx *operands)
/* IWMMXT allows offsets larger than ldrd can handle,
fix these up with a pair of ldr. */
- if (!TARGET_THUMB2
- && GET_CODE (otherops[2]) == CONST_INT
- && (INTVAL(otherops[2]) <= -256
- || INTVAL(otherops[2]) >= 256))
+ if ((!TARGET_THUMB2
+ && GET_CODE (otherops[2]) == CONST_INT
+ && (INTVAL(otherops[2]) <= -256
+ || INTVAL(otherops[2]) >= 256))
+ || unaligned)
{
if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
{
@@ -13198,52 +13397,133 @@ output_move_double (rtx *operands)
case PLUS:
otherops[2] = XEXP (XEXP (operands[0], 0), 1);
- if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
+ if (GET_CODE (otherops[2]) == CONST_INT
+ && !use_ldrd
+ && !unaligned)
{
switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
{
case -8:
output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
- return "";
+ goto done;
case -4:
if (TARGET_THUMB2)
break;
output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
- return "";
+ goto done;
case 4:
if (TARGET_THUMB2)
break;
output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
- return "";
+ goto done;
}
}
- if (TARGET_LDRD
+ if (use_ldrd
&& (GET_CODE (otherops[2]) == REG
|| TARGET_THUMB2
|| (GET_CODE (otherops[2]) == CONST_INT
&& INTVAL (otherops[2]) > -256
- && INTVAL (otherops[2]) < 256)))
+ && INTVAL (otherops[2]) < 256))
+ && !unaligned)
{
otherops[0] = operands[1];
otherops[1] = XEXP (XEXP (operands[0], 0), 0);
output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
- return "";
+ goto done;
}
/* Fall through */
default:
- otherops[0] = adjust_address (operands[0], SImode, 4);
- otherops[1] = operands[1];
output_asm_insn ("str%?\t%1, %0", operands);
- output_asm_insn ("str%?\t%H1, %0", otherops);
+
+ /* When the address is (REG + REG), we cannot use
+ adjust_address to add offset 4. */
+ if (GET_CODE (XEXP (operands[0], 0)) == PLUS
+ && REG_P (XEXP (XEXP (operands[0], 0), 1)))
+ {
+ rtx reg_base = XEXP (XEXP (operands[0], 0), 0);
+ rtx reg_index = XEXP (XEXP (operands[0], 0), 1);
+
+ /* If the base register and the index register are different,
+ this case is straightforward to handle. */
+ if (REGNO (reg_base) != REGNO (reg_index))
+ {
+ if (REGNO (reg_base) != reg1 + 1)
+ otherops[0] = reg_base;
+ else
+ otherops[0] = reg_index;
+ output_asm_insn ("add%?\t%0, %0, #4", otherops);
+ output_asm_insn ("str%?\t%H1, %0", operands);
+ output_asm_insn ("add%?\t%0, %0, #-4", otherops);
+ }
+ /* The base register and the index register are the same,
+ but the source register is different from both. */
+ else if (REGNO (reg_base) != reg1 + 1)
+ {
+ otherops[0] = reg_base;
+ output_asm_insn ("add%?\t%0, %0, #2", otherops);
+ output_asm_insn ("str%?\t%H1, %0", operands);
+ output_asm_insn ("add%?\t%0, %0, #-2", otherops);
+ }
+ /* The base register, the index register and the source
+ register are all the same. */
+ else
+ {
+ otherops[0] = reg_base;
+ otherops[1] = operands[1];
+ output_asm_insn ("add%?\t%1, %0, %0", otherops);
+ otherops[0] = gen_rtx_REG (SImode, 1 + reg1);
+ output_asm_insn ("str%?\t%0, [%m1, #4]", otherops);
+ otherops[0] = operands[0];
+ output_asm_insn ("ldr%?\t%1, %0", otherops);
+ }
+ }
+ else
+ {
+ otherops[0] = adjust_address (operands[0], SImode, 4);
+ otherops[1] = operands[1];
+ output_asm_insn ("str%?\t%H1, %0", otherops);
+ }
}
}
+ done:
+ if (lengthp != NULL)
+ *lengthp = length;
return "";
}
+#undef output_asm_insn
+
+const char *
+output_move_double (rtx *operands)
+{
+ return output_move_double_1 (operands, NULL);
+}
+
+unsigned int
+move_double_length (rtx insn)
+{
+ unsigned int length = 0;
+ rtx operands[2];
+
+ extract_insn_cached (insn);
+
+ operands[0] = recog_data.operand[0];
+ operands[1] = recog_data.operand[1];
+
+ /* When arm_reorg has not run, INSN might be REG<-REG,
+ REG<-CONST_DOUBLE, REG<-CONST_INT. Return 8 as the
+ insn length for such cases. */
+ if (REG_P (operands[0]) && !MEM_P (operands[1]))
+ return 8;
+
+ output_move_double_1 (operands, &length);
+ return length;
+}
+
/* Output a move, load or store for quad-word vectors in ARM registers. Only
handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
@@ -19510,6 +19790,7 @@ neon_emit_pair_result_insn (enum machine
emit_insn (intfn (tmp1, op1, tmp2, op2));
+ set_mem_align (mem, GET_MODE_BITSIZE (mode));
emit_move_insn (mem, tmp1);
mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
emit_move_insn (mem, tmp2);
@@ -21461,6 +21742,11 @@ arm_file_start (void)
val = 6;
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
+ /* Tag_CPU_unaligned_access. */
+ gcc_assert (unaligned_access == 0 || unaligned_access == 1);
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n",
+ unaligned_access);
+
/* Tag_ABI_FP_16bit_format. */
if (arm_fp16_format)
asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
@@ -23046,6 +23332,18 @@ arm_have_conditional_execution (void)
return !TARGET_THUMB1;
}
+static bool
+arm_cannot_reload_mem_address (rtx mem, const char *p)
+{
+ gcc_assert (MEM_P (mem));
+
+ if (MEM_ALIGN (mem) < BITS_PER_WORD
+ && p[0] == 'U' && p[1] == 'v')
+ return true;
+
+ return false;
+}
+
/* Legitimize a memory reference for sync primitive implemented using
ldrex / strex. We currently force the form of the reference to be
indirect without offset. We do not yet support the indirect offset
===================================================================
@@ -641,7 +641,7 @@ extern int arm_structure_size_boundary;
/* Nonzero if move instructions will actually fail to work
when given unaligned data. */
-#define STRICT_ALIGNMENT 1
+#define STRICT_ALIGNMENT (!unaligned_access)
/* wchar_t is unsigned under the AAPCS. */
#ifndef WCHAR_TYPE
@@ -2048,8 +2048,6 @@ typedef struct
/* Nonzero if access to memory by bytes is slow and undesirable. */
#define SLOW_BYTE_ACCESS 0
-#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
-
/* Immediate shift counts are truncated by the output routines (or was it
the assembler?). Shift counts in a register are truncated by ARM. Note
that the native compiler puts too large (> 32) immediate shift counts
===================================================================
@@ -125,6 +125,7 @@ extern const char *output_mov_long_doubl
extern const char *output_mov_double_fpa_from_arm (rtx *);
extern const char *output_mov_double_arm_from_fpa (rtx *);
extern const char *output_move_double (rtx *);
+extern unsigned int move_double_length (rtx);
extern const char *output_move_quad (rtx *);
extern const char *output_move_vfp (rtx *operands);
extern const char *output_move_neon (rtx *operands);
===================================================================
@@ -163,7 +163,9 @@
}
"
[(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
- (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
+ (set (attr "length") (cond [(eq_attr "alternative" "0") (const_int 8)
+ (eq_attr "alternative" "1,2")
+ (symbol_ref ("move_double_length (insn)"))
(eq_attr "alternative" "5")
(if_then_else
(eq (symbol_ref "TARGET_VFP_SINGLE")
@@ -201,7 +203,8 @@
}
"
[(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
- (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
+ (set (attr "length") (cond [(eq_attr "alternative" "0,1,2")
+ (symbol_ref ("move_double_length (insn)"))
(eq_attr "alternative" "5")
(if_then_else
(eq (symbol_ref "TARGET_VFP_SINGLE")
@@ -435,7 +438,9 @@
"
[(set_attr "type"
"r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
- (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
+ (set (attr "length") (cond [(eq_attr "alternative" "8") (const_int 8)
+ (eq_attr "alternative" "3,4")
+ (symbol_ref ("move_double_length (insn)"))
(eq_attr "alternative" "7")
(if_then_else
(eq (symbol_ref "TARGET_VFP_SINGLE")
@@ -479,7 +484,8 @@
"
[(set_attr "type"
"r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*")
- (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
+ (set (attr "length") (cond [(eq_attr "alternative" "3,4,8")
+ (symbol_ref ("move_double_length (insn)"))
(eq_attr "alternative" "7")
(if_then_else
(eq (symbol_ref "TARGET_VFP_SINGLE")
===================================================================
@@ -195,7 +195,10 @@
[(set_attr "neon_type" "neon_int_1,*,neon_vmov,*,neon_mrrc,neon_mcr_2_mcrr,*,*,*")
(set_attr "type" "*,f_stored,*,f_loadd,*,*,alu,load2,store2")
(set_attr "insn" "*,*,*,*,*,*,mov,*,*")
- (set_attr "length" "4,4,4,4,4,4,8,8,8")
+ (set (attr "length")
+ (cond [(eq_attr "alternative" "6,7,8")
+ (symbol_ref ("move_double_length (insn)"))]
+ (const_int 4)))
(set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*")
(set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")])
===================================================================
@@ -169,3 +169,7 @@ mfix-cortex-m3-ldrd
Target Report Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions
that may trigger Cortex-M3 errata.
+
+munaligned-access
+Target Report Var(unaligned_access) Init(2)
+Enable unaligned access to data.
===================================================================
@@ -91,7 +91,11 @@
return \"wstrd%?\\t%1,%0\";
}
}"
- [(set_attr "length" "8,8,8,4,4,4,4,4")
+ [(set (attr "length")
+ (cond [(eq_attr "alternative" "0") (const_int 8)
+ (eq_attr "alternative" "1,2")
+ (symbol_ref ("move_double_length (insn)"))]
+ (const_int 4)))
(set_attr "type" "*,load1,store2,*,*,*,*,*")
(set_attr "pool_range" "*,1020,*,*,*,*,*,*")
(set_attr "neg_pool_range" "*,1012,*,*,*,*,*,*")]
@@ -175,8 +179,12 @@
case 5: return \"#\";
default: return output_move_double (operands);
}"
- [(set_attr "predicable" "yes")
- (set_attr "length" "4, 4, 4,4,4,8, 8,8")
+ [(set (attr "length")
+ (cond [(eq_attr "alternative" "5") (const_int 8)
+ (eq_attr "alternative" "6,7")
+ (symbol_ref ("move_double_length (insn)"))]
+ (const_int 4)))
+ (set_attr "predicable" "yes")
(set_attr "type" "*,store1,load1,*,*,*,load1,store1")
(set_attr "pool_range" "*, *, 256,*,*,*, 256,*")
(set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*")])
===================================================================
@@ -4881,7 +4881,11 @@
return output_move_double (operands);
}
"
- [(set_attr "length" "8,12,16,8,8")
+ [(set (attr "length")
+ (cond [(eq_attr "alternative" "0") (const_int 8)
+ (eq_attr "alternative" "1") (const_int 12)
+ (eq_attr "alternative" "2") (const_int 16)]
+ (symbol_ref ("move_double_length (insn)"))))
(set_attr "type" "*,*,*,load2,store2")
(set_attr "arm_pool_range" "*,*,*,1020,*")
(set_attr "arm_neg_pool_range" "*,*,*,1008,*")
@@ -6238,7 +6242,11 @@
return output_move_double (operands);
}
"
- [(set_attr "length" "8,12,16,8,8")
+ [(set (attr "length")
+ (cond [(eq_attr "alternative" "0") (const_int 8)
+ (eq_attr "alternative" "1") (const_int 12)
+ (eq_attr "alternative" "2") (const_int 16)]
+ (symbol_ref ("move_double_length (insn)"))))
(set_attr "type" "*,*,*,load2,store2")
(set_attr "pool_range" "*,*,*,1020,*")
(set_attr "arm_neg_pool_range" "*,*,*,1008,*")
===================================================================
@@ -577,7 +577,11 @@
}
}
"
- [(set_attr "length" "4,4,8,8,8,4,4,4,4,8,8")
+ [(set (attr "length")
+ (cond [(eq_attr "alternative" "2,9,10") (const_int 8)
+ (eq_attr "alternative" "3,4")
+ (symbol_ref ("move_double_length (insn)"))]
+ (const_int 4)))
(set_attr "predicable" "yes")
(set_attr "type"
"load1,store2,*,store2,load1,ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r")
@@ -667,7 +671,11 @@
}
}
"
- [(set_attr "length" "4,4,8,8,8,4,4,4,4,8,8")
+ [(set (attr "length")
+ (cond [(eq_attr "alternative" "2,3,4")
+ (symbol_ref ("move_double_length (insn)"))
+ (eq_attr "alternative" "9,10") (const_int 8)]
+ (const_int 4)))
(set_attr "type"
"load1,store2,*,store2,load1,ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r")
(set_attr "pool_range" "*,*,*,*,4092,*,*,1024,*,*,*")
===================================================================
@@ -393,7 +393,11 @@
default: gcc_unreachable ();
}
}"
- [(set_attr "length" " 8, 8, 8, 8, 8, 4, 4, 4")
+ [(set (attr "length")
+ (cond [(eq_attr "alternative" "0,3,4") (const_int 8)
+ (eq_attr "alternative" "1,2")
+ (symbol_ref ("move_double_length (insn)"))]
+ (const_int 4)))
(set_attr "type" " *,load2,store2, *, *, load2,store2, *")
(set_attr "pool_range" " *,1020, *, *, *, 1020, *, *")
(set_attr "neg_pool_range" " *,1012, *, *, *, 1008, *, *")
@@ -448,8 +452,12 @@
default: gcc_unreachable ();
}
}"
- [(set_attr "type" "load1,store2, *,store2,load1, *, load1, *, *,store2")
- (set_attr "length" " 4, 4, 8, 8, 8, 4, 4, 8, 8, 4")
+ [(set (attr "length")
+ (cond [(eq_attr "alternative" "2,7,8") (const_int 8)
+ (eq_attr "alternative" "3,4")
+ (symbol_ref ("move_double_length (insn)"))]
+ (const_int 4)))
+ (set_attr "type" "load1,store2, *,store2,load1, *, load1, *, *,store2")
(set_attr "pool_range" " *, *, *, *, 252, *, 1020, *, *, *")
(set_attr "neg_pool_range" " *, *, *, *, 244, *, 1008, *, *, *")
(set_attr "cirrus" " not, not,not, not, not,normal,double,move,normal,double")]
@@ -480,7 +488,11 @@
default: abort ();
}
}"
- [(set_attr "length" " 8, 8, 8, 8, 8, 4, 4, 4")
+ [(set (attr "length")
+ (cond [(eq_attr "alternative" "0,1,2")
+ (symbol_ref ("move_double_length (insn)"))
+ (eq_attr "alternative" "3,4") (const_int 8)]
+ (const_int 4)))
(set_attr "type" " *,load2,store2, *, *, load2,store2, *")
(set_attr "pool_range" " *,4096, *, *, *, 1020, *, *")
(set_attr "neg_pool_range" " *, 0, *, *, *, 1008, *, *")
@@ -531,8 +543,12 @@
default: abort ();
}
}"
- [(set_attr "type" "load1,store2, *,store2,load1, *, load1, *, *,store2")
- (set_attr "length" " 4, 4, 8, 8, 8, 4, 4, 8, 8, 4")
+ [(set (attr "length")
+ (cond [(eq_attr "alternative" "2,3,4")
+ (symbol_ref ("move_double_length (insn)"))
+ (eq_attr "alternative" "7,8") (const_int 8)]
+ (const_int 4)))
+ (set_attr "type" "load1,store2, *,store2,load1, *, load1, *, *,store2")
(set_attr "pool_range" " *, *, *, *,4092, *, 1020, *, *, *")
(set_attr "neg_pool_range" " *, *, *, *, 0, *, 1008, *, *, *")
(set_attr "cirrus" " not, not,not, not, not,normal,double,move,normal,double")]