commit d66c777aa23e6df0d68265767d4ae9f370ffb761
Author: Julian Brown <julian@henry8.codesourcery.com>
Date: Tue May 24 10:01:48 2011 -0700
Unaligned support for packed structs
@@ -1674,6 +1674,28 @@ arm_option_override (void)
fix_cm3_ldrd = 0;
}
+ /* Enable -munaligned-access by default for
+ - all ARMv6 architecture-based processors
+ - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
+
+ Disable -munaligned-access by default for
+ - all pre-ARMv6 architecture-based processors
+ - ARMv6-M architecture-based processors. */
+
+ if (unaligned_access == 2)
+ {
+ if (arm_arch6 && (arm_arch_notm || arm_arch7))
+ unaligned_access = 1;
+ else
+ unaligned_access = 0;
+ }
+ else if (unaligned_access == 1
+ && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
+ {
+ warning (0, "target CPU does not support unaligned accesses");
+ unaligned_access = 0;
+ }
+
if (TARGET_THUMB1 && flag_schedule_insns)
{
/* Don't warn since it's on by default in -O2. */
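(Illustration, not part of the patch: the accesses this default governs are
word and halfword loads/stores to members of packed structures, along the
lines of the sketch below.  The codegen notes in the comments are
assumptions based on the expanders added later in this patch.)

    struct __attribute__((packed)) pkt
    {
      unsigned char tag;
      unsigned int payload;        /* byte offset 1, so misaligned */
    };

    unsigned int
    get_payload (struct pkt *p)
    {
      /* With unaligned_access enabled this can become a single ldr;
         with it disabled, separate byte loads combined with orr.  */
      return p->payload;
    }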
@@ -21555,6 +21577,10 @@ arm_file_start (void)
val = 6;
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
+ /* Tag_CPU_unaligned_access. */
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n",
+ unaligned_access);
+
/* Tag_ABI_FP_16bit_format. */
if (arm_fp16_format)
asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
@@ -104,6 +104,10 @@
UNSPEC_SYMBOL_OFFSET ; The offset of the start of the symbol from
; another symbolic address.
UNSPEC_MEMORY_BARRIER ; Represent a memory barrier.
+ UNSPEC_UNALIGNED_LOAD ; Used to represent ldr/ldrh instructions that access
+ ; unaligned locations, on architectures which support
+ ; that.
+ UNSPEC_UNALIGNED_STORE ; Same for str/strh.
])
;; UNSPEC_VOLATILE Usage:
@@ -2393,7 +2397,7 @@
;;; this insv pattern, so this pattern needs to be reevalutated.
(define_expand "insv"
- [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "")
+ [(set (zero_extract:SI (match_operand:SI 0 "nonimmediate_operand" "")
(match_operand:SI 1 "general_operand" "")
(match_operand:SI 2 "general_operand" ""))
(match_operand:SI 3 "reg_or_int_operand" ""))]
@@ -2407,35 +2411,66 @@
if (arm_arch_thumb2)
{
- bool use_bfi = TRUE;
-
- if (GET_CODE (operands[3]) == CONST_INT)
+ if (unaligned_access && MEM_P (operands[0])
+ && s_register_operand (operands[3], GET_MODE (operands[3]))
+ && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0)
{
- HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
-
- if (val == 0)
+ rtx base_addr;
+
+ if (width == 32)
{
- emit_insn (gen_insv_zero (operands[0], operands[1],
- operands[2]));
- DONE;
+ base_addr = adjust_address (operands[0], SImode,
+ start_bit / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_storesi (base_addr, operands[3]));
}
+ else
+ {
+ rtx tmp = gen_reg_rtx (HImode);
- /* See if the set can be done with a single orr instruction. */
- if (val == mask && const_ok_for_arm (val << start_bit))
- use_bfi = FALSE;
+ base_addr = adjust_address (operands[0], HImode,
+ start_bit / BITS_PER_UNIT);
+ emit_move_insn (tmp, gen_lowpart (HImode, operands[3]));
+ emit_insn (gen_unaligned_storehi (base_addr, tmp));
+ }
+ DONE;
}
-
- if (use_bfi)
+ else if (s_register_operand (operands[0], GET_MODE (operands[0])))
{
- if (GET_CODE (operands[3]) != REG)
- operands[3] = force_reg (SImode, operands[3]);
+ bool use_bfi = TRUE;
- emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
- operands[3]));
- DONE;
+ if (GET_CODE (operands[3]) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
+
+ if (val == 0)
+ {
+ emit_insn (gen_insv_zero (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+
+ /* See if the set can be done with a single orr instruction. */
+ if (val == mask && const_ok_for_arm (val << start_bit))
+ use_bfi = FALSE;
+ }
+
+ if (use_bfi)
+ {
+ if (GET_CODE (operands[3]) != REG)
+ operands[3] = force_reg (SImode, operands[3]);
+
+ emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
}
+ else
+ FAIL;
}
+ if (!s_register_operand (operands[0], GET_MODE (operands[0])))
+ FAIL;
+
target = copy_rtx (operands[0]);
/* Avoid using a subreg as a subtarget, and avoid writing a paradoxical
subreg as the final target. */
@@ -3628,7 +3663,7 @@
(define_expand "extzv"
[(set (match_dup 4)
- (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
(match_operand:SI 2 "const_int_operand" "")))
(set (match_operand:SI 0 "register_operand" "")
(lshiftrt:SI (match_dup 4)
@@ -3641,10 +3676,53 @@
if (arm_arch_thumb2)
{
- emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
- operands[3]));
- DONE;
+ HOST_WIDE_INT width = INTVAL (operands[2]);
+ HOST_WIDE_INT bitpos = INTVAL (operands[3]);
+
+ if (unaligned_access && MEM_P (operands[1])
+ && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0)
+ {
+ rtx base_addr;
+
+ if (width == 32)
+ {
+ base_addr = adjust_address (operands[1], SImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
+ }
+ else
+ {
+ rtx dest = operands[0];
+ rtx tmp = gen_reg_rtx (SImode);
+
+ /* We may get a paradoxical subreg here. Strip it off. */
+ if (GET_CODE (dest) == SUBREG
+ && GET_MODE (dest) == SImode
+ && GET_MODE (SUBREG_REG (dest)) == HImode)
+ dest = SUBREG_REG (dest);
+
+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
+ FAIL;
+
+ base_addr = adjust_address (operands[1], HImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadhiu (tmp, base_addr));
+ emit_move_insn (gen_lowpart (SImode, dest), tmp);
+ }
+ DONE;
+ }
+ else if (s_register_operand (operands[1], GET_MODE (operands[1])))
+ {
+ emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+ else
+ FAIL;
}
+
+ if (!s_register_operand (operands[1], GET_MODE (operands[1])))
+ FAIL;
operands[3] = GEN_INT (rshift);
@@ -3659,7 +3737,113 @@
}"
)
-(define_insn "extv"
+(define_expand "extv"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "arm_arch_thumb2"
+{
+ HOST_WIDE_INT width = INTVAL (operands[2]);
+ HOST_WIDE_INT bitpos = INTVAL (operands[3]);
+
+ if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32)
+ && (bitpos % BITS_PER_UNIT) == 0)
+ {
+ rtx base_addr;
+
+ if (width == 32)
+ {
+ base_addr = adjust_address (operands[1], SImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
+ }
+ else
+ {
+ rtx dest = operands[0];
+ rtx tmp = gen_reg_rtx (SImode);
+
+ /* We may get a paradoxical subreg here. Strip it off. */
+ if (GET_CODE (dest) == SUBREG
+ && GET_MODE (dest) == SImode
+ && GET_MODE (SUBREG_REG (dest)) == HImode)
+ dest = SUBREG_REG (dest);
+
+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
+ FAIL;
+
+ base_addr = adjust_address (operands[1], HImode,
+ bitpos / BITS_PER_UNIT);
+ emit_insn (gen_unaligned_loadhis (tmp, base_addr));
+ emit_move_insn (gen_lowpart (SImode, dest), tmp);
+ }
+
+ DONE;
+ }
+ else if (!s_register_operand (operands[1], GET_MODE (operands[1])))
+ FAIL;
+})
+
+; ARMv6+ unaligned load/store instructions (used for packed structure accesses).
+
+(define_insn "unaligned_loadsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")]
+ UNSPEC_UNALIGNED_LOAD))]
+ "unaligned_access && TARGET_32BIT"
+ "ldr%?\t%0, %1\t@ unaligned"
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "2,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load1")])
+
+(define_insn "unaligned_loadhis"
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (sign_extend:SI
+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+ UNSPEC_UNALIGNED_LOAD)))]
+ "unaligned_access && TARGET_32BIT"
+ "ldr%(sh%)\t%0, %1\t@ unaligned"
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "2,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load_byte")])
+
+(define_insn "unaligned_loadhiu"
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (zero_extend:SI
+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+ UNSPEC_UNALIGNED_LOAD)))]
+ "unaligned_access && TARGET_32BIT"
+ "ldr%(h%)\t%0, %1\t@ unaligned"
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "2,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load_byte")])
+
+(define_insn "unaligned_storesi"
+ [(set (match_operand:SI 0 "memory_operand" "=Uw,m")
+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")]
+ UNSPEC_UNALIGNED_STORE))]
+ "unaligned_access && TARGET_32BIT"
+ "str%?\t%1, %0\t@ unaligned"
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "2,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "store1")])
+
+(define_insn "unaligned_storehi"
+ [(set (match_operand:HI 0 "memory_operand" "=Uw,m")
+ (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")]
+ UNSPEC_UNALIGNED_STORE))]
+ "unaligned_access && TARGET_32BIT"
+ "str%(h%)\t%1, %0\t@ unaligned"
+ [(set_attr "arch" "t2,any")
+ (set_attr "length" "2,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "store1")])
+
+(define_insn "*extv_reg"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
(match_operand:SI 2 "const_int_operand" "M")
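(Illustration, not part of the patch: a sketch of the store path.  A
misaligned halfword write into a packed structure goes through
store_bit_field and hence the insv expander above, which with unaligned
access enabled can now use gen_unaligned_storehi rather than a byte-wise
read-modify-write sequence.  The struct layout and codegen notes here are
assumptions.)

    struct __attribute__((packed)) rec
    {
      unsigned char flag;
      unsigned short len;          /* halfword at byte offset 1 */
    };

    void
    set_len (struct rec *r, unsigned short v)
    {
      /* Expected to expand via unaligned_storehi, i.e. a single
         "strh ... @ unaligned", when unaligned accesses are enabled.  */
      r->len = v;
    }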
@@ -235,3 +235,7 @@ mfix-cortex-m3-ldrd
Target Report Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions
that may trigger Cortex-M3 errata.
+
+munaligned-access
+Target Report Var(unaligned_access) Init(2)
+Enable unaligned word and halfword accesses to packed data.
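(For illustration: with the Init(2) "decide from the architecture" default,
the option can still be forced either way on the command line.
-mno-unaligned-access on an ARMv6/ARMv7 target keeps the old byte-at-a-time
expansion, while -munaligned-access on a core without hardware support
triggers the "target CPU does not support unaligned accesses" warning added
above and then disables the feature.)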
@@ -36,7 +36,7 @@
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
;; in ARM state: Uq
-;; in Thumb state: Uu
+;; in Thumb state: Uu, Uw
(define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS"
@@ -341,6 +341,19 @@
&& thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
0)")))
+; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p
+; are actually LDM/STM instructions, so cannot be used to access unaligned
+; data.
+(define_memory_constraint "Uw"
+ "@internal
+ In Thumb state an address that is valid in 16bit encoding, and that can be
+ used for unaligned accesses."
+ (and (match_code "mem")
+ (match_test "TARGET_THUMB
+ && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
+ 0)
+ && GET_CODE (XEXP (op, 0)) != POST_INC")))
+
;; We used to have constraint letters for S and R in ARM state, but
;; all uses of these now appear to have been removed.
@@ -648,7 +648,7 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
/* On big-endian machines, we count bits from the most significant.
If the bit field insn does not, we must invert. */
- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN && !MEM_P (xop0))
xbitpos = unit - bitsize - xbitpos;
/* We have been counting XBITPOS within UNIT.
@@ -1456,7 +1456,7 @@ extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
/* On big-endian machines, we count bits from the most significant.
If the bit field insn does not, we must invert. */
- if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN && !MEM_P (xop0))
xbitpos = unit - bitsize - xbitpos;
/* Now convert from counting within UNIT to counting in EXT_MODE. */
On Mon, 9 May 2011 18:01:12 +0100 Julian Brown <julian@codesourcery.com> wrote:

> How does this look now? (Re-testing in progress.)

The previously-posted version contained a bug in the "extv" expander,
which became apparent only when testing together with my fixed-point
support patch. This is a corrected version.

Re-tested (together with the followup unaligned-memcpy patch and
fixed-point patch series), with no regressions in both big & little-endian
mode, cross to ARM EABI.

OK to apply this version?

Thanks,

Julian

ChangeLog

    gcc/
    * config/arm/arm.c (arm_option_override): Add unaligned_access
    support.
    (arm_file_start): Emit attribute for unaligned access as appropriate.
    * config/arm/arm.md (UNSPEC_UNALIGNED_LOAD)
    (UNSPEC_UNALIGNED_STORE): Add constants for unspecs.
    (insv, extzv): Add unaligned-access support.
    (extv): Change to expander. Likewise.
    (unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu)
    (unaligned_storesi, unaligned_storehi): New.
    (*extv_reg): New (previous extv implementation).
    * config/arm/arm.opt (munaligned-access): Add option.
    * config/arm/constraints.md (Uw): New constraint.
    * expmed.c (store_bit_field_1): Don't tweak bitfield numbering for
    memory locations if BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN.
    (extract_bit_field_1): Likewise.
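(Appendix, not part of the submission: a sketch of the intended end result.
Compiling the get_payload() example above for a Thumb-2 target with
unaligned access enabled would be expected to use the new unaligned_loadsi
pattern, i.e. something like

    get_payload:
        ldr     r0, [r0, #1]    @ unaligned
        bx      lr

where register choice and addressing are assumptions, and the object file
would carry ".eabi_attribute 34, 1" (Tag_CPU_unaligned_access) from the
arm_file_start change.)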