===================================================================
@@ -1028,11 +1028,14 @@
;; shift-count granularity. That's good enough for the middle-end's current
;; needs.
+;; Note that it's not safe to perform such an operation in big-endian mode,
+;; due to element-ordering issues.
+
(define_expand "vec_shr_<mode>"
[(match_operand:VDQ 0 "s_register_operand" "")
(match_operand:VDQ 1 "s_register_operand" "")
(match_operand:SI 2 "const_multiple_of_8_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
rtx zero_reg;
HOST_WIDE_INT num_bits = INTVAL (operands[2]);
@@ -1060,7 +1063,7 @@
[(match_operand:VDQ 0 "s_register_operand" "")
(match_operand:VDQ 1 "s_register_operand" "")
(match_operand:SI 2 "const_multiple_of_8_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
rtx zero_reg;
HOST_WIDE_INT num_bits = INTVAL (operands[2]);
@@ -1256,7 +1259,8 @@
(define_expand "reduc_splus_<mode>"
[(match_operand:VQ 0 "s_register_operand" "")
(match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
@@ -1272,7 +1276,7 @@
[(set (match_operand:V2DI 0 "s_register_operand" "=w")
(unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
UNSPEC_VPADD))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
"vadd.i64\t%e0, %e1, %f1"
[(set_attr "neon_type" "neon_int_1")]
)
@@ -1282,7 +1286,7 @@
(define_expand "reduc_uplus_<mode>"
[(match_operand:VDQI 0 "s_register_operand" "")
(match_operand:VDQI 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)"
{
emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1]));
DONE;
@@ -1301,7 +1305,8 @@
(define_expand "reduc_smin_<mode>"
[(match_operand:VQ 0 "s_register_operand" "")
(match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
@@ -1326,7 +1331,8 @@
(define_expand "reduc_smax_<mode>"
[(match_operand:VQ 0 "s_register_operand" "")
(match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
@@ -1351,7 +1357,7 @@
(define_expand "reduc_umin_<mode>"
[(match_operand:VQI 0 "s_register_operand" "")
(match_operand:VQI 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
@@ -1376,7 +1382,7 @@
(define_expand "reduc_umax_<mode>"
[(match_operand:VQI 0 "s_register_operand" "")
(match_operand:VQI 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
@@ -5238,7 +5244,7 @@
(SE:<V_unpack> (vec_select:<V_HALF>
(match_operand:VU 1 "register_operand" "w")
(match_operand:VU 2 "vect_par_constant_low" ""))))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
"vmovl.<US><V_sz_elem> %q0, %e1"
[(set_attr "neon_type" "neon_shift_1")]
)
@@ -5248,7 +5254,7 @@
(SE:<V_unpack> (vec_select:<V_HALF>
(match_operand:VU 1 "register_operand" "w")
(match_operand:VU 2 "vect_par_constant_high" ""))))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
"vmovl.<US><V_sz_elem> %q0, %f1"
[(set_attr "neon_type" "neon_shift_1")]
)
@@ -5256,7 +5262,7 @@
(define_expand "vec_unpack<US>_hi_<mode>"
[(match_operand:<V_unpack> 0 "register_operand" "")
(SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
rtx t1;
@@ -5275,7 +5281,7 @@
(define_expand "vec_unpack<US>_lo_<mode>"
[(match_operand:<V_unpack> 0 "register_operand" "")
(SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
rtx t1;
@@ -5298,7 +5304,7 @@
(SE:<V_unpack> (vec_select:<V_HALF>
(match_operand:VU 3 "register_operand" "w")
(match_dup 2)))))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
"vmull.<US><V_sz_elem> %q0, %e1, %e3"
[(set_attr "neon_type" "neon_shift_1")]
)
@@ -5307,7 +5313,7 @@
[(match_operand:<V_unpack> 0 "register_operand" "")
(SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
(SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
rtx t1;
@@ -5332,7 +5338,7 @@
(SE:<V_unpack> (vec_select:<V_HALF>
(match_operand:VU 3 "register_operand" "w")
(match_dup 2)))))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
"vmull.<US><V_sz_elem> %q0, %f1, %f3"
[(set_attr "neon_type" "neon_shift_1")]
)
@@ -5341,7 +5347,7 @@
[(match_operand:<V_unpack> 0 "register_operand" "")
(SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
(SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
rtx t1;
@@ -5435,6 +5441,10 @@
}
)
+; FIXME: These instruction patterns can't be used safely in big-endian mode
+; because the ordering of vector elements in Q registers is different from what
+; the semantics of the instructions require.
+
(define_insn "vec_pack_trunc_<mode>"
[(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
(vec_concat:<V_narrow_pack>
@@ -5442,7 +5452,7 @@
(match_operand:VN 1 "register_operand" "w"))
(truncate:<V_narrow>
(match_operand:VN 2 "register_operand" "w"))))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
"vmovn.i<V_sz_elem>\t%e0, %q1\n\tvmovn.i<V_sz_elem>\t%f0, %q2"
[(set_attr "neon_type" "neon_shift_1")]
)
@@ -5451,7 +5461,7 @@
(define_insn "neon_vec_pack_trunc_<mode>"
[(set (match_operand:<V_narrow> 0 "register_operand" "=w")
(truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
"vmovn.i<V_sz_elem>\t%P0, %q1"
[(set_attr "neon_type" "neon_shift_1")]
)
@@ -5460,7 +5470,7 @@
[(match_operand:<V_narrow_pack> 0 "register_operand" "")
(match_operand:VSHFT 1 "register_operand" "")
(match_operand:VSHFT 2 "register_operand")]
- "TARGET_NEON"
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
===================================================================
@@ -1820,6 +1820,15 @@ proc check_effective_target_arm32 { } {
}]
}
+# Return 1 if this is a little-endian ARM target
+proc check_effective_target_arm_little_endian { } {
+ return [check_no_compiler_messages arm_little_endian assembly {
+ #if !defined(__arm__) || !defined(__ARMEL__)
+ #error FOO
+ #endif
+ }]
+}
+
# Return 1 if this is an ARM target that only supports aligned vector accesses
proc check_effective_target_arm_vect_no_misalign { } {
return [check_no_compiler_messages arm_vect_no_misalign assembly {
@@ -2759,7 +2768,8 @@ proc check_effective_target_vect_pack_tr
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*]
|| [istarget spu-*-*]
- || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]
+ && [check_effective_target_arm_little_endian]) } {
set et_vect_pack_trunc_saved 1
}
}
@@ -2784,7 +2794,8 @@ proc check_effective_target_vect_unpack
|| [istarget x86_64-*-*]
|| [istarget spu-*-*]
|| [istarget ia64-*-*]
- || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon]
+ && [check_effective_target_arm_little_endian]) } {
set et_vect_unpack_saved 1
}
}
@@ -2970,7 +2981,8 @@ proc check_effective_target_vect_element
verbose "check_effective_target_vect_element_align: using cached result" 2
} else {
set et_vect_element_align 0
- if { [istarget arm*-*-*]
+ if { ([istarget arm*-*-*]
+ && ![check_effective_target_arm_vect_no_misalign])
|| [check_effective_target_vect_hw_misalign] } {
set et_vect_element_align 1
}