@@ -161,6 +161,8 @@ extern bool rs6000_function_pcrel_p (struct function *);
extern bool rs6000_pcrel_p (void);
extern bool rs6000_fndecl_pcrel_p (const_tree);
extern void rs6000_output_addr_vec_elt (FILE *, int);
+extern bool rs6000_vsx_element_in_dword0_p (rtx, enum machine_mode);
+extern void rs6000_vsx_extract_element_from_dword0 (rtx, rtx, rtx, bool);
/* Different PowerPC instruction formats that are used by GCC. There are
various other instruction formats used by the PowerPC hardware, but these
@@ -29098,6 +29098,74 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt)
return false;
}
+/* Return true when the element is in dword0 of a vector. Exclude word
+ element 1 of VS4SI as the word can be extracted by mfvsrwz directly. */
+
+bool
+rs6000_vsx_element_in_dword0_p (rtx op, enum machine_mode mode)
+{
+ gcc_assert (CONST_INT_P (op));
+ gcc_assert (mode == V16QImode || mode == V8HImode || mode == V4SImode);
+
+ int units = GET_MODE_NUNITS (mode);
+ int elt = INTVAL (op);
+ elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt;
+
+ if (elt > units / 2
+ || (elt == units / 2 && mode != V4SImode))
+ return true;
+ else
+ return false;
+}
+
+/* Extract element from dword0 by mfvsrd and lshiftrt and mask. Extend_p
+ indicates if zero extend is needed or not. */
+
+void
+rs6000_vsx_extract_element_from_dword0 (rtx dest, rtx src, rtx element,
+ bool extend_p)
+{
+ enum machine_mode mode = GET_MODE (src);
+ gcc_assert (rs6000_vsx_element_in_dword0_p (element, mode));
+
+ enum machine_mode dest_mode = GET_MODE (dest);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int units = GET_MODE_NUNITS (mode);
+ int elt = INTVAL (element);
+ elt = BYTES_BIG_ENDIAN ? units - 1 - elt : elt;
+ int value, shift;
+ unsigned int mask;
+
+ rtx vec_tmp = gen_lowpart (V2DImode, src);
+ rtx tmp1 = can_create_pseudo_p ()
+ ? gen_reg_rtx (DImode)
+ : simplify_gen_subreg (DImode, dest, dest_mode, 0);
+ value = BYTES_BIG_ENDIAN ? 0 : 1;
+ emit_insn (gen_vsx_extract_v2di (tmp1, vec_tmp, GEN_INT (value)));
+
+ rtx tmp2;
+ shift = (elt - units / 2) * GET_MODE_BITSIZE (inner_mode);
+ if (shift || extend_p)
+ {
+ tmp2 = (dest_mode == DImode)
+ ? dest
+ : (can_create_pseudo_p ()
+ ? gen_reg_rtx (DImode)
+ : simplify_gen_subreg (DImode, dest, dest_mode, 0));
+ mask = (1ULL << GET_MODE_BITSIZE (inner_mode)) - 1;
+ rtx shift_op = gen_rtx_LSHIFTRT (DImode, tmp1, GEN_INT (shift));
+ emit_insn (gen_rotldi3_mask (tmp2, tmp1, GEN_INT (shift), GEN_INT (mask),
+ shift_op));
+ }
+ else
+ tmp2 = tmp1;
+
+ if (dest_mode != DImode)
+ emit_move_insn (dest, gen_lowpart (dest_mode, tmp2));
+
+ return;
+}
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rs6000.h"
@@ -4104,7 +4104,7 @@ (define_insn "*eqv<mode>3"
;; Rotate-and-mask and insert.
-(define_insn "*rotl<mode>3_mask"
+(define_insn "rotl<mode>3_mask"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(and:GPR (match_operator:GPR 4 "rotate_mask_operator"
[(match_operand:GPR 1 "gpc_reg_operand" "r")
@@ -3754,7 +3754,9 @@ (define_expand "vsx_extract_<mode>"
(clobber (match_scratch:VSX_EXTRACT_I 3))])]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
{
- /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */
+ /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. But if
+ the element is word element 1 of a V4SI, it can be extracted by
+ mfvsrwz directly. */
if (TARGET_P9_VECTOR
&& (<MODE>mode != V4SImode
|| INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)))
@@ -3805,10 +3807,20 @@ (define_split
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
[(const_int 0)]
{
+ /* If the element is in dword0, it can be extracted by mfvsrd and lshiftrt
+ and mask. */
+ if (rs6000_vsx_element_in_dword0_p (operands[2], <MODE>mode))
+ {
+ rs6000_vsx_extract_element_from_dword0 (operands[0], operands[1],
+ operands[2], false);
+ DONE;
+ }
+
rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
rtx op1 = operands[1];
rtx op2 = operands[2];
rtx op3 = operands[3];
+
HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
emit_move_insn (op3, GEN_INT (offset));
@@ -3829,7 +3841,8 @@ (define_insn_and_split "*vsx_extract_<mode>_di_p9"
(clobber (match_scratch:SI 3 "=r,X"))]
"TARGET_VEXTRACTUB
&& (<MODE>mode != V4SImode
- || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
+ || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
+ && !rs6000_vsx_element_in_dword0_p (operands[2], <MODE>mode)"
"#"
"&& reload_completed"
[(parallel [(set (match_dup 4)
@@ -3837,6 +3850,7 @@ (define_insn_and_split "*vsx_extract_<mode>_di_p9"
(match_dup 1)
(parallel [(match_dup 2)])))
(clobber (match_dup 3))])]
+
{
operands[4] = gen_rtx_REG (<VEC_base>mode, REGNO (operands[0]));
}
@@ -3902,6 +3916,14 @@ (define_insn_and_split "vsx_extract_si"
rtx element = operands[2];
rtx vec_tmp;
+ /* If the element is in dword0, it can be extracted by mfvsrd and lshiftrt
+ and mask. */
+ if (rs6000_vsx_element_in_dword0_p (element, V4SImode))
+ {
+ rs6000_vsx_extract_element_from_dword0 (dest, src, element, false);
+ DONE;
+ }
+
if (GET_CODE (operands[3]) == SCRATCH)
vec_tmp = gen_reg_rtx (V4SImode);
else
@@ -3923,49 +3945,78 @@ (define_insn_and_split "vsx_extract_si"
(set_attr "length" "4,4,4,0")
(set_attr "isa" "p8v,*,p8v,*")])
+(define_insn_and_split "*vsx_extract_<mode>_zero_extend"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (zero_extend:DI
+ (vec_select:<VEC_base>
+ (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
+ (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))))
+ (clobber (match_scratch:VSX_EXTRACT_I 3 "=v"))]
+ "TARGET_DIRECT_MOVE_64BIT
+ && rs6000_vsx_element_in_dword0_p (operands[2], <MODE>mode)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx element = operands[2];
+
+ rs6000_vsx_extract_element_from_dword0 (dest, src, element, true);
+ DONE;
+}
+ [(set_attr "type" "mfvsr")])
+
(define_insn_and_split "*vsx_extract_<mode>_p8"
- [(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r")
+ [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r")
(vec_select:<VEC_base>
(match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
(parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
(clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
- "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
- && !TARGET_P9_VECTOR"
+ "TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
"#"
- "&& reload_completed"
+ "&& 1"
[(const_int 0)]
{
rtx dest = operands[0];
rtx src = operands[1];
rtx element = operands[2];
- rtx vec_tmp = operands[3];
+ rtx vec_tmp;
int value;
+ int num_elt = GET_MODE_NUNITS (<MODE>mode);
+ enum machine_mode dest_mode = GET_MODE (dest);
- if (!BYTES_BIG_ENDIAN)
- element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
+ if (rs6000_vsx_element_in_dword0_p (element, <MODE>mode))
+ {
+ rs6000_vsx_extract_element_from_dword0 (dest, src, element, false);
+ DONE;
+ }
- /* If the value is in the correct position, we can avoid doing the VSPLT<x>
- instruction. */
+ if (GET_CODE (operands[3]) == SCRATCH)
+ vec_tmp = gen_reg_rtx (<MODE>mode);
+ else
+ vec_tmp = operands[3];
+
+ if (!BYTES_BIG_ENDIAN)
+ element = GEN_INT (num_elt - 1 - INTVAL (element));
value = INTVAL (element);
+
if (<MODE>mode == V16QImode)
- {
- if (value != 7)
- emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
- else
- vec_tmp = src;
- }
+ emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
else if (<MODE>mode == V8HImode)
- {
- if (value != 3)
- emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
- else
- vec_tmp = src;
- }
+ emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
else
gcc_unreachable ();
- emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
- gen_rtx_REG (DImode, REGNO (vec_tmp)));
+ value = BYTES_BIG_ENDIAN ? 0 : 1;
+ rtx tmp1 = can_create_pseudo_p ()
+ ? gen_reg_rtx (DImode)
+ : simplify_gen_subreg (DImode, dest, dest_mode, 0);
+ rtx vec_tmp1 = gen_lowpart (V2DImode, vec_tmp);
+ emit_insn (gen_vsx_extract_v2di (tmp1, vec_tmp1, GEN_INT (value)));
+
+ emit_move_insn (dest, gen_lowpart (dest_mode, tmp1));
+
DONE;
}
[(set_attr "type" "mfvsr")])
@@ -49,21 +49,27 @@ testuc_var (vector unsigned char vuc2, signed int si)
return vec_extract (vuc2, si);
}
+#ifdef __BIG_ENDIAN__
+#define LANE 12
+#else
+#define LANE 3
+#endif
+
unsigned char
testbc_cst (vector bool char vbc2)
{
- return vec_extract (vbc2, 12);
+ return vec_extract (vbc2, LANE);
}
signed char
testsc_cst (vector signed char vsc2)
{
- return vec_extract (vsc2, 12);
+ return vec_extract (vsc2, LANE);
}
unsigned char
testuc_cst (vector unsigned char vuc2)
{
- return vec_extract (vuc2, 12);
+ return vec_extract (vuc2, LANE);
}
@@ -34,21 +34,27 @@ testuc_var (vector unsigned char vuc2, signed int si)
return vec_extract (vuc2, si);
}
+#ifdef __BIG_ENDIAN__
+#define LANE 12
+#else
+#define LANE 3
+#endif
+
unsigned char
testbc_cst (vector bool char vbc2)
{
- return vec_extract (vbc2, 12);
+ return vec_extract (vbc2, LANE);
}
signed char
testsc_cst (vector signed char vsc2)
{
- return vec_extract (vsc2, 12);
+ return vec_extract (vsc2, LANE);
}
unsigned char
testuc_cst (vector unsigned char vuc2)
{
- return vec_extract (vuc2, 12);
+ return vec_extract (vuc2, LANE);
}
@@ -54,21 +54,27 @@ testui_var (vector unsigned int vui2, signed int si)
return vec_extract (vui2, si);
}
+#ifdef __BIG_ENDIAN__
+#define LANE 11
+#else
+#define LANE 0
+#endif
+
unsigned int
testbi_cst (vector bool int vbi2)
{
- return vec_extract (vbi2, 12);
+ return vec_extract (vbi2, LANE);
}
signed int
testsi_cst (vector signed int vsi2)
{
- return vec_extract (vsi2, 12);
+ return vec_extract (vsi2, LANE);
}
unsigned int
testui_cst (vector unsigned int vui2)
{
- return vec_extract (vui2, 12);
+ return vec_extract (vui2, LANE);
}
@@ -40,21 +40,27 @@ testui_var (vector unsigned int vui2, signed int si)
return vec_extract (vui2, si);
}
+#ifdef __BIG_ENDIAN__
+#define LANE 11
+#else
+#define LANE 0
+#endif
+
unsigned int
testbi_cst (vector bool int vbi2)
{
- return vec_extract (vbi2, 12);
+ return vec_extract (vbi2, LANE);
}
signed int
testsi_cst (vector signed int vsi2)
{
- return vec_extract (vsi2, 12);
+ return vec_extract (vsi2, LANE);
}
unsigned int
testui_cst (vector unsigned int vui2)
{
- return vec_extract (vui2, 12);
+ return vec_extract (vui2, LANE);
}
@@ -38,22 +38,28 @@
#include <altivec.h>
+#ifdef __BIG_ENDIAN__
+#define LANE 15
+#else
+#define LANE 1
+#endif
+
unsigned short
testbi_cst (vector bool short vbs2)
{
- return vec_extract (vbs2, 12);
+ return vec_extract (vbs2, LANE);
}
signed short
testsi_cst (vector signed short vss2)
{
- return vec_extract (vss2, 12);
+ return vec_extract (vss2, LANE);
}
unsigned short
testui_cst12 (vector unsigned short vus2)
{
- return vec_extract (vus2, 12);
+ return vec_extract (vus2, LANE);
}
unsigned short
@@ -17,22 +17,28 @@
#include <altivec.h>
+#ifdef __BIG_ENDIAN__
+#define LANE 15
+#else
+#define LANE 1
+#endif
+
unsigned short
testbi_cst (vector bool short vbs2)
{
- return vec_extract (vbs2, 12);
+ return vec_extract (vbs2, LANE);
}
signed short
testsi_cst (vector signed short vss2)
{
- return vec_extract (vss2, 12);
+ return vec_extract (vss2, LANE);
}
unsigned short
testui_cst12 (vector unsigned short vus2)
{
- return vec_extract (vus2, 12);
+ return vec_extract (vus2, LANE);
}
unsigned short
@@ -9,123 +9,124 @@
#include <altivec.h>
+#ifdef __BIG_ENDIAN__
+#define DWORD1_FIRST_INT 3
+#define DWORD1_LAST_INT 2
+#define DWORD1_FIRST_SHORT 7
+#define DWORD1_LAST_SHORT 4
+#define DWORD1_FIRST_CHAR 15
+#define DWORD1_LAST_CHAR 8
+#else
+#define DWORD1_FIRST_INT 0
+#define DWORD1_LAST_INT 1
+#define DWORD1_FIRST_SHORT 0
+#define DWORD1_LAST_SHORT 3
+#define DWORD1_FIRST_CHAR 0
+#define DWORD1_LAST_CHAR 7
+#endif
+
int
extract_int_0 (vector int a)
{
- int c = 0;
- int b = vec_extract (a, c);
+ int b = vec_extract (a, DWORD1_FIRST_INT);
return b;
}
int
extract_int_3 (vector int a)
{
- int c = 3;
- int b = vec_extract (a, c);
+ int b = vec_extract (a, DWORD1_LAST_INT);
return b;
}
unsigned int
extract_uint_0 (vector unsigned int a)
{
- int c = 0;
- unsigned int b = vec_extract (a, c);
+ unsigned int b = vec_extract (a, DWORD1_FIRST_INT);
return b;
}
unsigned int
extract_uint_3 (vector unsigned int a)
{
- int c = 3;
- unsigned int b = vec_extract (a, c);
+ unsigned int b = vec_extract (a, DWORD1_LAST_INT);
return b;
}
short
extract_short_0 (vector short a)
{
- int c = 0;
- short b = vec_extract (a, c);
+ short b = vec_extract (a, DWORD1_FIRST_SHORT);
return b;
}
short
extract_short_7 (vector short a)
{
- int c = 7;
- short b = vec_extract (a, c);
+ short b = vec_extract (a, DWORD1_LAST_SHORT);
return b;
}
unsigned short
extract_ushort_0 (vector unsigned short a)
{
- int c = 0;
- unsigned short b = vec_extract (a, c);
+ unsigned short b = vec_extract (a, DWORD1_FIRST_SHORT);
return b;
}
unsigned short
extract_ushort_7 (vector unsigned short a)
{
- int c = 7;
- unsigned short b = vec_extract (a, c);
+ unsigned short b = vec_extract (a, DWORD1_LAST_SHORT);
return b;
}
signed char
extract_schar_0 (vector signed char a)
{
- int c = 0;
- signed char b = vec_extract (a, c);
+ signed char b = vec_extract (a, DWORD1_FIRST_CHAR);
return b;
}
signed char
extract_schar_15 (vector signed char a)
{
- int c = 15;
- signed char b = vec_extract (a, c);
+ signed char b = vec_extract (a, DWORD1_LAST_CHAR);
return b;
}
unsigned char
extract_uchar_0 (vector unsigned char a)
{
- int c = 0;
- unsigned char b = vec_extract (a, c);
+ unsigned char b = vec_extract (a, DWORD1_FIRST_CHAR);
return b;
}
unsigned char
extract_uchar_15 (vector unsigned char a)
{
- int c = 15;
- signed char b = vec_extract (a, c);
+ signed char b = vec_extract (a, DWORD1_LAST_CHAR);
return b;
}
unsigned char
extract_bool_char_0 (vector bool char a)
{
- int c = 0;
- unsigned char b = vec_extract (a, c);
+ unsigned char b = vec_extract (a, DWORD1_FIRST_CHAR);
return b;
}
unsigned int
extract_bool_int_0 (vector bool int a)
{
- int c = 0;
- unsigned int b = vec_extract (a, c);
+ unsigned int b = vec_extract (a, DWORD1_FIRST_INT);
return b;
}
unsigned short int
extract_bool_short_int_0 (vector bool short int a)
{
- int c = 0;
- unsigned short int b = vec_extract (a, c);
+ unsigned short int b = vec_extract (a, DWORD1_FIRST_SHORT);
return b;
}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr110331.h"
+
+/* { dg-final { scan-assembler-times {\mmfvsrd\M} 10 } } */
+/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mrldicl\M} 8 } } */
+/* { dg-final { scan-assembler-times "exts\[bhw\]" 6 } } */
+/* { dg-final { scan-assembler-not {\mvsplt} } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target has_arch_ppc64 } */
+
+#include "pr110331.h"
+
+/* { dg-final { scan-assembler-times {\mmfvsrd\M} 10 } } */
+/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mrldicl\M} 8 } } */
+/* { dg-final { scan-assembler-times "exts\[bhw\]" 6 } } */
+/* { dg-final { scan-assembler-not {\mvextu} } } */
new file mode 100644
@@ -0,0 +1,90 @@
+#include <altivec.h>
+
+#ifdef __BIG_ENDIAN__
+#define DWORD0_FIRST_INT 1
+#define DWORD0_LAST_INT 0
+#define DWORD0_FIRST_SHORT 3
+#define DWORD0_LAST_SHORT 0
+#define DWORD0_FIRST_CHAR 7
+#define DWORD0_LAST_CHAR 0
+#else
+#define DWORD0_FIRST_INT 2
+#define DWORD0_LAST_INT 3
+#define DWORD0_FIRST_SHORT 4
+#define DWORD0_LAST_SHORT 7
+#define DWORD0_FIRST_CHAR 8
+#define DWORD0_LAST_CHAR 15
+#endif
+
+/* mfvsrd, rldicl */
+unsigned char testuc_f (vector unsigned char v)
+{
+ return vec_extract (v, DWORD0_FIRST_CHAR);
+}
+
+/* mfvsrd, extsb */
+signed char testsc_f (vector signed char v)
+{
+ return vec_extract (v, DWORD0_FIRST_CHAR);
+}
+
+/* mfvsrd, rldicl */
+unsigned char testuc_l (vector unsigned char v)
+{
+ return vec_extract (v, DWORD0_LAST_CHAR);
+}
+
+/* mfvsrd, rldicl, extsb */
+signed char testsc_l (vector signed char v)
+{
+ return vec_extract (v, DWORD0_LAST_CHAR);
+}
+
+/* mfvsrd, rldicl */
+unsigned short testus_f (vector unsigned short v)
+{
+ return vec_extract (v, DWORD0_FIRST_SHORT);
+}
+
+/* mfvsrd, extsh */
+signed short testss_f (vector signed short v)
+{
+ return vec_extract (v, DWORD0_FIRST_SHORT);
+}
+
+/* mfvsrd, rldicl */
+unsigned short testus_l (vector unsigned short v)
+{
+ return vec_extract (v, DWORD0_LAST_SHORT);
+}
+
+/* mfvsrd, rldicl, extsh */
+signed short testss_l (vector signed short v)
+{
+ return vec_extract (v, DWORD0_LAST_SHORT);
+}
+
+/* mfvsrwz */
+unsigned int testui_f (vector unsigned int v)
+{
+ return vec_extract (v, DWORD0_FIRST_INT);
+}
+
+/* mfvsrwz, extsw */
+signed int testsi_f (vector signed int v)
+{
+ return vec_extract (v, DWORD0_FIRST_INT);
+}
+
+/* mfvsrd, rldicl */
+unsigned int testui_l (vector unsigned int v)
+{
+ return vec_extract (v, DWORD0_LAST_INT);
+}
+
+/* mfvsrd, rldicl, extsw */
+signed int testsi_l (vector signed int v)
+{
+ return vec_extract (v, DWORD0_LAST_INT);
+}
+