===================================================================
@@ -7519,8 +7519,52 @@ rs6000_split_vec_extract_var (rtx dest,
{
int bit_shift = byte_shift + 3;
rtx element2;
+ int dest_regno = regno_or_subregno (dest);
+ int src_regno = regno_or_subregno (src);
+ int element_regno = regno_or_subregno (element);
- gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
+ gcc_assert (REG_P (tmp_gpr));
+
+ /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
+ a general purpose register. */
+ if (TARGET_P9_VECTOR
+ && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ && INT_REGNO_P (dest_regno)
+ && ALTIVEC_REGNO_P (src_regno)
+ && INT_REGNO_P (element_regno))
+ {
+ rtx dest_si = gen_rtx_REG (SImode, dest_regno);
+ rtx element_si = gen_rtx_REG (SImode, element_regno);
+
+ if (mode == V16QImode)
+ emit_insn (VECTOR_ELT_ORDER_BIG
+ ? gen_vextublx (dest_si, element_si, src)
+ : gen_vextubrx (dest_si, element_si, src));
+
+ else if (mode == V8HImode)
+ {
+ rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
+ emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
+ emit_insn (VECTOR_ELT_ORDER_BIG
+ ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
+ : gen_vextuhrx (dest_si, tmp_gpr_si, src));
+ }
+
+
+ else
+ {
+ rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
+ emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
+ emit_insn (VECTOR_ELT_ORDER_BIG
+ ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
+ : gen_vextuwrx (dest_si, tmp_gpr_si, src));
+ }
+
+ return;
+ }
+
+
+ gcc_assert (REG_P (tmp_altivec));
/* For little endian, adjust element ordering. For V2DI/V2DF, we can use
an XOR, otherwise we need to subtract. The shift amount is so VSLO
===================================================================
@@ -119,13 +119,17 @@ (define_mode_attr VSr2 [(V2DF "wd")
(V4SF "wf")
(DF "ws")
(SF "ww")
- (DI "wi")])
+ (DI "wi")
+ (KF "wq")
+ (TF "wp")])
(define_mode_attr VSr3 [(V2DF "wa")
(V4SF "wa")
(DF "ws")
(SF "ww")
- (DI "wi")])
+ (DI "wi")
+ (KF "wq")
+ (TF "wp")])
;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4 [(SF "ws")
@@ -298,6 +302,14 @@ (define_mode_iterator VSX_EXTRACT_FL [SF
|| (FLOAT128_IEEE_P (TFmode)
&& TARGET_FLOAT128_HW)")])
+;; Mode iterator for binary floating types that have a direct conversion
+;; from 64-bit integer to floating point
+(define_mode_iterator FL_CONV [SF
+ DF
+ (KF "TARGET_FLOAT128_HW")
+ (TF "TARGET_FLOAT128_HW
+ && FLOAT128_IEEE_P (TFmode)")])
+
;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
@@ -2535,63 +2547,98 @@ (define_expand "vsx_extract_<mode>"
})
(define_insn "vsx_extract_<mode>_p9"
- [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=<VSX_EX>")
+ [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
(vec_select:<VS_scalar>
- (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>")
- (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))]
+ (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
+ (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
+ (clobber (match_scratch:SI 3 "=r,X"))]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
&& TARGET_VSX_SMALL_INTEGER"
{
- HOST_WIDE_INT elt = INTVAL (operands[2]);
- HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
- ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
- : elt);
-
- HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
- HOST_WIDE_INT offset = unit_size * elt_adj;
-
- operands[2] = GEN_INT (offset);
- if (unit_size == 4)
- return "xxextractuw %x0,%x1,%2";
+ if (which_alternative == 0)
+ return "#";
+
else
- return "vextractu<wd> %0,%1,%2";
+ {
+ HOST_WIDE_INT elt = INTVAL (operands[2]);
+ HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
+ ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
+ : elt);
+
+ HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
+ HOST_WIDE_INT offset = unit_size * elt_adj;
+
+ operands[2] = GEN_INT (offset);
+ if (unit_size == 4)
+ return "xxextractuw %x0,%x1,%2";
+ else
+ return "vextractu<wd> %0,%1,%2";
+ }
}
[(set_attr "type" "vecsimple")])
+(define_split
+ [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
+ (parallel [(match_operand:QI 2 "const_int_operand")])))
+ (clobber (match_operand:SI 3 "int_reg_operand"))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
+ && TARGET_VSX_SMALL_INTEGER && reload_completed"
+ [(const_int 0)]
+{
+ rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
+
+ emit_move_insn (op3, GEN_INT (offset));
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
+ else
+ emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
+ DONE;
+})
+
;; Optimize zero extracts to eliminate the AND after the extract.
(define_insn_and_split "*vsx_extract_<mode>_di_p9"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=<VSX_EX>")
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
(zero_extend:DI
(vec_select:<VS_scalar>
- (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>")
- (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))]
+ (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
+ (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
+ (clobber (match_scratch:SI 3 "=r,X"))]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
&& TARGET_VSX_SMALL_INTEGER"
"#"
"&& reload_completed"
- [(set (match_dup 3)
- (vec_select:<VS_scalar>
- (match_dup 1)
- (parallel [(match_dup 2)])))]
+ [(parallel [(set (match_dup 4)
+ (vec_select:<VS_scalar>
+ (match_dup 1)
+ (parallel [(match_dup 2)])))
+ (clobber (match_dup 3))])]
{
- operands[3] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
+ operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
})
;; Optimize stores to use the ISA 3.0 scalar store instructions
(define_insn_and_split "*vsx_extract_<mode>_store_p9"
- [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z")
+ [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
(vec_select:<VS_scalar>
- (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>")
- (parallel [(match_operand:QI 2 "const_int_operand" "n")])))
- (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>"))]
+ (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,<VSX_EX>")
+ (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
+ (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
+ (clobber (match_scratch:SI 4 "=X,&r"))]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
&& TARGET_VSX_SMALL_INTEGER"
"#"
"&& reload_completed"
- [(set (match_dup 3)
- (vec_select:<VS_scalar>
- (match_dup 1)
- (parallel [(match_dup 2)])))
+ [(parallel [(set (match_dup 3)
+ (vec_select:<VS_scalar>
+ (match_dup 1)
+ (parallel [(match_dup 2)])))
+ (clobber (match_dup 4))])
(set (match_dup 0)
(match_dup 3))])
@@ -2721,13 +2768,13 @@ (define_insn_and_split "*vsx_extract_<mo
;; Variable V16QI/V8HI/V4SI extract
(define_insn_and_split "vsx_extract_<mode>_var"
- [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
+ [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
(unspec:<VS_scalar>
- [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,m")
- (match_operand:DI 2 "gpc_reg_operand" "r,r")]
+ [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
+ (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
UNSPEC_VSX_EXTRACT))
- (clobber (match_scratch:DI 3 "=r,&b"))
- (clobber (match_scratch:V2DI 4 "=&v,X"))]
+ (clobber (match_scratch:DI 3 "=r,r,&b"))
+ (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
"#"
"&& reload_completed"
@@ -2738,6 +2785,27 @@ (define_insn_and_split "vsx_extract_<mod
DONE;
})
+(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
+ [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
+ (zero_extend:SDI
+ (unspec:<VSX_EXTRACT_I:VS_scalar>
+ [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
+ (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
+ UNSPEC_VSX_EXTRACT)))
+ (clobber (match_scratch:DI 3 "=r,r,&b"))
+ (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
+ "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
+ rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
+ operands[1], operands[2],
+ operands[3], operands[4]);
+ DONE;
+})
+
;; VSX_EXTRACT optimizations
;; Optimize double d = (double) vec_extract (vi, <n>)
;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
@@ -2839,6 +2907,56 @@ (define_insn_and_split "*vsx_extract_si_
DONE;
})
+;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
+;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
+;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
+;; vector short or vector unsigned short.
+(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
+ [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
+ (float:FL_CONV
+ (vec_select:<VSX_EXTRACT_I:VS_scalar>
+ (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
+ (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
+ (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
+ "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
+ && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 3)
+ (vec_select:<VSX_EXTRACT_I:VS_scalar>
+ (match_dup 1)
+ (parallel [(match_dup 2)])))
+ (clobber (scratch:SI))])
+ (set (match_dup 4)
+ (sign_extend:DI (match_dup 3)))
+ (set (match_dup 0)
+ (float:<FL_CONV:MODE> (match_dup 4)))]
+{
+ operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
+})
+
+(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
+ [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
+ (unsigned_float:FL_CONV
+ (vec_select:<VSX_EXTRACT_I:VS_scalar>
+ (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
+ (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
+ (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
+ "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
+ && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 3)
+ (vec_select:<VSX_EXTRACT_I:VS_scalar>
+ (match_dup 1)
+ (parallel [(match_dup 2)])))
+ (clobber (scratch:SI))])
+ (set (match_dup 0)
+ (float:<FL_CONV:MODE> (match_dup 4)))]
+{
+ operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
+})
+
;; V4SI/V8HI/V16QI set operation on ISA 3.0
(define_insn "vsx_set_<mode>_p9"
[(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
===================================================================
@@ -2,16 +2,53 @@
#include <stddef.h>
#include <altivec.h>
+#ifndef RTYPE
+#define RTYPE TYPE
+#endif
+
+#ifdef DO_TRACE
+#include <stdio.h>
+
+#define TRACE(STRING, NUM) \
+do \
+ { \
+ fprintf (stderr, "%s%s: %2d\n", (NUM == 0) ? "\n" : "", \
+ STRING, (int)NUM); \
+ fflush (stderr); \
+ } \
+while (0)
+
+#ifndef FAIL_FORMAT
+#define FAIL_FORMAT "%ld"
+#define FAIL_CAST(X) ((long)(X))
+#endif
+
+#define FAIL(EXP, GOT) \
+do \
+ { \
+ fprintf (stderr, "Expected: " FAIL_FORMAT ", got " FAIL_FORMAT "\n", \
+ FAIL_CAST (EXP), FAIL_CAST (GOT)); \
+ fflush (stderr); \
+ abort (); \
+ } \
+while (0)
+
+#else
+#define TRACE(STRING, NUM)
+#define FAIL(EXP, GOT) abort ()
+#endif
+
+static void check (RTYPE, RTYPE) __attribute__((__noinline__));
+static vector TYPE deoptimize (vector TYPE) __attribute__((__noinline__));
+static vector TYPE *deoptimize_ptr (vector TYPE *) __attribute__((__noinline__));
+
static void
-check (TYPE expected, TYPE got)
+check (RTYPE expected, RTYPE got)
{
if (expected != got)
- abort ();
+ FAIL (expected, got);
}
-static vector TYPE deoptimize (vector TYPE) __attribute__((__noinline__));
-static vector TYPE *deoptimize_ptr (vector TYPE *) __attribute__((__noinline__));
-
static vector TYPE
deoptimize (vector TYPE a)
{
@@ -29,116 +66,116 @@ deoptimize_ptr (vector TYPE *p)
/* Tests for the normal case of vec_extract where the vector is in a register
and returning the result in a register as a return value. */
-TYPE
+RTYPE
get_auto_n (vector TYPE a, ssize_t n)
{
- return vec_extract (a, n);
+ return (RTYPE) vec_extract (a, n);
}
-TYPE
+RTYPE
get_auto_0 (vector TYPE a)
{
- return vec_extract (a, 0);
+ return (RTYPE) vec_extract (a, 0);
}
-TYPE
+RTYPE
get_auto_1 (vector TYPE a)
{
- return vec_extract (a, 1);
+ return (RTYPE) vec_extract (a, 1);
}
#if ELEMENTS >= 4
-TYPE
+RTYPE
get_auto_2 (vector TYPE a)
{
- return vec_extract (a, 2);
+ return (RTYPE) vec_extract (a, 2);
}
-TYPE
+RTYPE
get_auto_3 (vector TYPE a)
{
- return vec_extract (a, 3);
+ return (RTYPE) vec_extract (a, 3);
}
#if ELEMENTS >= 8
-TYPE
+RTYPE
get_auto_4 (vector TYPE a)
{
- return vec_extract (a, 4);
+ return (RTYPE) vec_extract (a, 4);
}
-TYPE
+RTYPE
get_auto_5 (vector TYPE a)
{
- return vec_extract (a, 5);
+ return (RTYPE) vec_extract (a, 5);
}
-TYPE
+RTYPE
get_auto_6 (vector TYPE a)
{
- return vec_extract (a, 6);
+ return (RTYPE) vec_extract (a, 6);
}
-TYPE
+RTYPE
get_auto_7 (vector TYPE a)
{
- return vec_extract (a, 7);
+ return (RTYPE) vec_extract (a, 7);
}
#if ELEMENTS >= 16
-TYPE
+RTYPE
get_auto_8 (vector TYPE a)
{
- return vec_extract (a, 8);
+ return (RTYPE) vec_extract (a, 8);
}
-TYPE
+RTYPE
get_auto_9 (vector TYPE a)
{
- return vec_extract (a, 9);
+ return (RTYPE) vec_extract (a, 9);
}
-TYPE
+RTYPE
get_auto_10 (vector TYPE a)
{
- return vec_extract (a, 10);
+ return (RTYPE) vec_extract (a, 10);
}
-TYPE
+RTYPE
get_auto_11 (vector TYPE a)
{
- return vec_extract (a, 11);
+ return (RTYPE) vec_extract (a, 11);
}
-TYPE
+RTYPE
get_auto_12 (vector TYPE a)
{
- return vec_extract (a, 12);
+ return (RTYPE) vec_extract (a, 12);
}
-TYPE
+RTYPE
get_auto_13 (vector TYPE a)
{
- return vec_extract (a, 13);
+ return (RTYPE) vec_extract (a, 13);
}
-TYPE
+RTYPE
get_auto_14 (vector TYPE a)
{
- return vec_extract (a, 14);
+ return (RTYPE) vec_extract (a, 14);
}
-TYPE
+RTYPE
get_auto_15 (vector TYPE a)
{
- return vec_extract (a, 15);
+ return (RTYPE) vec_extract (a, 15);
}
#endif
#endif
#endif
-typedef TYPE (*auto_func_type) (vector TYPE);
+typedef RTYPE (*auto_func_type) (vector TYPE);
static auto_func_type get_auto_const[] = {
get_auto_0,
@@ -173,7 +210,10 @@ do_auto (vector TYPE a)
size_t i;
for (i = 0; i < sizeof (get_auto_const) / sizeof (get_auto_const[0]); i++)
- check (get_auto_n (a, i), (get_auto_const[i]) (a));
+ {
+ TRACE ("auto", i);
+ check (get_auto_n (a, i), (get_auto_const[i]) (a));
+ }
}
@@ -182,115 +222,115 @@ do_auto (vector TYPE a)
in the right position to use a scalar store). */
void
-get_store_n (TYPE *p, vector TYPE a, ssize_t n)
+get_store_n (RTYPE *p, vector TYPE a, ssize_t n)
{
- *p = vec_extract (a, n);
+ *p = (RTYPE) vec_extract (a, n);
}
void
-get_store_0 (TYPE *p, vector TYPE a)
+get_store_0 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 0);
+ *p = (RTYPE) vec_extract (a, 0);
}
void
-get_store_1 (TYPE *p, vector TYPE a)
+get_store_1 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 1);
+ *p = (RTYPE) vec_extract (a, 1);
}
#if ELEMENTS >= 4
void
-get_store_2 (TYPE *p, vector TYPE a)
+get_store_2 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 2);
+ *p = (RTYPE) vec_extract (a, 2);
}
void
-get_store_3 (TYPE *p, vector TYPE a)
+get_store_3 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 3);
+ *p = (RTYPE) vec_extract (a, 3);
}
#if ELEMENTS >= 8
void
-get_store_4 (TYPE *p, vector TYPE a)
+get_store_4 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 4);
+ *p = (RTYPE) vec_extract (a, 4);
}
void
-get_store_5 (TYPE *p, vector TYPE a)
+get_store_5 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 5);
+ *p = (RTYPE) vec_extract (a, 5);
}
void
-get_store_6 (TYPE *p, vector TYPE a)
+get_store_6 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 6);
+ *p = (RTYPE) vec_extract (a, 6);
}
void
-get_store_7 (TYPE *p, vector TYPE a)
+get_store_7 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 7);
+ *p = (RTYPE) vec_extract (a, 7);
}
#if ELEMENTS >= 16
void
-get_store_8 (TYPE *p, vector TYPE a)
+get_store_8 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 8);
+ *p = (RTYPE) vec_extract (a, 8);
}
void
-get_store_9 (TYPE *p, vector TYPE a)
+get_store_9 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 9);
+ *p = (RTYPE) vec_extract (a, 9);
}
void
-get_store_10 (TYPE *p, vector TYPE a)
+get_store_10 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 10);
+ *p = (RTYPE) vec_extract (a, 10);
}
void
-get_store_11 (TYPE *p, vector TYPE a)
+get_store_11 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 11);
+ *p = (RTYPE) vec_extract (a, 11);
}
void
-get_store_12 (TYPE *p, vector TYPE a)
+get_store_12 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 12);
+ *p = (RTYPE) vec_extract (a, 12);
}
void
-get_store_13 (TYPE *p, vector TYPE a)
+get_store_13 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 13);
+ *p = (RTYPE) vec_extract (a, 13);
}
void
-get_store_14 (TYPE *p, vector TYPE a)
+get_store_14 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 14);
+ *p = (RTYPE) vec_extract (a, 14);
}
void
-get_store_15 (TYPE *p, vector TYPE a)
+get_store_15 (RTYPE *p, vector TYPE a)
{
- *p = vec_extract (a, 15);
+ *p = (RTYPE) vec_extract (a, 15);
}
#endif
#endif
#endif
-typedef void (*store_func_type) (TYPE *, vector TYPE);
+typedef void (*store_func_type) (RTYPE *, vector TYPE);
static store_func_type get_store_const[] = {
get_store_0,
@@ -323,10 +363,11 @@ void
do_store (vector TYPE a)
{
size_t i;
- TYPE result_var, result_const;
+ RTYPE result_var, result_const;
for (i = 0; i < sizeof (get_store_const) / sizeof (get_store_const[0]); i++)
{
+ TRACE ("store", i);
get_store_n (&result_var, a, i);
(get_store_const[i]) (&result_const, a);
check (result_var, result_const);
@@ -337,116 +378,116 @@ do_store (vector TYPE a)
/* Tests for vec_extract where the vector comes from memory (the compiler can
optimize this by doing a scalar load without having to load the whole
vector). */
-TYPE
+RTYPE
get_pointer_n (vector TYPE *p, ssize_t n)
{
- return vec_extract (*p, n);
+ return (RTYPE) vec_extract (*p, n);
}
-TYPE
+RTYPE
get_pointer_0 (vector TYPE *p)
{
- return vec_extract (*p, 0);
+ return (RTYPE) vec_extract (*p, 0);
}
-TYPE
+RTYPE
get_pointer_1 (vector TYPE *p)
{
- return vec_extract (*p, 1);
+ return (RTYPE) vec_extract (*p, 1);
}
#if ELEMENTS >= 4
-TYPE
+RTYPE
get_pointer_2 (vector TYPE *p)
{
- return vec_extract (*p, 2);
+ return (RTYPE) vec_extract (*p, 2);
}
-TYPE
+RTYPE
get_pointer_3 (vector TYPE *p)
{
- return vec_extract (*p, 3);
+ return (RTYPE) vec_extract (*p, 3);
}
#if ELEMENTS >= 8
-TYPE
+RTYPE
get_pointer_4 (vector TYPE *p)
{
- return vec_extract (*p, 4);
+ return (RTYPE) vec_extract (*p, 4);
}
-static TYPE
+RTYPE
get_pointer_5 (vector TYPE *p)
{
- return vec_extract (*p, 5);
+ return (RTYPE) vec_extract (*p, 5);
}
-TYPE
+RTYPE
get_pointer_6 (vector TYPE *p)
{
- return vec_extract (*p, 6);
+ return (RTYPE) vec_extract (*p, 6);
}
-TYPE
+RTYPE
get_pointer_7 (vector TYPE *p)
{
- return vec_extract (*p, 7);
+ return (RTYPE) vec_extract (*p, 7);
}
#if ELEMENTS >= 16
-TYPE
+RTYPE
get_pointer_8 (vector TYPE *p)
{
- return vec_extract (*p, 8);
+ return (RTYPE) vec_extract (*p, 8);
}
-TYPE
+RTYPE
get_pointer_9 (vector TYPE *p)
{
- return vec_extract (*p, 9);
+ return (RTYPE) vec_extract (*p, 9);
}
-TYPE
+RTYPE
get_pointer_10 (vector TYPE *p)
{
- return vec_extract (*p, 10);
+ return (RTYPE) vec_extract (*p, 10);
}
-TYPE
+RTYPE
get_pointer_11 (vector TYPE *p)
{
- return vec_extract (*p, 11);
+ return (RTYPE) vec_extract (*p, 11);
}
-TYPE
+RTYPE
get_pointer_12 (vector TYPE *p)
{
- return vec_extract (*p, 12);
+ return (RTYPE) vec_extract (*p, 12);
}
-TYPE
+RTYPE
get_pointer_13 (vector TYPE *p)
{
- return vec_extract (*p, 13);
+ return (RTYPE) vec_extract (*p, 13);
}
-TYPE
+RTYPE
get_pointer_14 (vector TYPE *p)
{
- return vec_extract (*p, 14);
+ return (RTYPE) vec_extract (*p, 14);
}
-TYPE
+RTYPE
get_pointer_15 (vector TYPE *p)
{
- return vec_extract (*p, 15);
+ return (RTYPE) vec_extract (*p, 15);
}
#endif
#endif
#endif
-typedef TYPE (*pointer_func_type) (vector TYPE *);
+typedef RTYPE (*pointer_func_type) (vector TYPE *);
static pointer_func_type get_pointer_const[] = {
get_pointer_0,
@@ -481,7 +522,10 @@ do_pointer (vector TYPE *p)
size_t i;
for (i = 0; i < sizeof (get_pointer_const) / sizeof (get_pointer_const[0]); i++)
- check (get_pointer_n (p, i), (get_pointer_const[i]) (p));
+ {
+ TRACE ("pointer", i);
+ check (get_pointer_n (p, i), (get_pointer_const[i]) (p));
+ }
}
@@ -489,116 +533,116 @@ do_pointer (vector TYPE *p)
operation. This is to make sure that if the compiler optimizes vec_extract
from memory to be a scalar load, the address is correctly adjusted. */
-TYPE
+RTYPE
get_indexed_n (vector TYPE *p, size_t x, ssize_t n)
{
- return vec_extract (p[x], n);
+ return (RTYPE) vec_extract (p[x], n);
}
-TYPE
+RTYPE
get_indexed_0 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 0);
+ return (RTYPE) vec_extract (p[x], 0);
}
-TYPE
+RTYPE
get_indexed_1 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 1);
+ return (RTYPE) vec_extract (p[x], 1);
}
#if ELEMENTS >= 4
-TYPE
+RTYPE
get_indexed_2 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 2);
+ return (RTYPE) vec_extract (p[x], 2);
}
-TYPE
+RTYPE
get_indexed_3 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 3);
+ return (RTYPE) vec_extract (p[x], 3);
}
#if ELEMENTS >= 8
-TYPE
+RTYPE
get_indexed_4 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 4);
+ return (RTYPE) vec_extract (p[x], 4);
}
-static TYPE
+RTYPE
get_indexed_5 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 5);
+ return (RTYPE) vec_extract (p[x], 5);
}
-TYPE
+RTYPE
get_indexed_6 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 6);
+ return (RTYPE) vec_extract (p[x], 6);
}
-TYPE
+RTYPE
get_indexed_7 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 7);
+ return (RTYPE) vec_extract (p[x], 7);
}
#if ELEMENTS >= 16
-TYPE
+RTYPE
get_indexed_8 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 8);
+ return (RTYPE) vec_extract (p[x], 8);
}
-TYPE
+RTYPE
get_indexed_9 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 9);
+ return (RTYPE) vec_extract (p[x], 9);
}
-TYPE
+RTYPE
get_indexed_10 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 10);
+ return (RTYPE) vec_extract (p[x], 10);
}
-TYPE
+RTYPE
get_indexed_11 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 11);
+ return (RTYPE) vec_extract (p[x], 11);
}
-TYPE
+RTYPE
get_indexed_12 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 12);
+ return (RTYPE) vec_extract (p[x], 12);
}
-TYPE
+RTYPE
get_indexed_13 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 13);
+ return (RTYPE) vec_extract (p[x], 13);
}
-TYPE
+RTYPE
get_indexed_14 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 14);
+ return (RTYPE) vec_extract (p[x], 14);
}
-TYPE
+RTYPE
get_indexed_15 (vector TYPE *p, size_t x)
{
- return vec_extract (p[x], 15);
+ return (RTYPE) vec_extract (p[x], 15);
}
#endif
#endif
#endif
-typedef TYPE (*indexed_func_type) (vector TYPE *, size_t);
+typedef RTYPE (*indexed_func_type) (vector TYPE *, size_t);
static indexed_func_type get_indexed_const[] = {
get_indexed_0,
@@ -633,7 +677,10 @@ do_indexed (vector TYPE *p, size_t x)
size_t i;
for (i = 0; i < sizeof (get_indexed_const) / sizeof (get_indexed_const[0]); i++)
- check (get_indexed_n (p, x, i), (get_indexed_const[i]) (p, x));
+ {
+ TRACE ("indexed", i);
+ check (get_indexed_n (p, x, i), (get_indexed_const[i]) (p, x));
+ }
}
@@ -641,116 +688,116 @@ do_indexed (vector TYPE *p, size_t x)
with a pointer and a constant offset. This will occur in ISA 3.0 which
added d-form memory addressing for vectors. */
-TYPE
+RTYPE
get_ptr_plus1_n (vector TYPE *p, ssize_t n)
{
- return vec_extract (p[1], n);
+ return (RTYPE) vec_extract (p[1], n);
}
-TYPE
+RTYPE
get_ptr_plus1_0 (vector TYPE *p)
{
- return vec_extract (p[1], 0);
+ return (RTYPE) vec_extract (p[1], 0);
}
-TYPE
+RTYPE
get_ptr_plus1_1 (vector TYPE *p)
{
- return vec_extract (p[1], 1);
+ return (RTYPE) vec_extract (p[1], 1);
}
#if ELEMENTS >= 4
-TYPE
+RTYPE
get_ptr_plus1_2 (vector TYPE *p)
{
- return vec_extract (p[1], 2);
+ return (RTYPE) vec_extract (p[1], 2);
}
-TYPE
+RTYPE
get_ptr_plus1_3 (vector TYPE *p)
{
- return vec_extract (p[1], 3);
+ return (RTYPE) vec_extract (p[1], 3);
}
#if ELEMENTS >= 8
-TYPE
+RTYPE
get_ptr_plus1_4 (vector TYPE *p)
{
- return vec_extract (p[1], 4);
+ return (RTYPE) vec_extract (p[1], 4);
}
-static TYPE
+RTYPE
get_ptr_plus1_5 (vector TYPE *p)
{
- return vec_extract (p[1], 5);
+ return (RTYPE) vec_extract (p[1], 5);
}
-TYPE
+RTYPE
get_ptr_plus1_6 (vector TYPE *p)
{
- return vec_extract (p[1], 6);
+ return (RTYPE) vec_extract (p[1], 6);
}
-TYPE
+RTYPE
get_ptr_plus1_7 (vector TYPE *p)
{
- return vec_extract (p[1], 7);
+ return (RTYPE) vec_extract (p[1], 7);
}
#if ELEMENTS >= 16
-TYPE
+RTYPE
get_ptr_plus1_8 (vector TYPE *p)
{
- return vec_extract (p[1], 8);
+ return (RTYPE) vec_extract (p[1], 8);
}
-TYPE
+RTYPE
get_ptr_plus1_9 (vector TYPE *p)
{
- return vec_extract (p[1], 9);
+ return (RTYPE) vec_extract (p[1], 9);
}
-TYPE
+RTYPE
get_ptr_plus1_10 (vector TYPE *p)
{
- return vec_extract (p[1], 10);
+ return (RTYPE) vec_extract (p[1], 10);
}
-TYPE
+RTYPE
get_ptr_plus1_11 (vector TYPE *p)
{
- return vec_extract (p[1], 11);
+ return (RTYPE) vec_extract (p[1], 11);
}
-TYPE
+RTYPE
get_ptr_plus1_12 (vector TYPE *p)
{
- return vec_extract (p[1], 12);
+ return (RTYPE) vec_extract (p[1], 12);
}
-TYPE
+RTYPE
get_ptr_plus1_13 (vector TYPE *p)
{
- return vec_extract (p[1], 13);
+ return (RTYPE) vec_extract (p[1], 13);
}
-TYPE
+RTYPE
get_ptr_plus1_14 (vector TYPE *p)
{
- return vec_extract (p[1], 14);
+ return (RTYPE) vec_extract (p[1], 14);
}
-TYPE
+RTYPE
get_ptr_plus1_15 (vector TYPE *p)
{
- return vec_extract (p[1], 15);
+ return (RTYPE) vec_extract (p[1], 15);
}
#endif
#endif
#endif
-typedef TYPE (*pointer_func_type) (vector TYPE *);
+typedef RTYPE (*pointer_func_type) (vector TYPE *);
static pointer_func_type get_ptr_plus1_const[] = {
get_ptr_plus1_0,
@@ -785,7 +832,10 @@ do_ptr_plus1 (vector TYPE *p)
size_t i;
for (i = 0; i < sizeof (get_ptr_plus1_const) / sizeof (get_ptr_plus1_const[0]); i++)
- check (get_ptr_plus1_n (p, i), (get_ptr_plus1_const[i]) (p));
+ {
+ TRACE ("ptr_plus1", i);
+ check (get_ptr_plus1_n (p, i), (get_ptr_plus1_const[i]) (p));
+ }
}
@@ -793,116 +843,116 @@ do_ptr_plus1 (vector TYPE *p)
static vector TYPE s;
-TYPE
+RTYPE
get_static_n (ssize_t n)
{
- return vec_extract (s, n);
+ return (RTYPE) vec_extract (s, n);
}
-TYPE
+RTYPE
get_static_0 (void)
{
- return vec_extract (s, 0);
+ return (RTYPE) vec_extract (s, 0);
}
-TYPE
+RTYPE
get_static_1 (void)
{
- return vec_extract (s, 1);
+ return (RTYPE) vec_extract (s, 1);
}
#if ELEMENTS >= 4
-TYPE
+RTYPE
get_static_2 (void)
{
- return vec_extract (s, 2);
+ return (RTYPE) vec_extract (s, 2);
}
-TYPE
+RTYPE
get_static_3 (void)
{
- return vec_extract (s, 3);
+ return (RTYPE) vec_extract (s, 3);
}
#if ELEMENTS >= 8
-TYPE
+RTYPE
get_static_4 (void)
{
- return vec_extract (s, 4);
+ return (RTYPE) vec_extract (s, 4);
}
-TYPE
+RTYPE
get_static_5 (void)
{
- return vec_extract (s, 5);
+ return (RTYPE) vec_extract (s, 5);
}
-TYPE
+RTYPE
get_static_6 (void)
{
- return vec_extract (s, 6);
+ return (RTYPE) vec_extract (s, 6);
}
-TYPE
+RTYPE
get_static_7 (void)
{
- return vec_extract (s, 7);
+ return (RTYPE) vec_extract (s, 7);
}
#if ELEMENTS >= 16
-TYPE
+RTYPE
get_static_8 (void)
{
- return vec_extract (s, 8);
+ return (RTYPE) vec_extract (s, 8);
}
-TYPE
+RTYPE
get_static_9 (void)
{
- return vec_extract (s, 9);
+ return (RTYPE) vec_extract (s, 9);
}
-TYPE
+RTYPE
get_static_10 (void)
{
- return vec_extract (s, 10);
+ return (RTYPE) vec_extract (s, 10);
}
-TYPE
+RTYPE
get_static_11 (void)
{
- return vec_extract (s, 11);
+ return (RTYPE) vec_extract (s, 11);
}
-TYPE
+RTYPE
get_static_12 (void)
{
- return vec_extract (s, 12);
+ return (RTYPE) vec_extract (s, 12);
}
-TYPE
+RTYPE
get_static_13 (void)
{
- return vec_extract (s, 13);
+ return (RTYPE) vec_extract (s, 13);
}
-TYPE
+RTYPE
get_static_14 (void)
{
- return vec_extract (s, 14);
+ return (RTYPE) vec_extract (s, 14);
}
-TYPE
+RTYPE
get_static_15 (void)
{
- return vec_extract (s, 15);
+ return (RTYPE) vec_extract (s, 15);
}
#endif
#endif
#endif
-typedef TYPE (*static_func_type) (void);
+typedef RTYPE (*static_func_type) (void);
static static_func_type get_static_const[] = {
get_static_0,
@@ -937,7 +987,10 @@ do_static (void)
size_t i;
for (i = 0; i < sizeof (get_static_const) / sizeof (get_static_const[0]); i++)
- check (get_static_n (i), (get_static_const[i]) ());
+ {
+ TRACE ("static", i);
+ check (get_static_n (i), (get_static_const[i]) ());
+ }
}
@@ -945,116 +998,116 @@ do_static (void)
vector TYPE g;
-TYPE
+RTYPE
get_global_n (ssize_t n)
{
- return vec_extract (g, n);
+ return (RTYPE) vec_extract (g, n);
}
-TYPE
+RTYPE
get_global_0 (void)
{
- return vec_extract (g, 0);
+ return (RTYPE) vec_extract (g, 0);
}
-TYPE
+RTYPE
get_global_1 (void)
{
- return vec_extract (g, 1);
+ return (RTYPE) vec_extract (g, 1);
}
#if ELEMENTS >= 4
-TYPE
+RTYPE
get_global_2 (void)
{
- return vec_extract (g, 2);
+ return (RTYPE) vec_extract (g, 2);
}
-TYPE
+RTYPE
get_global_3 (void)
{
- return vec_extract (g, 3);
+ return (RTYPE) vec_extract (g, 3);
}
#if ELEMENTS >= 8
-TYPE
+RTYPE
get_global_4 (void)
{
- return vec_extract (g, 4);
+ return (RTYPE) vec_extract (g, 4);
}
-TYPE
+RTYPE
get_global_5 (void)
{
- return vec_extract (g, 5);
+ return (RTYPE) vec_extract (g, 5);
}
-TYPE
+RTYPE
get_global_6 (void)
{
- return vec_extract (g, 6);
+ return (RTYPE) vec_extract (g, 6);
}
-TYPE
+RTYPE
get_global_7 (void)
{
- return vec_extract (g, 7);
+ return (RTYPE) vec_extract (g, 7);
}
#if ELEMENTS >= 16
-TYPE
+RTYPE
get_global_8 (void)
{
- return vec_extract (g, 8);
+ return (RTYPE) vec_extract (g, 8);
}
-TYPE
+RTYPE
get_global_9 (void)
{
- return vec_extract (g, 9);
+ return (RTYPE) vec_extract (g, 9);
}
-TYPE
+RTYPE
get_global_10 (void)
{
- return vec_extract (g, 10);
+ return (RTYPE) vec_extract (g, 10);
}
-TYPE
+RTYPE
get_global_11 (void)
{
- return vec_extract (g, 11);
+ return (RTYPE) vec_extract (g, 11);
}
-TYPE
+RTYPE
get_global_12 (void)
{
- return vec_extract (g, 12);
+ return (RTYPE) vec_extract (g, 12);
}
-TYPE
+RTYPE
get_global_13 (void)
{
- return vec_extract (g, 13);
+ return (RTYPE) vec_extract (g, 13);
}
-TYPE
+RTYPE
get_global_14 (void)
{
- return vec_extract (g, 14);
+ return (RTYPE) vec_extract (g, 14);
}
-TYPE
+RTYPE
get_global_15 (void)
{
- return vec_extract (g, 15);
+ return (RTYPE) vec_extract (g, 15);
}
#endif
#endif
#endif
-typedef TYPE (*global_func_type) (void);
+typedef RTYPE (*global_func_type) (void);
static global_func_type get_global_const[] = {
get_global_0,
@@ -1089,7 +1142,10 @@ do_global (void)
size_t i;
for (i = 0; i < sizeof (get_global_const) / sizeof (get_global_const[0]); i++)
- check (get_global_n (i), (get_global_const[i]) ());
+ {
+ TRACE ("global", i);
+ check (get_global_n (i), (get_global_const[i]) ());
+ }
}
===================================================================
@@ -3,6 +3,8 @@
/* { dg-options "-O2 -mvsx" } */
#define TYPE double
+#define FAIL_FORMAT "%g"
+#define FAIL_CAST(X) ((double)(X))
#define ELEMENTS 2
#define INITIAL { 10.0, -20.0 }
===================================================================
@@ -3,6 +3,8 @@
/* { dg-options "-O2 -mvsx" } */
#define TYPE float
+#define FAIL_FORMAT "%g"
+#define FAIL_CAST(X) ((double)(X))
#define ELEMENTS 4
#define INITIAL { 10.0f, -20.0f, 30.0f, -40.0f }
===================================================================
@@ -0,0 +1,12 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#define TYPE int
+#define RTYPE double
+#define FAIL_FORMAT "%g"
+#define FAIL_CAST(X) ((double)(X))
+#define ELEMENTS 4
+#define INITIAL { 10, -20, 30, -40 }
+
+#include "vec-extract.h"
===================================================================
@@ -0,0 +1,12 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#define TYPE unsigned int
+#define RTYPE double
+#define FAIL_FORMAT "%g"
+#define FAIL_CAST(X) ((double)(X))
+#define ELEMENTS 4
+#define INITIAL { 1, 2, 0xff03, 0xff04 }
+
+#include "vec-extract.h"
===================================================================
@@ -0,0 +1,13 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#define TYPE unsigned char
+#define RTYPE double
+#define FAIL_FORMAT "%g"
+#define FAIL_CAST(X) ((double)(X))
+#define ELEMENTS 16
+#define INITIAL \
+ { 1, 2, 3, 4, 5, 6, 7, 8, 240, 241, 242, 243, 244, 245, 246, 247 }
+
+#include "vec-extract.h"
===================================================================
@@ -0,0 +1,14 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#define TYPE signed char
+#define RTYPE double
+#define FAIL_FORMAT "%g"
+#define FAIL_CAST(X) ((double)(X))
+#define ELEMENTS 16
+#define INITIAL \
+ { 10, -20, 30, -40, 50, -60, 70, -80, \
+ 90, -100, 110, -120, 30, -40, 50, -60 }
+
+#include "vec-extract.h"
===================================================================
@@ -0,0 +1,12 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#define TYPE unsigned short
+#define RTYPE double
+#define FAIL_FORMAT "%g"
+#define FAIL_CAST(X) ((double)(X))
+#define ELEMENTS 8
+#define INITIAL { 1, 2, 3, 4, 0xf1, 0xf2, 0xf3, 0xf4 }
+
+#include "vec-extract.h"
===================================================================
@@ -0,0 +1,12 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#define TYPE short
+#define RTYPE double
+#define FAIL_FORMAT "%g"
+#define FAIL_CAST(X) ((double)(X))
+#define ELEMENTS 8
+#define INITIAL { 10, -20, 30, -40, 50, -60, 70, 80 }
+
+#include "vec-extract.h"
===================================================================
@@ -3,24 +3,107 @@
/* { dg-require-effective-target powerpc_p9vector_ok } */
/* { dg-options "-mcpu=power9 -O2" } */
+/* Test to make sure VEXTU{B,H,W}{L,R}X is generated for various vector extract
+ operations for ISA 3.0 (-mcpu=power9). In addition, make sure that neither
+ of the old methods of doing vector extracts is used, either by explicit
+ stores to the stack or by using direct move instructions. */
+
#include <altivec.h>
-int extract_int_0 (vector int a) { return vec_extract (a, 0); }
-int extract_int_3 (vector int a) { return vec_extract (a, 3); }
+int
+extract_int_0 (vector int a)
+{
+ int b = vec_extract (a, 0);
+ return b;
+}
+
+int
+extract_int_3 (vector int a)
+{
+ int b = vec_extract (a, 3);
+ return b;
+}
+
+unsigned int
+extract_uint_0 (vector unsigned int a)
+{
+ unsigned int b = vec_extract (a, 0);
+ return b;
+}
+
+unsigned int
+extract_uint_3 (vector unsigned int a)
+{
+ unsigned int b = vec_extract (a, 3);
+ return b;
+}
+
+short
+extract_short_0 (vector short a)
+{
+ short b = vec_extract (a, 0);
+ return b;
+}
+
+short
+extract_short_7 (vector short a)
+{
+ short b = vec_extract (a, 7);
+ return b;
+}
+
+unsigned short
+extract_ushort_0 (vector unsigned short a)
+{
+ unsigned short b = vec_extract (a, 0);
+ return b;
+}
+
+unsigned short
+extract_ushort_7 (vector unsigned short a)
+{
+ unsigned short b = vec_extract (a, 7);
+ return b;
+}
+
+signed char
+extract_schar_0 (vector signed char a)
+{
+ signed char b = vec_extract (a, 0);
+ return b;
+}
+
+signed char
+extract_schar_15 (vector signed char a)
+{
+ signed char b = vec_extract (a, 15);
+ return b;
+}
-int extract_short_0 (vector short a) { return vec_extract (a, 0); }
-int extract_short_3 (vector short a) { return vec_extract (a, 7); }
+unsigned char
+extract_uchar_0 (vector unsigned char a)
+{
+ unsigned char b = vec_extract (a, 0);
+ return b;
+}
-int extract_schar_0 (vector signed char a) { return vec_extract (a, 0); }
-int extract_schar_3 (vector signed char a) { return vec_extract (a, 15); }
+unsigned char
+extract_uchar_15 (vector unsigned char a)
+{
+ unsigned char b = vec_extract (a, 15);
+ return b;
+}
-/* { dg-final { scan-assembler "vextractub" } } */
-/* { dg-final { scan-assembler "vextractuh" } } */
-/* { dg-final { scan-assembler "xxextractuw" } } */
-/* { dg-final { scan-assembler "mfvsr" } } */
-/* { dg-final { scan-assembler-not "stxvd2x" } } */
-/* { dg-final { scan-assembler-not "stxv" } } */
-/* { dg-final { scan-assembler-not "lwa" } } */
-/* { dg-final { scan-assembler-not "lwz" } } */
-/* { dg-final { scan-assembler-not "lha" } } */
-/* { dg-final { scan-assembler-not "lhz" } } */
+/* { dg-final { scan-assembler "vextub\[lr\]x " } } */
+/* { dg-final { scan-assembler "vextuh\[lr\]x " } } */
+/* { dg-final { scan-assembler "vextuw\[lr\]x " } } */
+/* { dg-final { scan-assembler "extsb " } } */
+/* { dg-final { scan-assembler "extsh " } } */
+/* { dg-final { scan-assembler "extsw " } } */
+/* { dg-final { scan-assembler-not "m\[ft\]vsr" } } */
+/* { dg-final { scan-assembler-not "stxvd2x " } } */
+/* { dg-final { scan-assembler-not "stxv " } } */
+/* { dg-final { scan-assembler-not "lwa " } } */
+/* { dg-final { scan-assembler-not "lwz " } } */
+/* { dg-final { scan-assembler-not "lha " } } */
+/* { dg-final { scan-assembler-not "lhz " } } */
===================================================================
@@ -0,0 +1,108 @@
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+/* Test that under ISA 3.0 (-mcpu=power9), the compiler optimizes conversion to
+ double after a vec_extract to use the VEXTRACTU{B,H} or XXEXTRACTUW
+ instructions (which leaves the result in a vector register), and not the
+ VEXTU{B,H,W}{L,R}X instructions (which needs a direct move to do the floating
+ point conversion). */
+
+#include <altivec.h>
+
+double
+fpcvt_int_0 (vector int a)
+{
+ int b = vec_extract (a, 0);
+ return (double)b;
+}
+
+double
+fpcvt_int_3 (vector int a)
+{
+ int b = vec_extract (a, 3);
+ return (double)b;
+}
+
+double
+fpcvt_uint_0 (vector unsigned int a)
+{
+ unsigned int b = vec_extract (a, 0);
+ return (double)b;
+}
+
+double
+fpcvt_uint_3 (vector unsigned int a)
+{
+ unsigned int b = vec_extract (a, 3);
+ return (double)b;
+}
+
+double
+fpcvt_short_0 (vector short a)
+{
+ short b = vec_extract (a, 0);
+ return (double)b;
+}
+
+double
+fpcvt_short_7 (vector short a)
+{
+ short b = vec_extract (a, 7);
+ return (double)b;
+}
+
+double
+fpcvt_ushort_0 (vector unsigned short a)
+{
+ unsigned short b = vec_extract (a, 0);
+ return (double)b;
+}
+
+double
+fpcvt_ushort_7 (vector unsigned short a)
+{
+ unsigned short b = vec_extract (a, 7);
+ return (double)b;
+}
+
+double
+fpcvt_schar_0 (vector signed char a)
+{
+ signed char b = vec_extract (a, 0);
+ return (double)b;
+}
+
+double
+fpcvt_schar_15 (vector signed char a)
+{
+ signed char b = vec_extract (a, 15);
+ return (double)b;
+}
+
+double
+fpcvt_uchar_0 (vector unsigned char a)
+{
+ unsigned char b = vec_extract (a, 0);
+ return (double)b;
+}
+
+double
+fpcvt_uchar_15 (vector unsigned char a)
+{
+ unsigned char b = vec_extract (a, 15);
+ return (double)b;
+}
+
+/* { dg-final { scan-assembler "vextractu\[bh\] " } } */
+/* { dg-final { scan-assembler "vexts\[bh\]2d " } } */
+/* { dg-final { scan-assembler "vspltw " } } */
+/* { dg-final { scan-assembler "xscvsxddp " } } */
+/* { dg-final { scan-assembler "xvcvsxwdp " } } */
+/* { dg-final { scan-assembler "xvcvuxwdp " } } */
+/* { dg-final { scan-assembler-not "exts\[bhw\] " } } */
+/* { dg-final { scan-assembler-not "stxv" } } */
+/* { dg-final { scan-assembler-not "m\[ft\]vsrd " } } */
+/* { dg-final { scan-assembler-not "m\[ft\]vsrw\[az\] " } } */
+/* { dg-final { scan-assembler-not "l\[hw\]\[az\] " } } */