===================================================================
@@ -210,6 +210,11 @@ (define_predicate "const_0_to_7_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+;; Match op = 0..12
+(define_predicate "const_0_to_12_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 12)")))
+
;; Match op = 0..15
(define_predicate "const_0_to_15_operand"
(and (match_code "const_int")
===================================================================
@@ -877,7 +877,24 @@
                                     "__builtin_vsx_" NAME,             /* NAME */ \
                                     RS6000_BTM_P9_VECTOR,              /* MASK */ \
                                     (RS6000_BTC_ ## ATTR               /* ATTR */ \
                                      | RS6000_BTC_BINARY), \
                                     CODE_FOR_ ## ICODE)                /* ICODE */
+
+#define BU_P9V_VSX_3(ENUM, NAME, ATTR, ICODE) \
+  RS6000_BUILTIN_3 (P9V_BUILTIN_ ## ENUM,                              /* ENUM */ \
+                                     "__builtin_vsx_" NAME,            /* NAME */ \
+                                     RS6000_BTM_P9_VECTOR,             /* MASK */ \
+                                     (RS6000_BTC_ ## ATTR              /* ATTR */ \
+                                      | RS6000_BTC_TERNARY), \
+                                     CODE_FOR_ ## ICODE)               /* ICODE */
+
+#define BU_P9V_64BIT_VSX_3(ENUM, NAME, ATTR, ICODE) \
+  RS6000_BUILTIN_3 (P9V_BUILTIN_ ## ENUM,                              /* ENUM */ \
+                                     "__builtin_vsx_" NAME,            /* NAME */ \
+                                     (RS6000_BTM_64BIT \
+                                      | RS6000_BTM_P9_VECTOR),         /* MASK */ \
+                                     (RS6000_BTC_ ## ATTR              /* ATTR */ \
+                                      | RS6000_BTC_TERNARY), \
+                                     CODE_FOR_ ## ICODE)               /* ICODE */
/* See the comment on BU_ALTIVEC_P. */
@@ -1967,6 +1976,11 @@ BU_P9V_AV_2 (VEXTUHRX, "vextuhrx", CONS
BU_P9V_AV_2 (VEXTUWLX, "vextuwlx", CONST, vextuwlx)
BU_P9V_AV_2 (VEXTUWRX, "vextuwrx", CONST, vextuwrx)
+/* Insert/extract a 4-byte word into/from a vector.  */
+BU_P9V_VSX_2 (VEXTRACT4B, "vextract4b", CONST, vextract4b)
+BU_P9V_VSX_3 (VINSERT4B, "vinsert4b", CONST, vinsert4b)
+BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di", CONST, vinsert4b_di)
+
/* 3 argument vector functions returning void, treated as SPECIAL,
added in ISA 3.0 (power9). */
BU_P9V_64BIT_AV_X (STXVL, "stxvl", MISC)
@@ -2008,12 +2022,13 @@ BU_P9V_AV_P (VCMPNEZW_P, "vcmpnezw_p", C
/* ISA 3.0 Vector scalar overloaded 2 argument functions */
BU_P9V_OVERLOAD_2 (LXVL, "lxvl")
+BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx")
+BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx")
+BU_P9V_OVERLOAD_2 (VEXTRACT4B, "vextract4b")
/* ISA 3.0 Vector scalar overloaded 3 argument functions */
BU_P9V_OVERLOAD_3 (STXVL, "stxvl")
-
-BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx")
-BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx")
+BU_P9V_OVERLOAD_3 (VINSERT4B, "vinsert4b")
/* Overloaded CMPNE support was implemented prior to Power 9,
so is not mentioned here. */
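
For reference, given the BU_P9V_VSX_3 definition added above, the VINSERT4B entry should expand to roughly the following (modulo tab layout), i.e. the built-in is gated on RS6000_BTM_P9_VECTOR and registered as a const, ternary function:

    RS6000_BUILTIN_3 (P9V_BUILTIN_VINSERT4B,     /* ENUM */
                      "__builtin_vsx_vinsert4b", /* NAME */
                      RS6000_BTM_P9_VECTOR,      /* MASK */
                      (RS6000_BTC_CONST
                       | RS6000_BTC_TERNARY),    /* ATTR */
                      CODE_FOR_vinsert4b)        /* ICODE */
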
===================================================================
@@ -4682,6 +4682,11 @@ const struct altivec_builtin_types altiv
{ P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB,
RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B,
+ RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 },
+ { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B,
+ RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 },
+
{ P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUBLX,
RS6000_BTI_INTQI, RS6000_BTI_UINTSI,
RS6000_BTI_V16QI, 0 },
@@ -4735,6 +4740,28 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B,
+ RS6000_BTI_V16QI, RS6000_BTI_V4SI,
+ RS6000_BTI_V16QI, RS6000_BTI_UINTSI },
+ { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B,
+ RS6000_BTI_V16QI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_V16QI, RS6000_BTI_UINTSI },
+ { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI },
+ { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI,
+ RS6000_BTI_V16QI, RS6000_BTI_INTDI,
+ RS6000_BTI_V16QI, RS6000_BTI_UINTDI },
+ { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI,
+ RS6000_BTI_V16QI, RS6000_BTI_UINTDI,
+ RS6000_BTI_V16QI, RS6000_BTI_UINTDI },
+ { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTDI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI },
+ { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI },
+
{ P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ,
RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
{ P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ,
===================================================================
@@ -15546,7 +15546,7 @@ altivec_expand_builtin (tree exp, rtx ta
size_t i;
enum insn_code icode;
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- tree arg0;
+ tree arg0, arg1, arg2;
rtx op0, pat;
machine_mode tmode, mode0;
enum rs6000_builtins fcode
@@ -15766,6 +15766,40 @@ altivec_expand_builtin (tree exp, rtx ta
case VSX_BUILTIN_VEC_EXT_V1TI:
return altivec_expand_vec_ext_builtin (exp, target);
+ case P9V_BUILTIN_VEXTRACT4B:
+ case P9V_BUILTIN_VEC_VEXTRACT4B:
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ STRIP_NOPS (arg1);
+
+	  /* If we got invalid arguments, bail out before generating bad rtl;
+	     fall back to a normal call instead.  */
+ if (arg1 == error_mark_node)
+ return expand_call (exp, target, false);
+
+	  if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
+ {
+ error ("second argument to vec_vextract4b must 0..11");
+ return expand_call (exp, target, false);
+ }
+ break;
+
+ case P9V_BUILTIN_VINSERT4B:
+ case P9V_BUILTIN_VINSERT4B_DI:
+ case P9V_BUILTIN_VEC_VINSERT4B:
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ STRIP_NOPS (arg2);
+
+ /* If we got invalid arguments bail out before generating bad rtl. */
+ if (arg2 == error_mark_node)
+ return expand_call (exp, target, false);
+
+	  if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
+ {
+ error ("third argument to vec_vinsert4b must 0..11");
+ return expand_call (exp, target, false);
+ }
+ break;
+
default:
break;
/* Fall through. */
===================================================================
@@ -366,6 +366,8 @@ (define_c_enum "unspec"
UNSPEC_VCMPNEZH
UNSPEC_VCMPNEW
UNSPEC_VCMPNEZW
+ UNSPEC_XXEXTRACTUW
+ UNSPEC_XXINSERTW
])
;; VSX moves
@@ -3686,3 +3688,94 @@ (define_insn "vextuwrx"
"TARGET_P9_VECTOR"
"vextuwrx %0,%1,%2"
[(set_attr "type" "vecsimple")])
+
+;; Vector insert/extract a word at an arbitrary byte offset.  Note that the
+;; little endian version needs to adjust the byte offset and, for vinsert4b,
+;; also which V4SI element supplies the word to be inserted.
+(define_expand "vextract4b"
+ [(set (match_operand:DI 0 "gpc_reg_operand")
+ (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
+	      (match_operand:QI 2 "const_0_to_12_operand")]
+ UNSPEC_XXEXTRACTUW))]
+ "TARGET_P9_VECTOR"
+{
+ if (!VECTOR_ELT_ORDER_BIG)
+ operands[2] = GEN_INT (12 - INTVAL (operands[2]));
+})
+
+(define_insn_and_split "*vextract4b_internal"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=wj,r")
+ (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand" "wa,v")
+	      (match_operand:QI 2 "const_0_to_12_operand" "n,n")]
+ UNSPEC_XXEXTRACTUW))]
+ "TARGET_P9_VECTOR"
+ "@
+ xxextractuw %x0,%x1,%2
+ #"
+ "&& reload_completed && int_reg_operand (operands[0], DImode)"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op0_si = gen_rtx_REG (SImode, REGNO (op0));
+ rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (op1));
+
+ emit_move_insn (op0, op2);
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_vextuwlx (op0_si, op0_si, op1_v4si));
+ else
+ emit_insn (gen_vextuwrx (op0_si, op0_si, op1_v4si));
+ DONE;
+}
+ [(set_attr "type" "vecperm")])
+
+(define_expand "vinsert4b"
+ [(set (match_operand:V16QI 0 "vsx_register_operand")
+ (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
+ (match_operand:V16QI 2 "vsx_register_operand")
+		 (match_operand:QI 3 "const_0_to_12_operand")]
+ UNSPEC_XXINSERTW))]
+ "TARGET_P9_VECTOR"
+{
+ if (!VECTOR_ELT_ORDER_BIG)
+ {
+ rtx op1 = operands[1];
+ rtx v4si_tmp = gen_reg_rtx (V4SImode);
+ emit_insn (gen_vsx_xxpermdi_v4si (v4si_tmp, op1, op1, const1_rtx));
+ operands[1] = v4si_tmp;
+ operands[3] = GEN_INT (12 - INTVAL (operands[3]));
+ }
+})
+
+(define_insn "*vinsert4b_internal"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "0")
+		 (match_operand:QI 3 "const_0_to_12_operand" "n")]
+ UNSPEC_XXINSERTW))]
+ "TARGET_P9_VECTOR"
+ "xxinsertw %x0,%x1,%3"
+ [(set_attr "type" "vecperm")])
+
+(define_expand "vinsert4b_di"
+ [(set (match_operand:V16QI 0 "vsx_register_operand")
+ (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand")
+ (match_operand:V16QI 2 "vsx_register_operand")
+		 (match_operand:QI 3 "const_0_to_12_operand")]
+ UNSPEC_XXINSERTW))]
+ "TARGET_P9_VECTOR"
+{
+ if (!VECTOR_ELT_ORDER_BIG)
+ operands[3] = GEN_INT (12 - INTVAL (operands[3]));
+})
+
+(define_insn "*vinsert4b_di_internal"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand" "wj")
+ (match_operand:V16QI 2 "vsx_register_operand" "0")
+		 (match_operand:QI 3 "const_0_to_12_operand" "n")]
+ UNSPEC_XXINSERTW))]
+ "TARGET_P9_VECTOR"
+ "xxinsertw %x0,%x1,%3"
+ [(set_attr "type" "vecperm")])
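
A quick sanity check on the little-endian adjustment used in the expanders above: a 4-byte field that starts at byte n (big-endian element order) of a 16-byte vector starts at byte 12 - n when counted from the other end, so offsets 0..12 map back onto 0..12 and const_0_to_12_operand still matches after the adjustment.  A throwaway, purely illustrative check of that identity (not part of the patch):

    #include <assert.h>

    int
    main (void)
    {
      for (unsigned int n = 0; n <= 12; n++)
        {
          unsigned int le_n = 12 - n;   /* Offset seen from the LE end.  */
          assert (le_n <= 12);          /* Stays inside the valid range.  */
          assert (12 - le_n == n);      /* Adjustment is its own inverse.  */
        }
      return 0;
    }
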
===================================================================
@@ -394,6 +394,8 @@
#define vec_vctzd __builtin_vec_vctzd
#define vec_vctzh __builtin_vec_vctzh
#define vec_vctzw __builtin_vec_vctzw
+#define vec_vextract4b __builtin_vec_vextract4b
+#define vec_vinsert4b __builtin_vec_vinsert4b
#define vec_vprtyb __builtin_vec_vprtyb
#define vec_vprtybd __builtin_vec_vprtybd
#define vec_vprtybw __builtin_vec_vprtybw
===================================================================
@@ -17988,6 +17988,15 @@ vector unsigned short vec_vctzh (vector
vector int vec_vctzw (vector int);
vector unsigned int vec_vctzw (vector int);
+long long vec_vextract4b (const vector signed char, const int);
+long long vec_vextract4b (const vector unsigned char, const int);
+
+vector signed char vec_vinsert4b (vector int, vector signed char, const int);
+vector unsigned char vec_vinsert4b (vector unsigned int, vector unsigned char,
+                                    const int);
+vector signed char vec_vinsert4b (long long, vector signed char, const int);
+vector unsigned char vec_vinsert4b (long long, vector unsigned char, const int);
+
vector int vec_vprtyb (vector int);
vector unsigned int vec_vprtyb (vector unsigned int);
vector long long vec_vprtyb (vector long long);
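
As a usage sketch of the documented functions (illustrative only; the function name copy_word is not part of the patch, and this assumes -mcpu=power9 plus the <altivec.h> definitions added above):

    #include <altivec.h>

    /* Copy the 4-byte word at byte offset 4 of SRC into DST at byte offset 8.
       Both offsets must be integer constants in the range 0..12.  */
    vector signed char
    copy_word (vector signed char src, vector signed char dst)
    {
      long long w = vec_vextract4b (src, 4);
      return vec_vinsert4b (w, dst, 8);
    }
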
===================================================================
@@ -0,0 +1,39 @@
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include <altivec.h>
+
+vector signed char
+vins_v4si (vector int *vi, vector signed char *vc)
+{
+ return vec_vinsert4b (*vi, *vc, 1);
+}
+
+vector unsigned char
+vins_di (long di, vector unsigned char *vc)
+{
+ return vec_vinsert4b (di, *vc, 2);
+}
+
+vector char
+vins_di2 (long *p_di, vector char *vc)
+{
+ return vec_vinsert4b (*p_di, *vc, 3);
+}
+
+vector unsigned char
+vins_di0 (vector unsigned char *vc)
+{
+ return vec_vinsert4b (0, *vc, 4);
+}
+
+long
+vext (vector signed char *vc)
+{
+ return vec_vextract4b (*vc, 5);
+}
+
+/* { dg-final { scan-assembler "xxextractuw\|vextuw\[lr\]x" } } */
+/* { dg-final { scan-assembler "xxinsertw" } } */
===================================================================
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include <altivec.h>
+
+vector signed char
+ins_v4si (vector int vi, vector signed char vc)
+{
+  return vec_vinsert4b (vi, vc, 13);	/* { dg-error "vec_vinsert4b" } */
+}
+
+vector unsigned char
+ins_di (long di, vector unsigned char vc, long n)
+{
+ return vec_vinsert4b (di, vc, n); /* { dg-error "vec_vinsert4b" } */
+}
+
+long
+vext1 (vector signed char vc)
+{
+  return vec_vextract4b (vc, 13);	/* { dg-error "vec_vextract4b" } */
+}
+
+long
+vextn (vector unsigned char vc, long n)
+{
+ return vec_vextract4b (vc, n); /* { dg-error "vec_vextract4b" } */
+}