===================================================================
@@ -13991,6 +13991,38 @@ int vec_any_le (vector long long, vector
int vec_any_lt (vector long long, vector long long);
int vec_any_ne (vector long long, vector long long);
+vector long long vec_eqv (vector long long, vector long long);
+vector long long vec_eqv (vector bool long long, vector long long);
+vector long long vec_eqv (vector long long, vector bool long long);
+vector unsigned long long vec_eqv (vector unsigned long long,
+ vector unsigned long long);
+vector unsigned long long vec_eqv (vector bool long long,
+ vector unsigned long long);
+vector unsigned long long vec_eqv (vector unsigned long long,
+ vector bool long long);
+vector int vec_eqv (vector int, vector int);
+vector int vec_eqv (vector bool int, vector int);
+vector int vec_eqv (vector int, vector bool int);
+vector unsigned int vec_eqv (vector unsigned int, vector unsigned int);
+vector unsigned int vec_eqv (vector bool int,
+ vector unsigned int);
+vector unsigned int vec_eqv (vector unsigned int,
+ vector bool int);
+vector short vec_eqv (vector short, vector short);
+vector short vec_eqv (vector bool short, vector short);
+vector short vec_eqv (vector short, vector bool short);
+vector unsigned short vec_eqv (vector unsigned short, vector unsigned short);
+vector unsigned short vec_eqv (vector bool short,
+ vector unsigned short);
+vector unsigned short vec_eqv (vector unsigned short,
+ vector bool short);
+vector signed char vec_eqv (vector signed char, vector signed char);
+vector signed char vec_eqv (vector bool char, vector signed char);
+vector signed char vec_eqv (vector signed char, vector bool char);
+vector unsigned char vec_eqv (vector unsigned char, vector unsigned char);
+vector unsigned char vec_eqv (vector bool char, vector unsigned char);
+vector unsigned char vec_eqv (vector unsigned char, vector bool char);
+
vector long long vec_max (vector long long, vector long long);
vector unsigned long long vec_max (vector unsigned long long,
vector unsigned long long);
@@ -13999,6 +14031,70 @@ vector long long vec_min (vector long lo
vector unsigned long long vec_min (vector unsigned long long,
vector unsigned long long);
+vector long long vec_nand (vector long long, vector long long);
+vector long long vec_nand (vector bool long long, vector long long);
+vector long long vec_nand (vector long long, vector bool long long);
+vector unsigned long long vec_nand (vector unsigned long long,
+ vector unsigned long long);
+vector unsigned long long vec_nand (vector bool long long,
+ vector unsigned long long);
+vector unsigned long long vec_nand (vector unsigned long long,
+ vector bool long long);
+vector int vec_nand (vector int, vector int);
+vector int vec_nand (vector bool int, vector int);
+vector int vec_nand (vector int, vector bool int);
+vector unsigned int vec_nand (vector unsigned int, vector unsigned int);
+vector unsigned int vec_nand (vector bool int,
+ vector unsigned int);
+vector unsigned int vec_nand (vector unsigned int,
+ vector bool int);
+vector short vec_nand (vector short, vector short);
+vector short vec_nand (vector bool short, vector short);
+vector short vec_nand (vector short, vector bool short);
+vector unsigned short vec_nand (vector unsigned short, vector unsigned short);
+vector unsigned short vec_nand (vector bool short,
+ vector unsigned short);
+vector unsigned short vec_nand (vector unsigned short,
+ vector bool short);
+vector signed char vec_nand (vector signed char, vector signed char);
+vector signed char vec_nand (vector bool char, vector signed char);
+vector signed char vec_nand (vector signed char, vector bool char);
+vector unsigned char vec_nand (vector unsigned char, vector unsigned char);
+vector unsigned char vec_nand (vector bool char, vector unsigned char);
+vector unsigned char vec_nand (vector unsigned char, vector bool char);
+
+vector long long vec_orc (vector long long, vector long long);
+vector long long vec_orc (vector bool long long, vector long long);
+vector long long vec_orc (vector long long, vector bool long long);
+vector unsigned long long vec_orc (vector unsigned long long,
+ vector unsigned long long);
+vector unsigned long long vec_orc (vector bool long long,
+ vector unsigned long long);
+vector unsigned long long vec_orc (vector unsigned long long,
+ vector bool long long);
+vector int vec_orc (vector int, vector int);
+vector int vec_orc (vector bool int, vector int);
+vector int vec_orc (vector int, vector bool int);
+vector unsigned int vec_orc (vector unsigned int, vector unsigned int);
+vector unsigned int vec_orc (vector bool int,
+ vector unsigned int);
+vector unsigned int vec_orc (vector unsigned int,
+ vector bool int);
+vector short vec_orc (vector short, vector short);
+vector short vec_orc (vector bool short, vector short);
+vector short vec_orc (vector short, vector bool short);
+vector unsigned short vec_orc (vector unsigned short, vector unsigned short);
+vector unsigned short vec_orc (vector bool short,
+ vector unsigned short);
+vector unsigned short vec_orc (vector unsigned short,
+ vector bool short);
+vector signed char vec_orc (vector signed char, vector signed char);
+vector signed char vec_orc (vector bool char, vector signed char);
+vector signed char vec_orc (vector signed char, vector bool char);
+vector unsigned char vec_orc (vector unsigned char, vector unsigned char);
+vector unsigned char vec_orc (vector bool char, vector unsigned char);
+vector unsigned char vec_orc (vector unsigned char, vector bool char);
+
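(A usage sketch, not part of the patch: assuming the altivec.h mappings added
elsewhere in this series and -mcpu=power8, the overloads above resolve as
follows; the function names here are illustrative only.)

#include <altivec.h>

/* xxleqv: each result bit is 1 where a and b agree, i.e. ~(a ^ b).  */
vector unsigned int
use_eqv (vector unsigned int a, vector unsigned int b)
{
  return vec_eqv (a, b);
}

/* xxlnand (~(a & b)) and xxlorc follow the same overload pattern.  */
vector signed int
use_nand (vector signed int a, vector bool int b)
{
  return vec_nand (a, b);
}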
vector int vec_pack (vector long long, vector long long);
vector unsigned int vec_pack (vector unsigned long long,
vector unsigned long long);
@@ -14047,6 +14143,27 @@ vector unsigned long long vec_vaddudm (v
vector unsigned long long vec_vaddudm (vector unsigned long long,
vector bool unsigned long long);
+vector long long vec_vclz (vector long long);
+vector unsigned long long vec_vclz (vector unsigned long long);
+vector int vec_vclz (vector int);
+vector unsigned int vec_vclz (vector unsigned int);
+vector short vec_vclz (vector short);
+vector unsigned short vec_vclz (vector unsigned short);
+vector signed char vec_vclz (vector signed char);
+vector unsigned char vec_vclz (vector unsigned char);
+
+vector signed char vec_vclzb (vector signed char);
+vector unsigned char vec_vclzb (vector unsigned char);
+
+vector long long vec_vclzd (vector long long);
+vector unsigned long long vec_vclzd (vector unsigned long long);
+
+vector short vec_vclzh (vector short);
+vector unsigned short vec_vclzh (vector unsigned short);
+
+vector int vec_vclzw (vector int);
+vector unsigned int vec_vclzw (vector unsigned int);
+
vector long long vec_vmaxsd (vector long long, vector long long);
vector unsigned long long vec_vmaxud (vector unsigned long long,
@@ -14068,6 +14185,27 @@ vector unsigned int vec_vpkudum (vector
vector unsigned long long);
vector bool int vec_vpkudum (vector bool long long, vector bool long long);
+vector long long vec_vpopcnt (vector long long);
+vector unsigned long long vec_vpopcnt (vector unsigned long long);
+vector int vec_vpopcnt (vector int);
+vector unsigned int vec_vpopcnt (vector unsigned int);
+vector short vec_vpopcnt (vector short);
+vector unsigned short vec_vpopcnt (vector unsigned short);
+vector signed char vec_vpopcnt (vector signed char);
+vector unsigned char vec_vpopcnt (vector unsigned char);
+
+vector signed char vec_vpopcntb (vector signed char);
+vector unsigned char vec_vpopcntb (vector unsigned char);
+
+vector long long vec_vpopcntd (vector long long);
+vector unsigned long long vec_vpopcntd (vector unsigned long long);
+
+vector short vec_vpopcnth (vector short);
+vector unsigned short vec_vpopcnth (vector unsigned short);
+
+vector int vec_vpopcntw (vector int);
+vector unsigned int vec_vpopcntw (vector unsigned int);
+
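(A corresponding sketch for the count-leading-zeros and population-count
overloads above, under the same assumptions; both operate per element.)

#include <altivec.h>

vector unsigned int
use_clz (vector unsigned int a)
{
  return vec_vclz (a);		/* vclzw */
}

vector unsigned char
use_popcnt (vector unsigned char a)
{
  return vec_vpopcnt (a);	/* vpopcntb */
}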
vector long long vec_vrld (vector long long, vector unsigned long long);
vector unsigned long long vec_vrld (vector unsigned long long,
vector unsigned long long);
===================================================================
@@ -1,6 +1,6 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-options "-mcpu=power8 -O2 -ftree-vectorize -fvect-cost-model -fno-unroll-loops -fno-unroll-all-loops" } */
typedef vector unsigned long long crypto_t;
===================================================================
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "eqv" } } */
+/* { dg-final { scan-assembler "nand" } } */
+/* { dg-final { scan-assembler "nor" } } */
+
+#ifndef TYPE
+#define TYPE unsigned long
+#endif
+
+TYPE op1 (TYPE a, TYPE b) { return ~(a ^ b); } /* eqv */
+TYPE op2 (TYPE a, TYPE b) { return ~(a & b); } /* nand */
+TYPE op3 (TYPE a, TYPE b) { return ~(a | b); } /* nor */
+
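(A hypothetical companion test, not included in the patch: the same
identities on vector types should select the new VSX forms such as
xxleqv/xxlnand/xxlnor when compiled with -mcpu=power8 -maltivec.)

typedef __vector unsigned int vui_t;

vui_t veqv (vui_t a, vui_t b) { return ~(a ^ b); }	/* xxleqv */
vui_t vnand (vui_t a, vui_t b) { return ~(a & b); }	/* xxlnand */
vui_t vnor (vui_t a, vui_t b) { return ~(a | b); }	/* xxlnor */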
===================================================================
@@ -1311,6 +1311,32 @@ proc check_effective_target_avx_runtime
return 0
}
+# Return 1 if the target supports executing power8 vector instructions, 0
+# otherwise. Cache the result.
+
+proc check_p8vector_hw_available { } {
+ return [check_cached_effective_target p8vector_hw_available {
+ # Some simulators are known to not support VSX/power8 instructions.
+ # For now, disable on Darwin
+ if { [istarget powerpc-*-eabi] || [istarget powerpc*-*-eabispe] || [istarget *-*-darwin*]} {
+ expr 0
+ } else {
+ set options "-mpower8-vector"
+ check_runtime_nocache p8vector_hw_available {
+ int main()
+ {
+ #ifdef __MACH__
+ asm volatile ("xxlorc vs0,vs0,vs0");
+ #else
+ asm volatile ("xxlorc 0,0,0");
+ #endif
+ return 0;
+ }
+ } $options
+ }
+ }]
+}
+
# Return 1 if the target supports executing VSX instructions, 0
# otherwise. Cache the result.
@@ -2749,6 +2775,33 @@ proc check_effective_target_powerpc_alti
}
}
+# Return 1 if this is a PowerPC target supporting -mpower8-vector
+
+proc check_effective_target_powerpc_p8vector_ok { } {
+ if { ([istarget powerpc*-*-*]
+ && ![istarget powerpc-*-linux*paired*])
+ || [istarget rs6000-*-*] } {
+ # AltiVec is not supported on AIX before 5.3.
+ if { [istarget powerpc*-*-aix4*]
+ || [istarget powerpc*-*-aix5.1*]
+ || [istarget powerpc*-*-aix5.2*] } {
+ return 0
+ }
+ return [check_no_compiler_messages powerpc_p8vector_ok object {
+ int main (void) {
+#ifdef __MACH__
+ asm volatile ("xxlorc vs0,vs0,vs0");
+#else
+ asm volatile ("xxlorc 0,0,0");
+#endif
+ return 0;
+ }
+ } "-mpower8-vector"]
+ } else {
+ return 0
+ }
+}
+
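(Illustrative only; the test body is hypothetical.  A test would gate
compilation on the powerpc_p8vector_ok check below and execution on the
p8vector_hw keyword registered further down, via the usual target-selector
plumbing.)

/* { dg-do run { target p8vector_hw } } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-options "-mcpu=power8 -O2" } */

int
main (void)
{
  asm volatile ("xxlorc 0,0,0");	/* any power8 VSX instruction */
  return 0;
}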
# Return 1 if this is a PowerPC target supporting -mvsx
proc check_effective_target_powerpc_vsx_ok { } {
@@ -4576,6 +4629,7 @@ proc is-effective-target { arg } {
switch $arg {
"vmx_hw" { set selected [check_vmx_hw_available] }
"vsx_hw" { set selected [check_vsx_hw_available] }
+ "p8vector_hw" { set selected [check_p8vector_hw_available] }
"ppc_recip_hw" { set selected [check_ppc_recip_hw_available] }
"named_sections" { set selected [check_named_sections_available] }
"gc_sections" { set selected [check_gc_sections_available] }
@@ -4597,6 +4651,7 @@ proc is-effective-target-keyword { arg }
switch $arg {
"vmx_hw" { return 1 }
"vsx_hw" { return 1 }
+ "p8vector_hw" { return 1 }
"ppc_recip_hw" { return 1 }
"named_sections" { return 1 }
"gc_sections" { return 1 }
@@ -5181,7 +5236,9 @@ proc check_vect_support_and_set_flags {
}
lappend DEFAULT_VECTCFLAGS "-maltivec"
- if [check_vsx_hw_available] {
+ if [check_p8vector_hw_available] {
+ lappend DEFAULT_VECTCFLAGS "-mpower8-vector" "-mno-allow-movmisalign"
+ } elseif [check_vsx_hw_available] {
lappend DEFAULT_VECTCFLAGS "-mvsx" "-mno-allow-movmisalign"
}
===================================================================
@@ -730,9 +730,10 @@ (define_expand "ior<mode>3"
"")
(define_expand "and<mode>3"
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
+ [(parallel [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
+ (match_operand:VEC_L 2 "vlogical_operand" "")))
+ (clobber (match_scratch:CC 3 ""))])]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& (<MODE>mode != TImode || TARGET_POWERPC64)"
"")
@@ -746,8 +747,8 @@ (define_expand "one_cmpl<mode>2"
(define_expand "nor<mode>3"
[(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
- (match_operand:VEC_L 2 "vlogical_operand" ""))))]
+ (and:VEC_L (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" ""))
+ (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))))]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& (<MODE>mode != TImode || TARGET_POWERPC64)"
"")
@@ -760,6 +761,47 @@ (define_expand "andc<mode>3"
&& (<MODE>mode != TImode || TARGET_POWERPC64)"
"")
+;; Power8 vector logical instructions.
+(define_expand "eqv<mode>3"
+ [(set (match_operand:VEC_L 0 "register_operand" "")
+ (not:VEC_L
+ (xor:VEC_L (match_operand:VEC_L 1 "register_operand" "")
+ (match_operand:VEC_L 2 "register_operand" ""))))]
+ "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; Rewrite nand into canonical form
+(define_expand "nand<mode>3"
+ [(set (match_operand:VEC_L 0 "register_operand" "")
+ (ior:VEC_L
+ (not:VEC_L (match_operand:VEC_L 1 "register_operand" ""))
+ (not:VEC_L (match_operand:VEC_L 2 "register_operand" ""))))]
+ "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; The canonical form is to have the negated element first, so we need to
+;; reverse the arguments.
+(define_expand "orc<mode>3"
+ [(set (match_operand:VEC_L 0 "register_operand" "")
+ (ior:VEC_L
+ (not:VEC_L (match_operand:VEC_L 1 "register_operand" ""))
+ (match_operand:VEC_L 2 "register_operand" "")))]
+ "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; Vector count leading zeros
+(define_expand "clz<mode>2"
+ [(set (match_operand:VEC_I 0 "register_operand" "")
+ (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
+ "TARGET_P8_VECTOR")
+
+;; Vector population count
+(define_expand "popcount<mode>2"
+ [(set (match_operand:VEC_I 0 "register_operand" "")
+ (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
+ "TARGET_P8_VECTOR")
+
+
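(These expanders are what the vectorizer keys off; a sketch, assuming
-mcpu=power8 -O3 -ftree-vectorize, of a loop that can now use vclzw through
the clzv4si2 pattern.)

void
clz_loop (unsigned int *__restrict__ out,
	  const unsigned int *__restrict__ in, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = __builtin_clz (in[i]);
}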
;; Same size conversions
(define_expand "float<VEC_int><mode>2"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
===================================================================
@@ -207,7 +207,7 @@ (define_predicate "int_reg_operand"
if (!REG_P (op))
return 0;
- if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op)))
+ if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
return 1;
return INT_REGNO_P (REGNO (op));
===================================================================
@@ -1234,10 +1234,24 @@ BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw")
BU_VSX_OVERLOAD_X (LD, "ld")
BU_VSX_OVERLOAD_X (ST, "st")
+/* 1 argument instructions added in ISA 2.07 that are classified as VSX
+ instructions. */
+BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn)
+BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn)
+
/* 1 argument altivec instructions added in ISA 2.07. */
BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2)
BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw)
BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw)
+BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2)
+BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2)
+BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2)
+BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2)
+BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2)
+BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2)
+BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2)
+BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2)
+BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd)
/* 2 argument altivec instructions added in ISA 2.07. */
BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3)
@@ -1245,6 +1259,8 @@ BU_P8V_AV_2 (VMINSD, "vminsd", CONST, s
BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3)
BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3)
BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3)
+BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew)
+BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow)
BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum)
BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss)
BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus)
@@ -1255,6 +1271,29 @@ BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlsh
BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3)
BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3)
+/* 2 argument VSX instructions added in ISA 2.07. For the logical
+ instructions, we define a builtin for each vector type. */
+BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3)
+BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3)
+BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3)
+BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3)
+BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3)
+BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3)
+
+BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3)
+BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3)
+BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3)
+BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3)
+BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3)
+BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3)
+
+BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3)
+BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3)
+BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3)
+BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3)
+BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3)
+BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3)
+
/* Vector comparison instructions added in ISA 2.07. */
BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di)
BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di)
@@ -1268,13 +1307,29 @@ BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", C
/* ISA 2.07 vector overloaded 1 argument functions. */
BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw")
BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw")
+BU_P8V_OVERLOAD_1 (VCLZ, "vclz")
+BU_P8V_OVERLOAD_1 (VCLZB, "vclzb")
+BU_P8V_OVERLOAD_1 (VCLZH, "vclzh")
+BU_P8V_OVERLOAD_1 (VCLZW, "vclzw")
+BU_P8V_OVERLOAD_1 (VCLZD, "vclzd")
+BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt")
+BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb")
+BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth")
+BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw")
+BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd")
+BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd")
/* ISA 2.07 vector overloaded 2 argument functions. */
+BU_P8V_OVERLOAD_2 (EQV, "eqv")
+BU_P8V_OVERLOAD_2 (NAND, "nand")
+BU_P8V_OVERLOAD_2 (ORC, "orc")
BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm")
BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd")
BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud")
BU_P8V_OVERLOAD_2 (VMINSD, "vminsd")
BU_P8V_OVERLOAD_2 (VMINUD, "vminud")
+BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew")
+BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow")
BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss")
BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus")
BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum")
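(For reference, and assuming the existing BU_* macro naming conventions:
each BU_P8V_AV_1/BU_P8V_AV_2 entry above creates a typed builtin such as
__builtin_altivec_vclzw, while each BU_P8V_OVERLOAD_* entry creates the
polymorphic __builtin_vec_* name that altivec.h maps the vec_* spellings
onto.  A sketch of the non-overloaded form:)

__vector signed int
clzw_direct (__vector signed int a)
{
  return __builtin_altivec_vclzw (a);
}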
===================================================================
@@ -3515,6 +3515,399 @@ const struct altivec_builtin_types altiv
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ /* Power8 vector overloaded functions. */
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+
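(Each row above binds one argument signature to one typed builtin; a sketch
of how resolution plays out in user code, assuming the altivec.h additions
from this series.)

#include <altivec.h>

vector signed int
pick_eqv_v4si (vector bool int a, vector signed int b)
{
  return vec_eqv (a, b);	/* matches the EQV_V4SI bool/signed row */
}

vector unsigned long long
pick_nand_v2di (vector unsigned long long a, vector bool long long b)
{
  return vec_nand (a, b);	/* matches the NAND_V2DI unsigned/bool row */
}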
/* Crypto builtins. */
{ CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
===================================================================
@@ -2859,6 +2859,16 @@ rs6000_option_override_internal (bool gl
}
}
+ /* Quad memory only works in 64-bit mode; if the user did -mcpu=power8 -m32,
+ silently turn off quad memory mode. */
+ if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+ warning (0, N_("-mquad-memory requires 64-bit mode"));
+
+ rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
+ }
+
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
@@ -4082,6 +4092,22 @@ rs6000_builtin_vectorized_function (tree
enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
switch (fn)
{
+ case BUILT_IN_CLZIMAX:
+ case BUILT_IN_CLZLL:
+ case BUILT_IN_CLZL:
+ case BUILT_IN_CLZ:
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+ {
+ if (out_mode == QImode && out_n == 16)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
+ else if (out_mode == HImode && out_n == 8)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
+ else if (out_mode == SImode && out_n == 4)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
+ else if (out_mode == DImode && out_n == 2)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
+ }
+ break;
case BUILT_IN_COPYSIGN:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
@@ -4097,6 +4123,22 @@ rs6000_builtin_vectorized_function (tree
if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
break;
+ case BUILT_IN_POPCOUNTIMAX:
+ case BUILT_IN_POPCOUNTLL:
+ case BUILT_IN_POPCOUNTL:
+ case BUILT_IN_POPCOUNT:
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+ {
+ if (out_mode == QImode && out_n == 16)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
+ else if (out_mode == HImode && out_n == 8)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
+ else if (out_mode == SImode && out_n == 4)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
+ else if (out_mode == DImode && out_n == 2)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
+ }
+ break;
case BUILT_IN_SQRT:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
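(As with the CLZ case above, a sketch of a loop the new POPCOUNT entries let
the vectorizer turn into vpopcntw, assuming -mcpu=power8 -O3
-ftree-vectorize.)

void
popcount_loop (unsigned int *__restrict__ out,
	       const unsigned int *__restrict__ in, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = __builtin_popcount (in[i]);
}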
@@ -4955,8 +4997,11 @@ rs6000_expand_vector_init (rtx target, r
{
rtx freg = gen_reg_rtx (V4SFmode);
rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
- emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg));
+ rtx cvt = (TARGET_XSCVDPSPN
+ ? gen_vsx_xscvdpspn_scalar (freg, sreg)
+ : gen_vsx_xscvdpsp_scalar (freg, sreg));
+ emit_insn (cvt);
emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
}
else
@@ -12857,6 +12902,7 @@ builtin_function_type (enum machine_mode
{
/* unsigned 1 argument functions. */
case CRYPTO_BUILTIN_VSBOX:
+ case P8V_BUILTIN_VGBBD:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
break;
@@ -27214,26 +27260,31 @@ bool
altivec_expand_vec_perm_const (rtx operands[4])
{
struct altivec_perm_insn {
+ HOST_WIDE_INT mask;
enum insn_code impl;
unsigned char perm[16];
};
static const struct altivec_perm_insn patterns[] = {
- { CODE_FOR_altivec_vpkuhum,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum,
{ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
- { CODE_FOR_altivec_vpkuwum,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum,
{ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
- { CODE_FOR_altivec_vmrghb,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghb,
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
- { CODE_FOR_altivec_vmrghh,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghh,
{ 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
- { CODE_FOR_altivec_vmrghw,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghw,
{ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
- { CODE_FOR_altivec_vmrglb,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglb,
{ 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
- { CODE_FOR_altivec_vmrglh,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglh,
{ 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
- { CODE_FOR_altivec_vmrglw,
- { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglw,
+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
+ { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
+ { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
};
unsigned int i, j, elt, which;
@@ -27333,6 +27384,9 @@ altivec_expand_vec_perm_const (rtx opera
{
bool swapped;
+ if ((patterns[j].mask & rs6000_isa_flags) == 0)
+ continue;
+
elt = patterns[j].perm[0];
if (perm[0] == elt)
swapped = false;
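(A sketch of a permute that the two new table rows let this routine match
directly; element numbering is big-endian, and the shuffle mask gives word
indices, with 4-7 selecting from b.)

__vector unsigned int
merge_even_words (__vector unsigned int a, __vector unsigned int b)
{
  /* { a[0], b[0], a[2], b[2] }: a single vmrgew on power8.  */
  return __builtin_shuffle (a, b, (__vector unsigned int) { 0, 4, 2, 6 });
}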
===================================================================
@@ -36,10 +36,22 @@ (define_mode_iterator VSX_F [V4SF V2DF])
;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+;; Like VSX_L, but don't support TImode for doing logical instructions in
+;; 32-bit mode
+(define_mode_iterator VSX_L2 [V16QI V8HI V4SI V2DI V4SF V2DF])
+
;; Iterator for memory move. Handle TImode specially to allow
;; it to use gprs as well as vsx registers.
(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator VSX_M2 [V16QI
+ V8HI
+ V4SI
+ V2DI
+ V4SF
+ V2DF
+ (TI "TARGET_VSX_TIMODE")])
+
;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
(V8HI "vw4")
@@ -191,6 +203,8 @@ (define_c_enum "unspec"
UNSPEC_VSX_CVDPSXWS
UNSPEC_VSX_CVDPUXWS
UNSPEC_VSX_CVSPDP
+ UNSPEC_VSX_CVSPDPN
+ UNSPEC_VSX_CVDPSPN
UNSPEC_VSX_CVSXWDP
UNSPEC_VSX_CVUXWDP
UNSPEC_VSX_CVSXDSP
@@ -1003,6 +1017,40 @@ (define_insn "vsx_xscvspdp_scalar2"
"xscvspdp %x0,%x1"
[(set_attr "type" "fp")])
+;; Power8 versions using xscvdpspn/xscvspdpn
+(define_insn "vsx_xscvdpspn"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVDPSPN))]
+ "TARGET_XSCVDPSPN"
+ "xscvdpspn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvspdpn"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
+ UNSPEC_VSX_CVSPDPN))]
+ "TARGET_XSCVSPDPN"
+ "xscvspdpn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvdpspn_scalar"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+ (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
+ UNSPEC_VSX_CVDPSPN))]
+ "TARGET_XSCVDPSPN"
+ "xscvdpspn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+;; Used by direct move of SFmode from gpr to VSX register
+(define_insn "vsx_xscvspdpn_directmove"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVSPDPN))]
+ "TARGET_XSCVSPDPN"
+ "xscvspdpn %x0,%x1"
+ [(set_attr "type" "fp")])
+
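(At the C level, the sequence these patterns accelerate is the
scalar-to-V4SF splat in rs6000_expand_vector_init; a sketch, assuming
-mcpu=power8.)

__vector float
splat_float (float x)
{
  /* xscvdpspn + xxspltw replaces the older xscvdpsp sequence.  */
  return (__vector float) { x, x, x, x };
}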
;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.
@@ -1088,70 +1136,370 @@ (define_insn "*vsx_float_fix_<mode>2"
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Logical operations
-;; Do not support TImode logical instructions on 32-bit at present, because the
-;; compiler will see that we have a TImode and when it wanted DImode, and
-;; convert the DImode to TImode, store it on the stack, and load it in a VSX
-;; register.
-(define_insn "*vsx_and<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (and:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+;; Logical operations. Do not support TImode logical instructions on 32-bit
+;; at present, because the compiler will see that we have TImode when it
+;; wanted DImode, convert the DImode to TImode, store it on the stack, and
+;; load it in a VSX register or generate extra logical instructions in GPR
+;; registers.
+
+;; When we are splitting the operations to GPRs, we use three alternatives, two
+;; where the first/second inputs and output are in the same register, and the
+;; third where the output specifies an early clobber so that we don't have to
+;; worry about overlapping registers.
+
+(define_insn "*vsx_and<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (and:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))
+ (clobber (match_scratch:CC 3 "X"))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxland %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
-(define_insn "*vsx_ior<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn_and_split "*vsx_and<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r")
+ (and:VSX_L
+ (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r")
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))
+ (clobber (match_scratch:CC 3 "X,X,X,X"))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxland %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(parallel [(set (match_dup 4) (and:DI (match_dup 5) (match_dup 6)))
+ (clobber (match_dup 3))])
+ (parallel [(set (match_dup 7) (and:DI (match_dup 8) (match_dup 9)))
+ (clobber (match_dup 3))])]
+{
+ operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[7] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[9] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
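(The GPR alternatives matter when a 128-bit value lives in integer
registers; a sketch, 64-bit only since TImode logical operations stay
VSX-only on 32-bit, of code that exercises the new split.)

__int128_t
and_ti (__int128_t a, __int128_t b)
{
  return a & b;		/* may split into two DImode ANDs in GPRs */
}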
+(define_insn "*vsx_ior<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (ior:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxlor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
-(define_insn "*vsx_xor<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (xor:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn_and_split "*vsx_ior<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r")
+ (ior:VSX_L
+ (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r")
+ (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlor %x0,%x1,%x2
+ #
+ #
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(const_int 0)]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+
+ if (operands[5] == constm1_rtx)
+ emit_move_insn (operands[3], constm1_rtx);
+
+ else if (operands[5] == const0_rtx)
+ {
+ if (!rtx_equal_p (operands[3], operands[4]))
+ emit_move_insn (operands[3], operands[4]);
+ }
+ else
+ emit_insn (gen_iordi3 (operands[3], operands[4], operands[5]));
+
+ if (operands[8] == constm1_rtx)
+ emit_move_insn (operands[6], constm1_rtx);
+
+ else if (operands[8] == const0_rtx)
+ {
+ if (!rtx_equal_p (operands[6], operands[7]))
+ emit_move_insn (operands[6], operands[7]);
+ }
+ else
+ emit_insn (gen_iordi3 (operands[6], operands[7], operands[8]));
+ DONE;
+}
+ [(set_attr "type" "vecsimple,two,two,two,three,three")
+ (set_attr "length" "4,8,8,8,16,16")])
+
+(define_insn "*vsx_xor<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_POWERPC64"
"xxlxor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
-(define_insn "*vsx_one_cmpl<mode>2"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (not:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn_and_split "*vsx_xor<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r")
+ (xor:VSX_L
+ (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r")
+ (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlxor %x0,%x1,%x2
+ #
+ #
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (xor:DI (match_dup 4) (match_dup 5)))
+ (set (match_dup 6) (xor:DI (match_dup 7) (match_dup 8)))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two,three,three")
+ (set_attr "length" "4,8,8,8,16,16")])
+
+(define_insn "*vsx_one_cmpl<mode>2_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxlnor %x0,%x1,%x1"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_one_cmpl<mode>2_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,&?r")
+ (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlnor %x0,%x1,%x1
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 2) (not:DI (match_dup 3)))
+ (set (match_dup 4) (not:DI (match_dup 5)))]
+{
+ operands[2] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[3] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two")
+ (set_attr "length" "4,8,8")])
-(define_insn "*vsx_nor<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (not:VSX_L
- (ior:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn "*vsx_nor<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (and:VSX_L2
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa"))
+ (not:VSX_L2 (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxlnor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
-(define_insn "*vsx_andc<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+(define_insn_and_split "*vsx_nor<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r")
+ (and:VSX_L
+ (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r"))
+ (not:VSX_L (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlnor %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (not:DI (match_dup 5))))
+ (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+(define_insn "*vsx_andc<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (and:VSX_L2
+ (not:VSX_L2
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa"))
+ (match_operand:VSX_L2 1 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlandc %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_andc<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
(and:VSX_L
(not:VSX_L
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
- "xxlandc %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,0,r,r"))
+ (match_operand:VSX_L 1 "vlogical_operand" "wa,r,0,r")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlandc %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (match_dup 5)))
+ (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (match_dup 8)))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+;; Power8 vector logical instructions. We only generate the VSX form of
+;; these instructions (xxl<xxx> rather than v<xxx>).
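+;; Eqv is bitwise "equivalence" (exclusive nor): eqv (a, b) == ~(a ^ b).
+;; On 64-bit targets the *_64bit variants below also allow the operation in
+;; GPRs, splitting the 128-bit value into two DImode halves after reload.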
+(define_insn "*vsx_eqv<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (not:VSX_L2
+ (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))]
+ "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxleqv %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_eqv<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
+ (not:VSX_L
+ (xor:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r")
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))]
+ "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxleqv %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+ && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (not:DI (xor:DI (match_dup 4) (match_dup 5))))
+ (set (match_dup 6) (not:DI (xor:DI (match_dup 7) (match_dup 8))))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+;; Rewrite nand into canonical form
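+;; By De Morgan's law, nand (a, b) == ~(a & b) == (~a) | (~b); the latter
+;; is the canonical RTL form, so the patterns below match
+;; (ior (not a) (not b)).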
+(define_insn "*vsx_nand<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (ior:VSX_L2
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa"))
+ (not:VSX_L2 (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))]
+ "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlnand %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_nand<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "register_operand" "=wa,?r,?r,?r")
+ (ior:VSX_L
+ (not:VSX_L (match_operand:VSX_L 1 "register_operand" "wa,0,r,r"))
+ (not:VSX_L (match_operand:VSX_L 2 "register_operand" "wa,r,0,r"))))]
+ "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlnand %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+ && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (not:DI (match_dup 5))))
+ (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+;; The canonical form is to have the negated element first, so we need to
+;; reverse the arguments.
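+;; I.e. xxlorc computes %xA | ~%xB, so the output template swaps operands
+;; 1 and 2: (ior (not op1) op2) is emitted as op2 | ~op1.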
+(define_insn "*vsx_orc<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (ior:VSX_L2
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa"))
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlorc %x0,%x2,%x1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_orc<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
+ (ior:VSX_L
+ (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r"))
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))]
+ "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlorc %x0,%x2,%x1
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+ && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (match_dup 5)))
+ (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (match_dup 8)))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
;; Permute operations
===================================================================
@@ -1114,10 +1114,10 @@ extern unsigned rs6000_pointer_size;
#define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N)
/* Alternate name for any vector register supporting logical operations, no
- matter which instruction set(s) are available. Under VSX, we allow GPRs as
- well as vector registers on 64-bit systems. We don't allow 32-bit systems,
- due to the number of registers involved, and the number of instructions to
- load/store the values.. */
+   matter which instruction set(s) are available. If we are in 64-bit mode,
+   we also allow logical operations in the GPRs. This is to allow the atomic
+   quad-word built-ins not to need the VSX registers for lqarx/stqcx. It
+   also helps with __int128_t arguments that are passed in GPRs. */
#define VLOGICAL_REGNO_P(N) \
(ALTIVEC_REGNO_P (N) \
|| (TARGET_VSX && FP_REGNO_P (N)) \
===================================================================
@@ -128,6 +128,7 @@ (define_c_enum "unspec"
UNSPEC_VUPKLS_V4SF
UNSPEC_VUPKHU_V4SF
UNSPEC_VUPKLU_V4SF
+ UNSPEC_VGBBD
])
(define_c_enum "unspecv"
@@ -941,6 +942,31 @@ (define_insn "*altivec_vmrglsf"
"vmrglw %0,%1,%2"
[(set_attr "type" "vecperm")])
+;; Power8 vector merge even/odd
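+;; vmrgew/vmrgow interleave the even-numbered (or odd-numbered) words of
+;; the two inputs; e.g. vmrgew of a and b produces { a0, b0, a2, b2 }.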
+(define_insn "p8_vmrgew"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "TARGET_P8_VECTOR"
+ "vmrgew %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "p8_vmrgow"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_P8_VECTOR"
+ "vmrgow %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
(define_insn "vec_widen_umult_even_v16qi"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
@@ -1017,10 +1043,13 @@ (define_insn "vec_widen_smult_odd_v8hi"
;; logical ops. Have the logical ops follow the memory ops in
;; terms of whether to prefer VSX or Altivec
+;; For AND, add the clobber to be consistent with VSX, which adds splitters
+;; for using the GPR registers.
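+;; The "X" constraint means no CC register is actually allocated for the
+;; scratch; the clobber only keeps the pattern shape in sync with VSX.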
(define_insn "*altivec_and<mode>3"
[(set (match_operand:VM 0 "register_operand" "=v")
(and:VM (match_operand:VM 1 "register_operand" "v")
- (match_operand:VM 2 "register_operand" "v")))]
+ (match_operand:VM 2 "register_operand" "v")))
+ (clobber (match_scratch:CC 3 "=X"))]
"VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
"vand %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -1050,8 +1079,8 @@ (define_insn "*altivec_one_cmpl<mode>2"
(define_insn "*altivec_nor<mode>3"
[(set (match_operand:VM 0 "register_operand" "=v")
- (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v")
- (match_operand:VM 2 "register_operand" "v"))))]
+ (and:VM (not:VM (match_operand:VM 1 "register_operand" "v"))
+ (not:VM (match_operand:VM 2 "register_operand" "v"))))]
"VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
"vnor %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -2370,3 +2399,34 @@ (define_expand "vec_unpacku_float_lo_v8h
emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
DONE;
}")
+
+
+;; Power8 vector instructions encoded as Altivec instructions
+
+;; Vector count leading zeros
+(define_insn "*p8v_clz<mode>2"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))]
+ "TARGET_P8_VECTOR"
+ "vclz<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+;; Vector population count
+(define_insn "*p8v_popcount<mode>2"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))]
+ "TARGET_P8_VECTOR"
+ "vpopcnt<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+;; Vector Gather Bits by Bytes by Doubleword
+(define_insn "p8v_vgbbd"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VGBBD))]
+ "TARGET_P8_VECTOR"
+ "vgbbd %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
===================================================================
@@ -8290,6 +8290,20 @@ (define_split
(compare:CC (match_dup 0)
(const_int 0)))]
"")
+
+;; Eqv operation.
+;; It is probably not worth adding combiner patterns to recognize
+;; eqv-and-compare-against-0 operations.
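+;; eqv computes rD = ~(rA ^ rB); it is a base PowerPC instruction, hence
+;; the empty condition string.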
+(define_insn "*eqv<mode>3"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (not:GPR
+ (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
+ ""
+ "eqv %0,%1,%2"
+ [(set_attr "type" "integer")
+ (set_attr "length" "4")])
+
;; Now define ways of moving data around.
===================================================================
@@ -323,15 +323,31 @@
#ifdef _ARCH_PWR8
/* Vector additions added in ISA 2.07. */
+#define vec_eqv __builtin_vec_eqv
+#define vec_nand __builtin_vec_nand
+#define vec_orc __builtin_vec_orc
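+/* Note: vec_eqv computes ~(a ^ b), vec_nand computes ~(a & b), and
+   vec_orc computes a | ~b, all element-wise. */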
#define vec_vaddudm __builtin_vec_vaddudm
+#define vec_vclz __builtin_vec_vclz
+#define vec_vclzb __builtin_vec_vclzb
+#define vec_vclzd __builtin_vec_vclzd
+#define vec_vclzh __builtin_vec_vclzh
+#define vec_vclzw __builtin_vec_vclzw
+#define vec_vgbbd __builtin_vec_vgbbd
#define vec_vmaxsd __builtin_vec_vmaxsd
#define vec_vmaxud __builtin_vec_vmaxud
#define vec_vminsd __builtin_vec_vminsd
#define vec_vminud __builtin_vec_vminud
+#define vec_vmrgew __builtin_vec_vmrgew
+#define vec_vmrgow __builtin_vec_vmrgow
#define vec_vpksdss __builtin_vec_vpksdss
#define vec_vpksdus __builtin_vec_vpksdus
#define vec_vpkudum __builtin_vec_vpkudum
#define vec_vpkudus __builtin_vec_vpkudus
+#define vec_vpopcnt __builtin_vec_vpopcnt
+#define vec_vpopcntb __builtin_vec_vpopcntb
+#define vec_vpopcntd __builtin_vec_vpopcntd
+#define vec_vpopcnth __builtin_vec_vpopcnth
+#define vec_vpopcntw __builtin_vec_vpopcntw
#define vec_vrld __builtin_vec_vrld
#define vec_vsld __builtin_vec_vsld
#define vec_vsrad __builtin_vec_vsrad