===================================================================
@@ -36,13 +36,13 @@ (define_mode_iterator VEC_A [V16QI V8HI
(define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF])
;; Vector logical modes
-(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI])
;; Vector modes for moves. Don't do TImode here.
-(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI])
;; Vector modes for types that don't need a realignment under VSX
-(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF])
+(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF V1TI])
;; Vector comparison modes
(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF])
@@ -54,7 +54,8 @@ (define_mode_iterator VEC_E [V16QI V8HI
(define_mode_iterator VEC_64 [V2DI V2DF])
;; Vector reload iterator
-(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF SF SD SI DF DD DI TI])
+(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
+ SF SD SI DF DD DI TI])
;; Base type from vector mode
(define_mode_attr VEC_base [(V16QI "QI")
@@ -63,6 +64,7 @@ (define_mode_attr VEC_base [(V16QI "QI")
(V2DI "DI")
(V4SF "SF")
(V2DF "DF")
+ (V1TI "TI")
(TI "TI")])
;; Same size integer type for floating point data
@@ -107,7 +109,6 @@ (define_expand "mov<mode>"
}
if (!BYTES_BIG_ENDIAN
&& VECTOR_MEM_VSX_P (<MODE>mode)
- && <MODE>mode != TImode
&& !gpr_or_gpr_p (operands[0], operands[1])
&& (memory_operand (operands[0], <MODE>mode)
^ memory_operand (operands[1], <MODE>mode)))
===================================================================
@@ -38,6 +38,7 @@ VECTOR_MODES (INT, 8); /* V
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
VECTOR_MODE (INT, DI, 1);
+VECTOR_MODE (INT, TI, 1);
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
===================================================================
@@ -51,6 +51,8 @@ extern rtx find_addr_reg (rtx);
extern rtx gen_easy_altivec_constant (rtx);
extern const char *output_vec_const_move (rtx *);
extern const char *rs6000_output_move_128bit (rtx *);
+extern bool rs6000_move_128bit_ok_p (rtx []);
+extern bool rs6000_split_128bit_ok_p (rtx []);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
===================================================================
@@ -325,6 +325,14 @@
| RS6000_BTC_BINARY), \
CODE_FOR_ ## ICODE) /* ICODE */
+#define BU_P8V_AV_3(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_3 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_altivec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
"__builtin_altivec_" NAME, /* NAME */ \
@@ -359,6 +367,14 @@
| RS6000_BTC_BINARY), \
CODE_FOR_nothing) /* ICODE */
+#define BU_P8V_OVERLOAD_3(ENUM, NAME) \
+ RS6000_BUILTIN_3 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
+ "__builtin_vec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_nothing) /* ICODE */
+
/* Crypto convenience macros. */
#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
@@ -571,12 +587,14 @@ BU_ALTIVEC_3 (VMSUMSHM, "vmsumshm"
BU_ALTIVEC_3 (VMSUMUHS, "vmsumuhs", SAT, altivec_vmsumuhs)
BU_ALTIVEC_3 (VMSUMSHS, "vmsumshs", SAT, altivec_vmsumshs)
BU_ALTIVEC_3 (VNMSUBFP, "vnmsubfp", FP, nfmsv4sf4)
+BU_ALTIVEC_3 (VPERM_1TI, "vperm_1ti", CONST, altivec_vperm_v1ti)
BU_ALTIVEC_3 (VPERM_2DF, "vperm_2df", CONST, altivec_vperm_v2df)
BU_ALTIVEC_3 (VPERM_2DI, "vperm_2di", CONST, altivec_vperm_v2di)
BU_ALTIVEC_3 (VPERM_4SF, "vperm_4sf", CONST, altivec_vperm_v4sf)
BU_ALTIVEC_3 (VPERM_4SI, "vperm_4si", CONST, altivec_vperm_v4si)
BU_ALTIVEC_3 (VPERM_8HI, "vperm_8hi", CONST, altivec_vperm_v8hi)
BU_ALTIVEC_3 (VPERM_16QI, "vperm_16qi", CONST, altivec_vperm_v16qi_uns)
+BU_ALTIVEC_3 (VPERM_1TI_UNS, "vperm_1ti_uns", CONST, altivec_vperm_v1ti_uns)
BU_ALTIVEC_3 (VPERM_2DI_UNS, "vperm_2di_uns", CONST, altivec_vperm_v2di_uns)
BU_ALTIVEC_3 (VPERM_4SI_UNS, "vperm_4si_uns", CONST, altivec_vperm_v4si_uns)
BU_ALTIVEC_3 (VPERM_8HI_UNS, "vperm_8hi_uns", CONST, altivec_vperm_v8hi_uns)
@@ -587,10 +605,12 @@ BU_ALTIVEC_3 (VSEL_8HI, "vsel_8hi"
BU_ALTIVEC_3 (VSEL_16QI, "vsel_16qi", CONST, vector_select_v16qi)
BU_ALTIVEC_3 (VSEL_2DF, "vsel_2df", CONST, vector_select_v2df)
BU_ALTIVEC_3 (VSEL_2DI, "vsel_2di", CONST, vector_select_v2di)
+BU_ALTIVEC_3 (VSEL_1TI, "vsel_1ti", CONST, vector_select_v1ti)
BU_ALTIVEC_3 (VSEL_4SI_UNS, "vsel_4si_uns", CONST, vector_select_v4si_uns)
BU_ALTIVEC_3 (VSEL_8HI_UNS, "vsel_8hi_uns", CONST, vector_select_v8hi_uns)
BU_ALTIVEC_3 (VSEL_16QI_UNS, "vsel_16qi_uns", CONST, vector_select_v16qi_uns)
BU_ALTIVEC_3 (VSEL_2DI_UNS, "vsel_2di_uns", CONST, vector_select_v2di_uns)
+BU_ALTIVEC_3 (VSEL_1TI_UNS, "vsel_1ti_uns", CONST, vector_select_v1ti_uns)
BU_ALTIVEC_3 (VSLDOI_16QI, "vsldoi_16qi", CONST, altivec_vsldoi_v16qi)
BU_ALTIVEC_3 (VSLDOI_8HI, "vsldoi_8hi", CONST, altivec_vsldoi_v8hi)
BU_ALTIVEC_3 (VSLDOI_4SI, "vsldoi_4si", CONST, altivec_vsldoi_v4si)
@@ -783,6 +803,8 @@ BU_ALTIVEC_X (ST_INTERNAL_2df, "st_inter
BU_ALTIVEC_X (LD_INTERNAL_2df, "ld_internal_2df", MEM)
BU_ALTIVEC_X (ST_INTERNAL_2di, "st_internal_2di", MEM)
BU_ALTIVEC_X (LD_INTERNAL_2di, "ld_internal_2di", MEM)
+BU_ALTIVEC_X (ST_INTERNAL_1ti, "st_internal_1ti", MEM)
+BU_ALTIVEC_X (LD_INTERNAL_1ti, "ld_internal_1ti", MEM)
BU_ALTIVEC_X (MTVSCR, "mtvscr", MISC)
BU_ALTIVEC_X (MFVSCR, "mfvscr", MISC)
BU_ALTIVEC_X (DSSALL, "dssall", MISC)
@@ -1085,34 +1107,40 @@ BU_VSX_3 (XVMSUBDP, "xvmsubdp"
BU_VSX_3 (XVNMADDDP, "xvnmadddp", CONST, nfmav2df4)
BU_VSX_3 (XVNMSUBDP, "xvnmsubdp", CONST, nfmsv2df4)
+BU_VSX_3 (XXSEL_1TI, "xxsel_1ti", CONST, vector_select_v1ti)
BU_VSX_3 (XXSEL_2DI, "xxsel_2di", CONST, vector_select_v2di)
BU_VSX_3 (XXSEL_2DF, "xxsel_2df", CONST, vector_select_v2df)
BU_VSX_3 (XXSEL_4SF, "xxsel_4sf", CONST, vector_select_v4sf)
BU_VSX_3 (XXSEL_4SI, "xxsel_4si", CONST, vector_select_v4si)
BU_VSX_3 (XXSEL_8HI, "xxsel_8hi", CONST, vector_select_v8hi)
BU_VSX_3 (XXSEL_16QI, "xxsel_16qi", CONST, vector_select_v16qi)
+BU_VSX_3 (XXSEL_1TI_UNS, "xxsel_1ti_uns", CONST, vector_select_v1ti_uns)
BU_VSX_3 (XXSEL_2DI_UNS, "xxsel_2di_uns", CONST, vector_select_v2di_uns)
BU_VSX_3 (XXSEL_4SI_UNS, "xxsel_4si_uns", CONST, vector_select_v4si_uns)
BU_VSX_3 (XXSEL_8HI_UNS, "xxsel_8hi_uns", CONST, vector_select_v8hi_uns)
BU_VSX_3 (XXSEL_16QI_UNS, "xxsel_16qi_uns", CONST, vector_select_v16qi_uns)
+BU_VSX_3 (VPERM_1TI, "vperm_1ti", CONST, altivec_vperm_v1ti)
BU_VSX_3 (VPERM_2DI, "vperm_2di", CONST, altivec_vperm_v2di)
BU_VSX_3 (VPERM_2DF, "vperm_2df", CONST, altivec_vperm_v2df)
BU_VSX_3 (VPERM_4SF, "vperm_4sf", CONST, altivec_vperm_v4sf)
BU_VSX_3 (VPERM_4SI, "vperm_4si", CONST, altivec_vperm_v4si)
BU_VSX_3 (VPERM_8HI, "vperm_8hi", CONST, altivec_vperm_v8hi)
BU_VSX_3 (VPERM_16QI, "vperm_16qi", CONST, altivec_vperm_v16qi)
+BU_VSX_3 (VPERM_1TI_UNS, "vperm_1ti_uns", CONST, altivec_vperm_v1ti_uns)
BU_VSX_3 (VPERM_2DI_UNS, "vperm_2di_uns", CONST, altivec_vperm_v2di_uns)
BU_VSX_3 (VPERM_4SI_UNS, "vperm_4si_uns", CONST, altivec_vperm_v4si_uns)
BU_VSX_3 (VPERM_8HI_UNS, "vperm_8hi_uns", CONST, altivec_vperm_v8hi_uns)
BU_VSX_3 (VPERM_16QI_UNS, "vperm_16qi_uns", CONST, altivec_vperm_v16qi_uns)
+BU_VSX_3 (XXPERMDI_1TI, "xxpermdi_1ti", CONST, vsx_xxpermdi_v1ti)
BU_VSX_3 (XXPERMDI_2DF, "xxpermdi_2df", CONST, vsx_xxpermdi_v2df)
BU_VSX_3 (XXPERMDI_2DI, "xxpermdi_2di", CONST, vsx_xxpermdi_v2di)
BU_VSX_3 (XXPERMDI_4SF, "xxpermdi_4sf", CONST, vsx_xxpermdi_v4sf)
BU_VSX_3 (XXPERMDI_4SI, "xxpermdi_4si", CONST, vsx_xxpermdi_v4si)
BU_VSX_3 (XXPERMDI_8HI, "xxpermdi_8hi", CONST, vsx_xxpermdi_v8hi)
BU_VSX_3 (XXPERMDI_16QI, "xxpermdi_16qi", CONST, vsx_xxpermdi_v16qi)
+BU_VSX_3 (SET_1TI, "set_1ti", CONST, vsx_set_v1ti)
BU_VSX_3 (SET_2DF, "set_2df", CONST, vsx_set_v2df)
BU_VSX_3 (SET_2DI, "set_2di", CONST, vsx_set_v2di)
BU_VSX_3 (XXSLDWI_2DI, "xxsldwi_2di", CONST, vsx_xxsldwi_v2di)
@@ -1247,6 +1275,7 @@ BU_VSX_P (XVCMPGTDP_P, "xvcmpgtdp_
/* VSX builtins that are handled as special cases. */
BU_VSX_X (LXSDX, "lxsdx", MEM)
+BU_VSX_X (LXVD2X_V1TI, "lxvd2x_v1ti", MEM)
BU_VSX_X (LXVD2X_V2DF, "lxvd2x_v2df", MEM)
BU_VSX_X (LXVD2X_V2DI, "lxvd2x_v2di", MEM)
BU_VSX_X (LXVDSX, "lxvdsx", MEM)
@@ -1255,6 +1284,7 @@ BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4
BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", MEM)
BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", MEM)
BU_VSX_X (STXSDX, "stxsdx", MEM)
+BU_VSX_X (STXVD2X_V1TI,      "stxvd2x_v1ti",	MEM)
BU_VSX_X (STXVD2X_V2DF, "stxsdx_v2df", MEM)
BU_VSX_X (STXVD2X_V2DI, "stxsdx_v2di", MEM)
BU_VSX_X (STXVW4X_V4SF, "stxsdx_v4sf", MEM)
@@ -1285,10 +1315,13 @@ BU_VSX_X (XSNMADDMDP, "xsnmaddmdp"
BU_VSX_X (XSNMSUBADP, "xsnmsubadp", FP)
BU_VSX_X (XSNMSUBMDP, "xsnmsubmdp", FP)
BU_VSX_X (XSSUBDP, "xssubdp", FP)
+BU_VSX_X (VEC_INIT_V1TI, "vec_init_v1ti", CONST)
BU_VSX_X (VEC_INIT_V2DF, "vec_init_v2df", CONST)
BU_VSX_X (VEC_INIT_V2DI, "vec_init_v2di", CONST)
+BU_VSX_X (VEC_SET_V1TI, "vec_set_v1ti", CONST)
BU_VSX_X (VEC_SET_V2DF, "vec_set_v2df", CONST)
BU_VSX_X (VEC_SET_V2DI, "vec_set_v2di", CONST)
+BU_VSX_X (VEC_EXT_V1TI, "vec_ext_v1ti", CONST)
BU_VSX_X (VEC_EXT_V2DF, "vec_ext_v2df", CONST)
BU_VSX_X (VEC_EXT_V2DI, "vec_ext_v2di", CONST)
@@ -1332,7 +1365,9 @@ BU_P8V_AV_1 (VPOPCNTD, "vpopcntd",
BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd)
/* 2 argument altivec instructions added in ISA 2.07. */
+BU_P8V_AV_2 (VADDCUQ, "vaddcuq", CONST, altivec_vaddcuq)
BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3)
+BU_P8V_AV_2 (VADDUQM, "vadduqm", CONST, altivec_vadduqm)
BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3)
BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3)
BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3)
@@ -1347,12 +1382,15 @@ BU_P8V_AV_2 (VRLD, "vrld", CONST, vrot
BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3)
BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3)
BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3)
+BU_P8V_AV_2 (VSUBCUQ, "vsubcuq", CONST, altivec_vsubcuq)
BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3)
+BU_P8V_AV_2 (VSUBUQM, "vsubuqm", CONST, altivec_vsubuqm)
BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3)
BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3)
BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3)
BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3)
+BU_P8V_AV_2 (EQV_V1TI, "eqv_v1ti", CONST, eqvv1ti3)
BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3)
BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3)
@@ -1360,6 +1398,7 @@ BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", C
BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3)
BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3)
BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3)
+BU_P8V_AV_2 (NAND_V1TI, "nand_v1ti", CONST, nandv1ti3)
BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3)
BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3)
@@ -1367,9 +1406,16 @@ BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CO
BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3)
BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3)
BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3)
+BU_P8V_AV_2 (ORC_V1TI, "orc_v1ti", CONST, orcv1ti3)
BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3)
BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3)
+/* 3 argument altivec instructions added in ISA 2.07. */
+BU_P8V_AV_3 (VADDEUQM, "vaddeuqm", CONST, altivec_vaddeuqm)
+BU_P8V_AV_3 (VADDECUQ, "vaddecuq", CONST, altivec_vaddecuq)
+BU_P8V_AV_3 (VSUBEUQM, "vsubeuqm", CONST, altivec_vsubeuqm)
+BU_P8V_AV_3 (VSUBECUQ, "vsubecuq", CONST, altivec_vsubecuq)
+
/* Vector comparison instructions added in ISA 2.07. */
BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di)
BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di)
@@ -1399,7 +1445,9 @@ BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd")
BU_P8V_OVERLOAD_2 (EQV, "eqv")
BU_P8V_OVERLOAD_2 (NAND, "nand")
BU_P8V_OVERLOAD_2 (ORC, "orc")
+BU_P8V_OVERLOAD_2 (VADDCUQ, "vaddcuq")
BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm")
+BU_P8V_OVERLOAD_2 (VADDUQM, "vadduqm")
BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd")
BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud")
BU_P8V_OVERLOAD_2 (VMINSD, "vminsd")
@@ -1414,7 +1462,15 @@ BU_P8V_OVERLOAD_2 (VRLD, "vrld")
BU_P8V_OVERLOAD_2 (VSLD, "vsld")
BU_P8V_OVERLOAD_2 (VSRAD, "vsrad")
BU_P8V_OVERLOAD_2 (VSRD, "vsrd")
+BU_P8V_OVERLOAD_2 (VSUBCUQ, "vsubcuq")
BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm")
+BU_P8V_OVERLOAD_2 (VSUBUQM, "vsubuqm")
+
+/* ISA 2.07 vector overloaded 3 argument functions. */
+BU_P8V_OVERLOAD_3 (VADDECUQ, "vaddecuq")
+BU_P8V_OVERLOAD_3 (VADDEUQM, "vaddeuqm")
+BU_P8V_OVERLOAD_3 (VSUBECUQ, "vsubecuq")
+BU_P8V_OVERLOAD_3 (VSUBEUQM, "vsubeuqm")
/* 1 argument crypto functions. */
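
For reference, a sketch of what one use of the new three-operand macro
expands to, with the arguments taken from the BU_P8V_AV_3 body above (the
VADDEUQM instance is illustrative; exact whitespace differs from real
preprocessor output):

  RS6000_BUILTIN_3 (P8V_BUILTIN_VADDEUQM,             /* ENUM */
                    "__builtin_altivec_vaddeuqm",     /* NAME */
                    RS6000_BTM_P8_VECTOR,             /* MASK */
                    (RS6000_BTC_CONST                 /* ATTR */
                     | RS6000_BTC_TERNARY),
                    CODE_FOR_altivec_vaddeuqm)        /* ICODE */
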
===================================================================
@@ -92,6 +92,8 @@ static GTY(()) tree pixel_keyword;
static GTY(()) tree __bool_keyword;
static GTY(()) tree bool_keyword;
static GTY(()) tree _Bool_keyword;
+static GTY(()) tree __int128_type;
+static GTY(()) tree __uint128_type;
/* Preserved across calls. */
static tree expand_bool_pixel;
@@ -124,9 +126,10 @@ altivec_categorize_keyword (const cpp_to
static void
init_vector_keywords (void)
{
- /* Keywords without two leading underscores are context-sensitive,
- and hence implemented as conditional macros, controlled by the
- rs6000_macro_to_expand() function below. */
+ /* Keywords without two leading underscores are context-sensitive, and hence
+ implemented as conditional macros, controlled by the
+ rs6000_macro_to_expand() function below. If we have ISA 2.07 64-bit
+ support, record the __int128_t and __uint128_t types. */
__vector_keyword = get_identifier ("__vector");
C_CPP_HASHNODE (__vector_keyword)->flags |= NODE_CONDITIONAL;
@@ -148,6 +151,12 @@ init_vector_keywords (void)
_Bool_keyword = get_identifier ("_Bool");
C_CPP_HASHNODE (_Bool_keyword)->flags |= NODE_CONDITIONAL;
+
+ if (TARGET_VADDUQM)
+ {
+ __int128_type = get_identifier ("__int128_t");
+ __uint128_type = get_identifier ("__uint128_t");
+ }
}
/* Called to decide whether a conditional macro should be expanded.
@@ -223,7 +232,8 @@ rs6000_macro_to_expand (cpp_reader *pfil
|| rid_code == RID_SHORT || rid_code == RID_SIGNED
|| rid_code == RID_INT || rid_code == RID_CHAR
|| rid_code == RID_FLOAT
- || (rid_code == RID_DOUBLE && TARGET_VSX))
+ || (rid_code == RID_DOUBLE && TARGET_VSX)
+ || (rid_code == RID_INT128 && TARGET_VADDUQM))
{
expand_this = C_CPP_HASHNODE (__vector_keyword);
/* If the next keyword is bool or pixel, it
@@ -250,6 +260,13 @@ rs6000_macro_to_expand (cpp_reader *pfil
expand_bool_pixel = __bool_keyword;
}
}
+
+ /* Support vector __int128_t, but we don't need to worry about bool
+ or pixel on this type. */
+ else if (TARGET_VADDUQM
+ && (ident == C_CPP_HASHNODE (__int128_type)
+ || ident == C_CPP_HASHNODE (__uint128_type)))
+ expand_this = C_CPP_HASHNODE (__vector_keyword);
}
}
else if (expand_bool_pixel && ident == C_CPP_HASHNODE (__pixel_keyword))
@@ -706,6 +723,11 @@ const struct altivec_builtin_types altiv
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
{ ALTIVEC_BUILTIN_VEC_VADDFP, ALTIVEC_BUILTIN_VADDFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
@@ -2327,6 +2349,11 @@ const struct altivec_builtin_types altiv
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSUBFP, ALTIVEC_BUILTIN_VSUBFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
@@ -3726,6 +3753,12 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { P8V_BUILTIN_VEC_VADDCUQ, P8V_BUILTIN_VADDCUQ,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { P8V_BUILTIN_VEC_VADDCUQ, P8V_BUILTIN_VADDCUQ,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
+
{ P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
{ P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
@@ -3739,6 +3772,12 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUQM, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { P8V_BUILTIN_VEC_VADDUQM, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
+
{ P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
{ P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
@@ -3781,6 +3820,30 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
+ { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
+
+ { P8V_BUILTIN_VEC_VADDEUQM, P8V_BUILTIN_VADDEUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
+ { P8V_BUILTIN_VEC_VADDEUQM, P8V_BUILTIN_VADDEUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
+
+ { P8V_BUILTIN_VEC_VSUBECUQ, P8V_BUILTIN_VSUBECUQ,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
+ { P8V_BUILTIN_VEC_VSUBECUQ, P8V_BUILTIN_VSUBECUQ,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
+
+ { P8V_BUILTIN_VEC_VSUBEUQM, P8V_BUILTIN_VSUBEUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
+ { P8V_BUILTIN_VEC_VSUBEUQM, P8V_BUILTIN_VSUBEUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
+
{ P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
{ P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
@@ -3900,6 +3963,12 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRD,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBCUQ, P8V_BUILTIN_VSUBCUQ,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { P8V_BUILTIN_VEC_VSUBCUQ, P8V_BUILTIN_VSUBCUQ,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
+
{ P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
{ P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
@@ -3913,6 +3982,12 @@ const struct altivec_builtin_types altiv
{ P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUQM, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUQM, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
+
{ P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
{ P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
@@ -4145,6 +4220,10 @@ altivec_resolve_overloaded_builtin (loca
unsigned_p = TYPE_UNSIGNED (type);
switch (TYPE_MODE (type))
{
+ case TImode:
+ type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
+ size = 1;
+ break;
case DImode:
type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
size = 2;
@@ -4232,6 +4311,14 @@ altivec_resolve_overloaded_builtin (loca
if (call)
return build_call_expr (call, 2, arg1, arg2);
}
+ else if (mode == V1TImode && VECTOR_MEM_VSX_P (mode)
+ && TREE_CODE (arg2) == INTEGER_CST
+ && TREE_INT_CST_HIGH (arg2) == 0
+ && TREE_INT_CST_LOW (arg2) == 0)
+ {
+ tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI];
+ return build_call_expr (call, 2, arg1, arg2);
+ }
/* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
arg1_inner_type = TREE_TYPE (arg1_type);
@@ -4331,6 +4418,17 @@ altivec_resolve_overloaded_builtin (loca
if (call)
return build_call_expr (call, 3, arg1, arg0, arg2);
}
+ else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode)
+ && TREE_CODE (arg2) == INTEGER_CST
+ && TREE_INT_CST_HIGH (arg2) == 0
+ && TREE_INT_CST_LOW (arg2) == 0)
+ {
+ tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V1TI];
+
+ /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+ reversed. */
+ return build_call_expr (call, 3, arg1, arg0, arg2);
+ }
/* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
arg1_inner_type = TREE_TYPE (arg1_type);
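
With the keyword handling and overload table entries above, the
context-sensitive "vector" keyword accepts __int128_t/__uint128_t, and the
generic vec_add/vec_sub interfaces resolve to the new quadword instructions.
A minimal usage sketch (assumes -mcpu=power8 on a 64-bit target; the
function names are illustrative):

  #include <altivec.h>

  vector __int128_t
  add_v1ti (vector __int128_t a, vector __int128_t b)
  {
    return vec_add (a, b);	/* resolves to P8V_BUILTIN_VADDUQM */
  }

  vector __uint128_t
  sub_v1ti (vector __uint128_t a, vector __uint128_t b)
  {
    return vec_sub (a, b);	/* resolves to P8V_BUILTIN_VSUBUQM */
  }
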
===================================================================
@@ -1725,7 +1725,8 @@ rs6000_hard_regno_mode_ok (int regno, en
&& (VECTOR_MEM_VSX_P (mode)
|| (TARGET_VSX_SCALAR_FLOAT && mode == SFmode)
|| (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode))
- || (TARGET_VSX_TIMODE && mode == TImode)))
+ || (TARGET_VSX_TIMODE && mode == TImode)
+ || (TARGET_VADDUQM && mode == V1TImode)))
{
if (FP_REGNO_P (regno))
return FP_REGNO_P (last_regno);
@@ -1776,7 +1777,8 @@ rs6000_hard_regno_mode_ok (int regno, en
/* AltiVec only in AldyVec registers. */
if (ALTIVEC_REGNO_P (regno))
- return VECTOR_MEM_ALTIVEC_OR_VSX_P (mode);
+ return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
+ || mode == V1TImode);
/* ...but GPRs can hold SIMD data on the SPE in one register. */
if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
@@ -1971,10 +1973,12 @@ rs6000_debug_reg_global (void)
V8HImode,
V4SImode,
V2DImode,
+ V1TImode,
V32QImode,
V16HImode,
V8SImode,
V4DImode,
+ V2TImode,
V2SFmode,
V4SFmode,
V2DFmode,
@@ -2553,6 +2557,11 @@ rs6000_init_hard_regno_mode_ok (bool glo
rs6000_vector_unit[V2DImode]
= (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
rs6000_vector_align[V2DImode] = align64;
+
+ rs6000_vector_mem[V1TImode] = VECTOR_VSX;
+ rs6000_vector_unit[V1TImode]
+ = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
+ rs6000_vector_align[V1TImode] = 128;
}
/* DFmode, see if we want to use the VSX unit. */
@@ -2676,6 +2685,8 @@ rs6000_init_hard_regno_mode_ok (bool glo
reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
+ reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
+ reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
@@ -2704,6 +2715,7 @@ rs6000_init_hard_regno_mode_ok (bool glo
if (TARGET_POWERPC64)
{
reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
+ reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
@@ -2713,6 +2725,7 @@ rs6000_init_hard_regno_mode_ok (bool glo
reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
+ reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
@@ -2739,6 +2752,8 @@ rs6000_init_hard_regno_mode_ok (bool glo
reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
+ reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
+ reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
@@ -4250,6 +4265,8 @@ rs6000_preferred_simd_mode (enum machine
{
case SFmode:
return V4SFmode;
+ case TImode:
+ return V1TImode;
case DImode:
return V2DImode;
case SImode:
@@ -5011,7 +5028,7 @@ vspltis_constant (rtx op, unsigned step,
HOST_WIDE_INT splat_val;
HOST_WIDE_INT msb_val;
- if (mode == V2DImode || mode == V2DFmode)
+ if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
return false;
nunits = GET_MODE_NUNITS (mode);
@@ -5090,7 +5107,7 @@ easy_altivec_constant (rtx op, enum mach
if (mode == V2DFmode)
return zero_constant (op, mode);
- if (mode == V2DImode)
+ else if (mode == V2DImode)
{
/* In case the compiler is built 32-bit, CONST_DOUBLE constants are not
easy. */
@@ -5108,6 +5125,10 @@ easy_altivec_constant (rtx op, enum mach
return false;
}
+ /* V1TImode is a special container for TImode. Ignore for now. */
+ else if (mode == V1TImode)
+ return false;
+
/* Start with a vspltisw. */
step = GET_MODE_NUNITS (mode) / 4;
copies = 1;
@@ -5189,7 +5210,7 @@ output_vec_const_move (rtx *operands)
if (zero_constant (vec, mode))
return "xxlxor %x0,%x0,%x0";
- if (mode == V2DImode
+ if ((mode == V2DImode || mode == V1TImode)
&& INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
&& INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
return "vspltisw %0,-1";
@@ -5575,6 +5596,13 @@ rs6000_expand_vector_set (rtx target, rt
return;
}
+ /* Simplify setting single element vectors like V1TImode. */
+ if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
+ {
+ emit_move_insn (target, gen_lowpart (mode, val));
+ return;
+ }
+
/* Load single variable value. */
mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
@@ -5635,6 +5663,10 @@ rs6000_expand_vector_extract (rtx target
{
default:
break;
+ case V1TImode:
+ gcc_assert (elt == 0 && inner_mode == TImode);
+ emit_move_insn (target, gen_lowpart (TImode, vec));
+      return;
case V2DFmode:
emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
return;
@@ -6076,6 +6108,7 @@ reg_offset_addressing_ok_p (enum machine
case V4SImode:
case V2DFmode:
case V2DImode:
+ case V1TImode:
case TImode:
/* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
TImode is not a vector mode, if we want to use the VSX registers to
@@ -7960,6 +7993,9 @@ rs6000_const_vec (enum machine_mode mode
switch (mode)
{
+ case V1TImode:
+ subparts = 1;
+ break;
case V2DFmode:
case V2DImode:
subparts = 2;
@@ -8003,9 +8039,20 @@ rs6000_gen_le_vsx_permute (rtx source, e
void
rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
{
- rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
- rtx permute_mem = rs6000_gen_le_vsx_permute (source, mode);
- rtx permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
+ rtx tmp, permute_mem, permute_reg;
+
+  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
+     V1TImode).  */
+ if (mode == TImode || mode == V1TImode)
+ {
+ mode = V2DImode;
+ dest = gen_lowpart (V2DImode, dest);
+ source = adjust_address (source, V2DImode, 0);
+ }
+
+ tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+ permute_mem = rs6000_gen_le_vsx_permute (source, mode);
+ permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
}
@@ -8016,9 +8063,20 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
void
rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
{
- rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
- rtx permute_src = rs6000_gen_le_vsx_permute (source, mode);
- rtx permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
+ rtx tmp, permute_src, permute_tmp;
+
+  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
+     V1TImode).  */
+ if (mode == TImode || mode == V1TImode)
+ {
+ mode = V2DImode;
+ dest = adjust_address (dest, V2DImode, 0);
+ source = gen_lowpart (V2DImode, source);
+ }
+
+ tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+ permute_src = rs6000_gen_le_vsx_permute (source, mode);
+ permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
}
@@ -8034,7 +8092,6 @@ rs6000_emit_le_vsx_move (rtx dest, rtx s
{
gcc_assert (!BYTES_BIG_ENDIAN
&& VECTOR_MEM_VSX_P (mode)
- && mode != TImode
&& !gpr_or_gpr_p (dest, source)
&& (MEM_P (source) ^ MEM_P (dest)));
@@ -8318,6 +8375,7 @@ rs6000_emit_move (rtx dest, rtx source,
case V1DImode:
case V2DFmode:
case V2DImode:
+ case V1TImode:
if (CONSTANT_P (operands[1])
&& !easy_vector_constant (operands[1], mode))
operands[1] = force_const_mem (mode, operands[1]);
@@ -12403,6 +12461,9 @@ altivec_expand_ld_builtin (tree exp, rtx
break;
case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
icode = CODE_FOR_vector_altivec_load_v2di;
+      break;
+    case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
+      icode = CODE_FOR_vector_altivec_load_v1ti;
break;
default:
*expandedp = false;
@@ -12462,6 +12522,9 @@ altivec_expand_st_builtin (tree exp, rtx
break;
case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
icode = CODE_FOR_vector_altivec_store_v2di;
+ case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
+ icode = CODE_FOR_vector_altivec_store_v1ti;
break;
default:
*expandedp = false;
@@ -12554,21 +12616,33 @@ altivec_expand_vec_init_builtin (tree ty
enum machine_mode tmode = TYPE_MODE (type);
enum machine_mode inner_mode = GET_MODE_INNER (tmode);
int i, n_elt = GET_MODE_NUNITS (tmode);
- rtvec v = rtvec_alloc (n_elt);
gcc_assert (VECTOR_MODE_P (tmode));
gcc_assert (n_elt == call_expr_nargs (exp));
- for (i = 0; i < n_elt; ++i)
+ if (!target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+  /* If we have a vector composed of a single element, such as V1TImode, do
+     the initialization directly.  */
+ if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
{
- rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
- RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
+ rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
+ emit_move_insn (target, gen_lowpart (tmode, x));
}
+ else
+ {
+ rtvec v = rtvec_alloc (n_elt);
- if (!target || !register_operand (target, tmode))
- target = gen_reg_rtx (tmode);
+ for (i = 0; i < n_elt; ++i)
+ {
+ rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
+ RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
+ }
+
+ rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
+ }
- rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
return target;
}
@@ -12733,6 +12807,8 @@ altivec_expand_builtin (tree exp, rtx ta
case ALTIVEC_BUILTIN_STVRXL:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
+ case VSX_BUILTIN_STXVD2X_V1TI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
case VSX_BUILTIN_STXVD2X_V2DF:
return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
case VSX_BUILTIN_STXVD2X_V2DI:
@@ -12813,6 +12889,7 @@ altivec_expand_builtin (tree exp, rtx ta
case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
case VSX_BUILTIN_VEC_INIT_V2DF:
case VSX_BUILTIN_VEC_INIT_V2DI:
+ case VSX_BUILTIN_VEC_INIT_V1TI:
return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
case ALTIVEC_BUILTIN_VEC_SET_V4SI:
@@ -12821,6 +12898,7 @@ altivec_expand_builtin (tree exp, rtx ta
case ALTIVEC_BUILTIN_VEC_SET_V4SF:
case VSX_BUILTIN_VEC_SET_V2DF:
case VSX_BUILTIN_VEC_SET_V2DI:
+ case VSX_BUILTIN_VEC_SET_V1TI:
return altivec_expand_vec_set_builtin (exp);
case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
@@ -12829,6 +12907,7 @@ altivec_expand_builtin (tree exp, rtx ta
case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
case VSX_BUILTIN_VEC_EXT_V2DF:
case VSX_BUILTIN_VEC_EXT_V2DI:
+ case VSX_BUILTIN_VEC_EXT_V1TI:
return altivec_expand_vec_ext_builtin (exp, target);
default:
@@ -12916,6 +12995,9 @@ altivec_expand_builtin (tree exp, rtx ta
case ALTIVEC_BUILTIN_LVRXL:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
exp, target, true);
+ case VSX_BUILTIN_LXVD2X_V1TI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
+ exp, target, false);
case VSX_BUILTIN_LXVD2X_V2DF:
return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
exp, target, false);
@@ -13640,6 +13722,14 @@ rs6000_init_builtins (void)
opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
+ /* We use V1TI mode as a special container to hold __int128_t items that
+ must live in VSX registers. */
+ if (intTI_type_node)
+ {
+ V1TI_type_node = build_vector_type (intTI_type_node, 1);
+ unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
+ }
+
/* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
types, especially in C++ land. Similarly, 'vector pixel' is distinct from
'vector unsigned short'. */
@@ -13662,6 +13752,8 @@ rs6000_init_builtins (void)
uintSI_type_internal_node = unsigned_intSI_type_node;
intDI_type_internal_node = intDI_type_node;
uintDI_type_internal_node = unsigned_intDI_type_node;
+ intTI_type_internal_node = intTI_type_node;
+ uintTI_type_internal_node = unsigned_intTI_type_node;
float_type_internal_node = float_type_node;
double_type_internal_node = double_type_node;
void_type_internal_node = void_type_node;
@@ -13674,8 +13766,12 @@ rs6000_init_builtins (void)
builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
builtin_mode_to_type[DImode][0] = intDI_type_node;
builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
+ builtin_mode_to_type[TImode][0] = intTI_type_node;
+ builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
builtin_mode_to_type[SFmode][0] = float_type_node;
builtin_mode_to_type[DFmode][0] = double_type_node;
+ builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
+ builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
@@ -13744,14 +13840,41 @@ rs6000_init_builtins (void)
tdecl = add_builtin_type ("__vector double", V2DF_type_node);
TYPE_NAME (V2DF_type_node) = tdecl;
- tdecl = add_builtin_type ("__vector long", V2DI_type_node);
- TYPE_NAME (V2DI_type_node) = tdecl;
+ if (TARGET_POWERPC64)
+ {
+ tdecl = add_builtin_type ("__vector long", V2DI_type_node);
+ TYPE_NAME (V2DI_type_node) = tdecl;
+
+ tdecl = add_builtin_type ("__vector unsigned long",
+ unsigned_V2DI_type_node);
+ TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
- tdecl = add_builtin_type ("__vector unsigned long", unsigned_V2DI_type_node);
- TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
+ tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
+ TYPE_NAME (bool_V2DI_type_node) = tdecl;
+ }
+ else
+ {
+ tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
+ TYPE_NAME (V2DI_type_node) = tdecl;
- tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
- TYPE_NAME (bool_V2DI_type_node) = tdecl;
+ tdecl = add_builtin_type ("__vector unsigned long long",
+ unsigned_V2DI_type_node);
+ TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
+
+ tdecl = add_builtin_type ("__vector __bool long long",
+ bool_V2DI_type_node);
+ TYPE_NAME (bool_V2DI_type_node) = tdecl;
+ }
+
+ if (V1TI_type_node)
+ {
+ tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
+ TYPE_NAME (V1TI_type_node) = tdecl;
+
+ tdecl = add_builtin_type ("__vector unsigned __int128",
+ unsigned_V1TI_type_node);
+ TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
+ }
/* Paired and SPE builtins are only available if you build a compiler with
the appropriate options, so only create those builtins with the
@@ -14560,6 +14683,34 @@ altivec_init_builtins (void)
ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
integer_type_node, NULL_TREE);
def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
+
+
+ if (V1TI_type_node)
+ {
+ tree v1ti_ftype_long_pcvoid
+ = build_function_type_list (V1TI_type_node,
+ long_integer_type_node, pcvoid_type_node,
+ NULL_TREE);
+ tree void_ftype_v1ti_long_pvoid
+ = build_function_type_list (void_type_node,
+ V1TI_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
+ VSX_BUILTIN_LXVD2X_V1TI);
+ def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
+ VSX_BUILTIN_STXVD2X_V1TI);
+ ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
+ NULL_TREE, NULL_TREE);
+ def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
+ ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
+ intTI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
+ ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
+ }
+
}
static void
@@ -16772,7 +16923,8 @@ rs6000_preferred_reload_class (rtx x, en
if (GET_MODE_SIZE (mode) <= 8)
return FLOAT_REGS;
- if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode))
+ if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
+ || mode == V1TImode)
return ALTIVEC_REGS;
return rclass;
@@ -17209,6 +17361,31 @@ rs6000_output_move_128bit (rtx operands[
gcc_unreachable ();
}
+/* Validate a 128-bit move. */
+bool
+rs6000_move_128bit_ok_p (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ return (gpc_reg_operand (operands[0], mode)
+ || gpc_reg_operand (operands[1], mode));
+}
+
+/* Return true if a 128-bit move needs to be split. */
+bool
+rs6000_split_128bit_ok_p (rtx operands[])
+{
+ if (!reload_completed)
+ return false;
+
+ if (!gpr_or_gpr_p (operands[0], operands[1]))
+ return false;
+
+ if (quad_load_store_p (operands[0], operands[1]))
+ return false;
+
+ return true;
+}
+
/* Given a comparison operation, return the bit number in CCR to test. We
know this is a valid comparison.
@@ -27796,6 +27973,9 @@ rs6000_handle_altivec_attribute (tree *n
unsigned_p = TYPE_UNSIGNED (type);
switch (mode)
{
+ case TImode:
+ result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
+ break;
case DImode:
result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
break;
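
The vec_init/vec_set/vec_ext builtins registered above, together with the
resolver changes earlier in the patch, only accept element number 0 for the
single-element V1TImode type; other element numbers fall back to the generic
memory path.  A hedged sketch of the resulting source-level usage (the
function names are illustrative):

  #include <altivec.h>

  vector __int128_t
  put (vector __int128_t v, __int128_t x)
  {
    return vec_insert (x, v, 0);	/* __builtin_vec_set_v1ti */
  }

  __int128_t
  get (vector __int128_t v)
  {
    return vec_extract (v, 0);		/* __builtin_vec_ext_v1ti */
  }
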
===================================================================
@@ -34,11 +34,11 @@ (define_mode_iterator VSX_DF [V2DF DF])
(define_mode_iterator VSX_F [V4SF V2DF])
;; Iterator for logical types supported by VSX
-(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI])
;; Iterator for memory move. Handle TImode specially to allow
;; it to use gprs as well as vsx registers.
-(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI])
(define_mode_iterator VSX_M2 [V16QI
V8HI
@@ -46,6 +46,7 @@ (define_mode_iterator VSX_M2 [V16QI
V2DI
V4SF
V2DF
+ V1TI
(TI "TARGET_VSX_TIMODE")])
;; Map into the appropriate load/store name based on the type
@@ -56,6 +57,7 @@ (define_mode_attr VSm [(V16QI "vw4")
(V2DF "vd2")
(V2DI "vd2")
(DF "d")
+ (V1TI "vd2")
(TI "vd2")])
;; Map into the appropriate suffix based on the type
@@ -67,6 +69,7 @@ (define_mode_attr VSs [(V16QI "sp")
(V2DI "dp")
(DF "dp")
(SF "sp")
+ (V1TI "dp")
(TI "dp")])
;; Map the register class used
@@ -78,6 +81,7 @@ (define_mode_attr VSr [(V16QI "v")
(V2DF "wd")
(DF "ws")
(SF "d")
+ (V1TI "v")
(TI "wt")])
;; Map the register class used for float<->int conversions
@@ -123,6 +127,7 @@ (define_mode_attr VSv [(V16QI "v")
(V4SF "v")
(V2DI "v")
(V2DF "v")
+ (V1TI "v")
(DF "s")])
;; Appropriate type for add ops (and other simple FP ops)
@@ -180,7 +185,8 @@ (define_mode_attr VS_spdp_type [(DF "fp"
(V2DF "vecdouble")])
;; Map the scalar mode for a vector type
-(define_mode_attr VS_scalar [(V2DF "DF")
+(define_mode_attr VS_scalar [(V1TI "TI")
+ (V2DF "DF")
(V2DI "DI")
(V4SF "SF")
(V4SI "SI")
@@ -191,7 +197,8 @@ (define_mode_attr VS_scalar [(V2DF "DF")
(define_mode_attr VS_double [(V4SI "V8SI")
(V4SF "V8SF")
(V2DI "V4DI")
- (V2DF "V4DF")])
+ (V2DF "V4DF")
+ (V1TI "V2TI")])
;; Constants for creating unspecs
(define_c_enum "unspec"
@@ -1489,6 +1496,21 @@ (define_insn "*vsx_stxvd2x16_le_V16QI"
"stxvd2x %x1,%y0"
[(set_attr "type" "vecstore")])
+;; Convert a TImode value into V1TImode
+(define_expand "vsx_set_v1ti"
+ [(match_operand:V1TI 0 "nonimmediate_operand" "")
+ (match_operand:V1TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "input_operand" "")
+ (match_operand:QI 3 "u5bit_cint_operand" "")]
+ "VECTOR_MEM_VSX_P (V1TImode)"
+{
+ if (operands[3] != const0_rtx)
+ gcc_unreachable ();
+
+  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
+ DONE;
+})
+
;; Set the element of a V2DI/VD2F mode
(define_insn "vsx_set_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
===================================================================
@@ -529,6 +529,7 @@ extern int rs6000_vector_align[];
#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
+#define TARGET_VADDUQM (TARGET_P8_VECTOR && TARGET_POWERPC64)
/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
in power7, so conditionalize them on p8 features. TImode syncs need quad
@@ -1194,7 +1195,7 @@ enum data_align { align_abi, align_opt,
#define ALTIVEC_OR_VSX_VECTOR_MODE(MODE) \
(ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE) \
- || (MODE) == V2DImode)
+ || (MODE) == V2DImode || (MODE) == V1TImode)
#define SPE_VECTOR_MODE(MODE) \
((MODE) == V4HImode \
@@ -2577,6 +2578,7 @@ enum rs6000_builtin_type_index
RS6000_BTI_opaque_p_V2SI,
RS6000_BTI_opaque_V4SI,
RS6000_BTI_V16QI,
+ RS6000_BTI_V1TI,
RS6000_BTI_V2SI,
RS6000_BTI_V2SF,
RS6000_BTI_V2DI,
@@ -2586,6 +2588,7 @@ enum rs6000_builtin_type_index
RS6000_BTI_V4SF,
RS6000_BTI_V8HI,
RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V1TI,
RS6000_BTI_unsigned_V8HI,
RS6000_BTI_unsigned_V4SI,
RS6000_BTI_unsigned_V2DI,
@@ -2611,6 +2614,8 @@ enum rs6000_builtin_type_index
RS6000_BTI_UINTSI, /* unsigned_intSI_type_node */
RS6000_BTI_INTDI, /* intDI_type_node */
RS6000_BTI_UINTDI, /* unsigned_intDI_type_node */
+ RS6000_BTI_INTTI, /* intTI_type_node */
+ RS6000_BTI_UINTTI, /* unsigned_intTI_type_node */
RS6000_BTI_float, /* float_type_node */
RS6000_BTI_double, /* double_type_node */
RS6000_BTI_void, /* void_type_node */
@@ -2623,6 +2628,7 @@ enum rs6000_builtin_type_index
#define opaque_p_V2SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_p_V2SI])
#define opaque_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_V4SI])
#define V16QI_type_node (rs6000_builtin_types[RS6000_BTI_V16QI])
+#define V1TI_type_node (rs6000_builtin_types[RS6000_BTI_V1TI])
#define V2DI_type_node (rs6000_builtin_types[RS6000_BTI_V2DI])
#define V2DF_type_node (rs6000_builtin_types[RS6000_BTI_V2DF])
#define V2SI_type_node (rs6000_builtin_types[RS6000_BTI_V2SI])
@@ -2632,6 +2638,7 @@ enum rs6000_builtin_type_index
#define V4SF_type_node (rs6000_builtin_types[RS6000_BTI_V4SF])
#define V8HI_type_node (rs6000_builtin_types[RS6000_BTI_V8HI])
#define unsigned_V16QI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V16QI])
+#define unsigned_V1TI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V1TI])
#define unsigned_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V8HI])
#define unsigned_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V4SI])
#define unsigned_V2DI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V2DI])
@@ -2658,6 +2665,8 @@ enum rs6000_builtin_type_index
#define uintSI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTSI])
#define intDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTDI])
#define uintDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTDI])
+#define intTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTTI])
+#define uintTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTTI])
#define float_type_internal_node (rs6000_builtin_types[RS6000_BTI_float])
#define double_type_internal_node (rs6000_builtin_types[RS6000_BTI_double])
#define void_type_internal_node (rs6000_builtin_types[RS6000_BTI_void])
===================================================================
@@ -136,6 +136,12 @@ (define_c_enum "unspec"
UNSPEC_VMRGL_DIRECT
UNSPEC_VSPLT_DIRECT
UNSPEC_VSUMSWS_DIRECT
+ UNSPEC_VADDCUQ
+ UNSPEC_VADDEUQM
+ UNSPEC_VADDECUQ
+ UNSPEC_VSUBCUQ
+ UNSPEC_VSUBEUQM
+ UNSPEC_VSUBECUQ
])
(define_c_enum "unspecv"
@@ -158,17 +164,18 @@ (define_mode_iterator VF [V4SF])
(define_mode_iterator V [V4SI V8HI V16QI V4SF])
;; Vec modes for move/logical/permute ops, include vector types for move not
;; otherwise handled by altivec (v2df, v2di, ti)
-(define_mode_iterator VM [V4SI V8HI V16QI V4SF V2DF V2DI TI])
+(define_mode_iterator VM [V4SI V8HI V16QI V4SF V2DF V2DI V1TI TI])
;; Like VM, except don't do TImode
-(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI])
+(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI V1TI])
(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")])
(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")])
(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)")
(V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)")
(V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)")
- (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")])
+ (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")
+ (V1TI "VECTOR_UNIT_ALTIVEC_P (V1TImode)")])
;; Vector pack/unpack
(define_mode_iterator VP [V2DI V4SI V8HI])
@@ -3226,3 +3233,92 @@ (define_insn "p8v_vgbbd"
"vgbbd %0,%1"
[(set_attr "length" "4")
(set_attr "type" "vecsimple")])
+
+
+;; 128-bit binary integer arithmetic
+;; We have a special container type (V1TImode) to allow operations using the
+;; ISA 2.07 128-bit binary support to target the VMX/altivec registers without
+;; having to worry about the register allocator deciding GPRs are better.
+
+(define_insn "altivec_vadduqm"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (plus:V1TI (match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")))]
+ "TARGET_VADDUQM"
+ "vadduqm %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vaddcuq"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")]
+ UNSPEC_VADDCUQ))]
+ "TARGET_VADDUQM"
+ "vaddcuq %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsubuqm"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (minus:V1TI (match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")))]
+ "TARGET_VADDUQM"
+ "vsubuqm %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsubcuq"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")]
+ UNSPEC_VSUBCUQ))]
+ "TARGET_VADDUQM"
+ "vsubcuq %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vaddeuqm"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")
+ (match_operand:V1TI 3 "register_operand" "v")]
+ UNSPEC_VADDEUQM))]
+ "TARGET_VADDUQM"
+ "vaddeuqm %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vaddecuq"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")
+ (match_operand:V1TI 3 "register_operand" "v")]
+ UNSPEC_VADDECUQ))]
+ "TARGET_VADDUQM"
+ "vaddecuq %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsubeuqm"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")
+ (match_operand:V1TI 3 "register_operand" "v")]
+ UNSPEC_VSUBEUQM))]
+ "TARGET_VADDUQM"
+ "vsubeuqm %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsubecuq"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")
+ (match_operand:V1TI 3 "register_operand" "v")]
+ UNSPEC_VSUBECUQ))]
+ "TARGET_VADDUQM"
+ "vsubecuq %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
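
The carry (vaddcuq/vsubcuq) and extend (vaddeuqm/vaddecuq and the subtract
forms) instructions defined above exist to chain 128-bit operations into
wider arithmetic.  A sketch of a 256-bit add built from them, assuming the
usual ISA 2.07 semantics (the typedef and function name are illustrative):

  #include <altivec.h>

  typedef vector __uint128_t vui128_t;

  /* 256-bit add; all values stay in VMX registers.  */
  void
  add256 (vui128_t *hi, vui128_t *lo,
          vui128_t a_hi, vui128_t a_lo, vui128_t b_hi, vui128_t b_lo)
  {
    vui128_t carry = vec_vaddcuq (a_lo, b_lo);	/* carry out of low half */
    *lo = vec_vadduqm (a_lo, b_lo);		/* low 128-bit sum */
    *hi = vec_vaddeuqm (a_hi, b_hi, carry);	/* high half plus carry in */
  }
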
===================================================================
@@ -288,7 +288,8 @@ (define_mode_iterator FMOVE128_GPR [(TI
(V4SI "")
(V4SF "")
(V2DI "")
- (V2DF "")])
+ (V2DF "")
+ (V1TI "")])
; Whether a floating point move is ok, don't allow SD without hardware FP
(define_mode_attr fmove_ok [(SF "")
@@ -412,7 +413,8 @@ (define_mode_iterator BOOL_128 [TI
(V4SI "TARGET_ALTIVEC")
(V4SF "TARGET_ALTIVEC")
(V2DI "TARGET_ALTIVEC")
- (V2DF "TARGET_ALTIVEC")])
+ (V2DF "TARGET_ALTIVEC")
+ (V1TI "TARGET_ALTIVEC")])
;; For the GPRs we use 3 constraints for register outputs, two that are the
;; same as the output register, and a third where the output register is an
@@ -428,7 +430,8 @@ (define_mode_attr BOOL_REGS_OUTPUT [(TI
(V4SI "wa,v,&?r,?r,?r")
(V4SF "wa,v,&?r,?r,?r")
(V2DI "wa,v,&?r,?r,?r")
- (V2DF "wa,v,&?r,?r,?r")])
+ (V2DF "wa,v,&?r,?r,?r")
+ (V1TI "wa,v,&?r,?r,?r")])
;; Mode attribute for boolean operation register constraints for operand1
(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wa,v")
@@ -438,7 +441,8 @@ (define_mode_attr BOOL_REGS_OP1 [(TI "r
(V4SI "wa,v,r,0,r")
(V4SF "wa,v,r,0,r")
(V2DI "wa,v,r,0,r")
- (V2DF "wa,v,r,0,r")])
+ (V2DF "wa,v,r,0,r")
+ (V1TI "wa,v,r,0,r")])
;; Mode attribute for boolean operation register constraints for operand2
(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wa,v")
@@ -448,7 +452,8 @@ (define_mode_attr BOOL_REGS_OP2 [(TI "r
(V4SI "wa,v,r,r,0")
(V4SF "wa,v,r,r,0")
(V2DI "wa,v,r,r,0")
- (V2DF "wa,v,r,r,0")])
+ (V2DF "wa,v,r,r,0")
+ (V1TI "wa,v,r,r,0")])
;; Mode attribute for boolean operation register constraints for operand1
;; for one_cmpl. To simplify things, we repeat the constraint where 0
@@ -460,7 +465,8 @@ (define_mode_attr BOOL_REGS_UNARY [(TI "
(V4SI "wa,v,r,0,0")
(V4SF "wa,v,r,0,0")
(V2DI "wa,v,r,0,0")
- (V2DF "wa,v,r,0,0")])
+ (V2DF "wa,v,r,0,0")
+ (V1TI "wa,v,r,0,0")])
;; Mode attribute for the clobber of CC0 for AND expansion.
;; For the 128-bit types, we never do AND immediate, but we need to
@@ -472,7 +478,8 @@ (define_mode_attr BOOL_REGS_AND_CR0 [(TI
(V4SI "X,X,X,X,X")
(V4SF "X,X,X,X,X")
(V2DI "X,X,X,X,X")
- (V2DF "X,X,X,X,X")])
+ (V2DF "X,X,X,X,X")
+ (V1TI "X,X,X,X,X")])
;; Start with fixed-point load and store insns. Here we put only the more
===================================================================
@@ -326,12 +326,18 @@
#define vec_eqv __builtin_vec_eqv
#define vec_nand __builtin_vec_nand
#define vec_orc __builtin_vec_orc
+#define vec_vaddcuq __builtin_vec_vaddcuq
#define vec_vaddudm __builtin_vec_vaddudm
+#define vec_vadduqm __builtin_vec_vadduqm
#define vec_vclz __builtin_vec_vclz
#define vec_vclzb __builtin_vec_vclzb
#define vec_vclzd __builtin_vec_vclzd
#define vec_vclzh __builtin_vec_vclzh
#define vec_vclzw __builtin_vec_vclzw
+#define vec_vaddecuq __builtin_vec_vaddecuq
+#define vec_vaddeuqm __builtin_vec_vaddeuqm
+#define vec_vsubecuq __builtin_vec_vsubecuq
+#define vec_vsubeuqm __builtin_vec_vsubeuqm
#define vec_vgbbd __builtin_vec_vgbbd
#define vec_vmaxsd __builtin_vec_vmaxsd
#define vec_vmaxud __builtin_vec_vmaxud
@@ -352,7 +358,9 @@
#define vec_vsld __builtin_vec_vsld
#define vec_vsrad __builtin_vec_vsrad
#define vec_vsrd __builtin_vec_vsrd
+#define vec_vsubcuq __builtin_vec_vsubcuq
#define vec_vsubudm __builtin_vec_vsubudm
+#define vec_vsubuqm __builtin_vec_vsubuqm
#define vec_vupkhsw __builtin_vec_vupkhsw
#define vec_vupklsw __builtin_vec_vupklsw
#endif
===================================================================
@@ -15127,6 +15127,51 @@ vector long long vec_vupklsw (vector int
vector unsigned long long vec_vupklsw (vector int);
@end smallexample
+If the ISA 2.07 additions to the vector/scalar (power8-vector)
+instruction set are available, the following additional functions are
+available for 64-bit targets.  New vector types
+(@var{vector __int128_t} and @var{vector __uint128_t}) are available
+to hold the @var{__int128_t} and @var{__uint128_t} values used by
+these builtins.
+
+The normal vector extract and set operations work on
+@var{vector __int128_t} and @var{vector __uint128_t} types,
+but the index value must be 0.
+
+@smallexample
+vector __int128_t vec_vaddcuq (vector __int128_t, vector __int128_t);
+vector __uint128_t vec_vaddcuq (vector __uint128_t, vector __uint128_t);
+
+vector __int128_t vec_vadduqm (vector __int128_t, vector __int128_t);
+vector __uint128_t vec_vadduqm (vector __uint128_t, vector __uint128_t);
+
+vector __int128_t vec_vaddecuq (vector __int128_t, vector __int128_t,
+ vector __int128_t);
+vector __uint128_t vec_vaddecuq (vector __uint128_t, vector __uint128_t,
+ vector __uint128_t);
+
+vector __int128_t vec_vaddeuqm (vector __int128_t, vector __int128_t,
+ vector __int128_t);
+vector __uint128_t vec_vaddeuqm (vector __uint128_t, vector __uint128_t,
+ vector __uint128_t);
+
+vector __int128_t vec_vsubecuq (vector __int128_t, vector __int128_t,
+ vector __int128_t);
+vector __uint128_t vec_vsubecuq (vector __uint128_t, vector __uint128_t,
+ vector __uint128_t);
+
+vector __int128_t vec_vsubeuqm (vector __int128_t, vector __int128_t,
+ vector __int128_t);
+vector __uint128_t vec_vsubeuqm (vector __uint128_t, vector __uint128_t,
+ vector __uint128_t);
+
+vector __int128_t vec_vsubcuq (vector __int128_t, vector __int128_t);
+vector __uint128_t vec_vsubcuq (vector __uint128_t, vector __uint128_t);
+
+vector __int128_t vec_vsubuqm (vector __int128_t, vector __int128_t);
+vector __uint128_t vec_vsubuqm (vector __uint128_t, vector __uint128_t);
+@end smallexample
+
If the cryptographic instructions are enabled (@option{-mcrypto} or
@option{-mcpu=power8}), the following builtins are enabled.
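
As a usage sketch for the documentation above: the vector form computes the
same value as scalar __int128_t arithmetic, but keeps the operands in the
VMX register file (one vadduqm) instead of paired GPR adds (addc/adde); the
runtime test later in this patch checks exactly this equivalence.

  #include <altivec.h>

  __uint128_t
  scalar_add (__uint128_t a, __uint128_t b)
  {
    return a + b;			/* GPRs: addc/adde pair */
  }

  vector __uint128_t
  vector_add (vector __uint128_t a, vector __uint128_t b)
  {
    return vec_vadduqm (a, b);		/* VMX: single vadduqm */
  }
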
===================================================================
@@ -0,0 +1,85 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O3 -mvsx-timode" } */
+
+#include <altivec.h>
+
+#ifndef TYPE
+#define TYPE vector __int128_t
+#endif
+
+TYPE
+do_addcuq (TYPE p, TYPE q)
+{
+ return __builtin_vec_vaddcuq (p, q);
+}
+
+TYPE
+do_adduqm (TYPE p, TYPE q)
+{
+ return __builtin_vec_add (p, q);
+}
+
+TYPE
+do_addeuqm (TYPE p, TYPE q, TYPE r)
+{
+ return __builtin_vec_vaddeuqm (p, q, r);
+}
+
+TYPE
+do_addecuq (TYPE p, TYPE q, TYPE r)
+{
+ return __builtin_vec_vaddecuq (p, q, r);
+}
+
+TYPE
+do_subeuqm (TYPE p, TYPE q, TYPE r)
+{
+ return __builtin_vec_vsubeuqm (p, q, r);
+}
+
+TYPE
+do_subecuq (TYPE p, TYPE q, TYPE r)
+{
+ return __builtin_vec_vsubecuq (p, q, r);
+}
+
+TYPE
+do_subcuq (TYPE p, TYPE q)
+{
+ return __builtin_vec_vsubcuq (p, q);
+}
+
+TYPE
+do_subuqm (TYPE p, TYPE q)
+{
+ return __builtin_vec_vsubuqm (p, q);
+}
+
+TYPE
+do_zero (void)
+{
+ return (TYPE) { 0 };
+}
+
+TYPE
+do_minus_one (void)
+{
+ return (TYPE) { -1 };
+}
+
+/* { dg-final { scan-assembler "vaddcuq" } } */
+/* { dg-final { scan-assembler "vadduqm" } } */
+/* { dg-final { scan-assembler "vaddecuq" } } */
+/* { dg-final { scan-assembler "vaddeuqm" } } */
+/* { dg-final { scan-assembler "vsubecuq" } } */
+/* { dg-final { scan-assembler "vsubeuqm" } } */
+/* { dg-final { scan-assembler "vsubcuq" } } */
+/* { dg-final { scan-assembler "vsubuqm" } } */
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
+/* { dg-final { scan-assembler-not "ori 2,2,0" } } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvw4x" } } */
===================================================================
@@ -0,0 +1,177 @@
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#define UNUSED
+
+#ifdef __LITTLE_ENDIAN__
+#define HI_WORD 1
+#define LO_WORD 0
+#else
+#define HI_WORD 0
+#define LO_WORD 1
+#endif
+
+#else
+#define UNUSED __attribute__((__unused__))
+#endif
+
+#ifndef S_TYPE
+#define S_TYPE __uint128_t
+#endif
+
+#ifndef V_TYPE
+#define V_TYPE vector S_TYPE
+#endif
+
+static int compare (S_TYPE, V_TYPE, const char *, const char *)
+ __attribute__((__noinline__));
+
+static int
+compare (S_TYPE scalar,
+ V_TYPE vect,
+ const char *nl UNUSED,
+ const char *which UNUSED)
+{
+ unsigned long scalar_lo = (unsigned long) scalar;
+ unsigned long scalar_hi = (unsigned long) (scalar >> 64);
+ unsigned long vect_lo;
+ unsigned long vect_hi;
+ vector long long tmp;
+ int ret;
+
+ __asm__ ("mfvsrd %0,%x3\n\t"
+ "xxpermdi %x2,%x3,%x3,3\n\t"
+ "mfvsrd %1,%x2"
+ : "=r" (vect_hi),
+ "=r" (vect_lo),
+ "=wa" (tmp)
+ : "wa" (vect));
+
+ ret = (scalar_lo != vect_lo) || (scalar_hi != vect_hi);
+
+#ifdef DEBUG
+ printf ("%s%s: 0x%.16lx %.16lx %s 0x%.16lx %.16lx\n",
+ nl, which,
+ scalar_hi, scalar_lo,
+ (ret) ? "!=" : "==",
+ vect_hi, vect_lo);
+
+ fflush (stdout);
+#endif
+
+ return ret;
+}
+
+static void convert_via_mem (V_TYPE *, S_TYPE *)
+ __attribute__((__noinline__));
+
+static void
+convert_via_mem (V_TYPE *v, S_TYPE *s)
+{
+ *v = (V_TYPE) { *s };
+ __asm__ volatile ("nop"
+ : "+m" (*s), "+m" (*v)
+ :
+ : "memory");
+
+}
+
+
+/* Check if vadduqm returns the same values as normal 128-bit add. */
+
+/* Values to add together. */
+const static struct {
+ unsigned long hi_1;
+ unsigned long lo_1;
+ unsigned long hi_2;
+ unsigned long lo_2;
+} values[] = {
+ { 0x0000000000000000UL, 0xfffffffffffffffeUL,
+ 0x0000000000000000UL, 0x0000000000000002UL },
+ { 0x0000000000000000UL, 0x0000000000000002UL,
+ 0x0000000000000000UL, 0xfffffffffffffffeUL },
+ { 0xffffffffffffffffUL, 0xfffffffffffffffeUL,
+ 0x0000000000000000UL, 0x0000000000000002UL },
+ { 0xfffffffffffffff2UL, 0xffffffffffffffffUL,
+ 0x0000000000000002UL, 0x0000000000000000UL },
+ { 0x7fffffffffffffffUL, 0xfffffffffffffffeUL,
+ 0x0000000000000000UL, 0x0000000000000002UL },
+ { 0x7ffffffffffffff2UL, 0xffffffffffffffffUL,
+ 0x0000000000000002UL, 0x0000000000000000UL },
+};
+
+int
+main (void)
+{
+ int reg_errors = 0;
+ int mem_errors = 0;
+ size_t i;
+ const char *nl = "";
+
+ for (i = 0; i < sizeof (values) / sizeof (values[0]); i++)
+ {
+ S_TYPE s_reg_res, s_reg_in1, s_reg_in2, s_mem_res, s_mem_in1, s_mem_in2;
+ V_TYPE v_reg_res, v_reg_in1, v_reg_in2, v_mem_res, v_mem_in1, v_mem_in2;
+
+ s_reg_in1 = ((((S_TYPE)values[i].hi_1 << 64)) + ((S_TYPE)values[i].lo_1));
+ reg_errors += compare (s_reg_in1, (V_TYPE) { s_reg_in1 }, nl, "reg, in1");
+
+ s_reg_in2 = ((((S_TYPE)values[i].hi_2 << 64)) + ((S_TYPE)values[i].lo_2));
+ reg_errors += compare (s_reg_in2, (V_TYPE) { s_reg_in2 }, "", "reg, in2");
+
+ s_reg_res = s_reg_in1 + s_reg_in2;
+
+ v_reg_in1 = (V_TYPE) { s_reg_in1 };
+ v_reg_in2 = (V_TYPE) { s_reg_in2 };
+ v_reg_res = vec_vadduqm (v_reg_in1, v_reg_in2);
+ reg_errors += compare (s_reg_res, v_reg_res, "", "reg, res");
+
+ s_mem_in1 = s_reg_in1;
+ convert_via_mem (&v_mem_in1, &s_mem_in1);
+ mem_errors += compare (s_mem_in1, (V_TYPE) { s_mem_in1 }, "\n", "mem, in1");
+
+ s_mem_in2 = s_reg_in2;
+ convert_via_mem (&v_mem_in2, &s_mem_in2);
+ mem_errors += compare (s_mem_in2, (V_TYPE) { s_mem_in2 }, "", "mem, in2");
+
+ s_mem_res = s_mem_in1 + s_mem_in2;
+ v_mem_res = vec_vadduqm (v_mem_in1, v_mem_in2);
+ mem_errors += compare (s_mem_res, v_mem_res, "", "mem, res");
+
+ nl = "\n";
+ }
+
+#ifdef DEBUG
+ putchar ('\n');
+
+ if (!reg_errors)
+ fputs ("no errors found on register operations\n", stdout);
+ else
+ printf ("%d error%s found on register operations\n",
+ reg_errors,
+ (reg_errors == 1) ? "s" : "");
+
+ if (!mem_errors)
+ fputs ("no errors found on memory operations\n", stdout);
+ else
+ printf ("%d error%s found on memory operations\n",
+ mem_errors,
+ (mem_errors == 1) ? "s" : "");
+
+ fflush (stdout);
+#endif
+
+ if ((reg_errors + mem_errors) != 0)
+ abort ();
+
+ return 0;
+}
===================================================================
@@ -1,5 +1,5 @@
/* { dg-do assemble { target { lp64 } } } */
-/* { dg-options "-O2 -fno-align-functions -mtraceback=no -save-temps" } */
+/* { dg-options "-O2 -fno-align-functions -mtraceback=no -save-temps -mcpu=power5" } */
typedef int TImode __attribute__ ((mode (TI)));
@@ -46,6 +46,12 @@ TImode r19 (void *x) { return *(TImode *
TImode r20 (void *x) { return *(TImode *) (x + 32748); }
/* test should really be == 616, see pr54110 */
+/* When TImode is allowed in VSX registers, the only allowable address mode
+   for TImode is a single indirect address, so that the value can be loaded
+   and stored in either GPR or VSX registers.  This affects the generated
+   code, and would cause this test to fail when such an option is used.
+   Fall back to power5 to test the code.  */
+
/* { dg-final { object-size text <= 700 } } */
/* { dg-final { scan-assembler-not "(st|l)fd" } } */
/* { dg-final { cleanup-saved-temps "timode_off" } } */