===================================================================
@@ -12359,6 +12359,12 @@ vector bool long vec_cmplt (vector doubl
vector float vec_div (vector float, vector float);
vector double vec_div (vector double, vector double);
vector double vec_floor (vector double);
+vector double vec_ld (int, const vector double *);
+vector double vec_ld (int, const double *);
+vector double vec_ldl (int, const vector double *);
+vector double vec_ldl (int, const double *);
+vector unsigned char vec_lvsl (int, const volatile double *);
+vector unsigned char vec_lvsr (int, const volatile double *);
vector double vec_madd (vector double, vector double, vector double);
vector double vec_max (vector double, vector double);
vector double vec_min (vector double, vector double);
@@ -12387,6 +12393,8 @@ vector double vec_sel (vector double, ve
vector double vec_sub (vector double, vector double);
vector float vec_sqrt (vector float);
vector double vec_sqrt (vector double);
+void vec_st (vector double, int, vector double *);
+void vec_st (vector double, int, double *);
vector double vec_trunc (vector double);
vector double vec_xor (vector double, vector double);
vector double vec_xor (vector double, vector bool long);
@@ -12415,7 +12423,65 @@ int vec_any_ngt (vector double, vector d
int vec_any_nle (vector double, vector double);
int vec_any_nlt (vector double, vector double);
int vec_any_numeric (vector double);
-@end smallexample
+
+vector double vec_vsx_ld (int, const vector double *);
+vector double vec_vsx_ld (int, const double *);
+vector float vec_vsx_ld (int, const vector float *);
+vector float vec_vsx_ld (int, const float *);
+vector bool int vec_vsx_ld (int, const vector bool int *);
+vector signed int vec_vsx_ld (int, const vector signed int *);
+vector signed int vec_vsx_ld (int, const int *);
+vector signed int vec_vsx_ld (int, const long *);
+vector unsigned int vec_vsx_ld (int, const vector unsigned int *);
+vector unsigned int vec_vsx_ld (int, const unsigned int *);
+vector unsigned int vec_vsx_ld (int, const unsigned long *);
+vector bool short vec_vsx_ld (int, const vector bool short *);
+vector pixel vec_vsx_ld (int, const vector pixel *);
+vector signed short vec_vsx_ld (int, const vector signed short *);
+vector signed short vec_vsx_ld (int, const short *);
+vector unsigned short vec_vsx_ld (int, const vector unsigned short *);
+vector unsigned short vec_vsx_ld (int, const unsigned short *);
+vector bool char vec_vsx_ld (int, const vector bool char *);
+vector signed char vec_vsx_ld (int, const vector signed char *);
+vector signed char vec_vsx_ld (int, const signed char *);
+vector unsigned char vec_vsx_ld (int, const vector unsigned char *);
+vector unsigned char vec_vsx_ld (int, const unsigned char *);
+
+void vec_vsx_st (vector double, int, vector double *);
+void vec_vsx_st (vector double, int, double *);
+void vec_vsx_st (vector float, int, vector float *);
+void vec_vsx_st (vector float, int, float *);
+void vec_vsx_st (vector signed int, int, vector signed int *);
+void vec_vsx_st (vector signed int, int, int *);
+void vec_vsx_st (vector unsigned int, int, vector unsigned int *);
+void vec_vsx_st (vector unsigned int, int, unsigned int *);
+void vec_vsx_st (vector bool int, int, vector bool int *);
+void vec_vsx_st (vector bool int, int, unsigned int *);
+void vec_vsx_st (vector bool int, int, int *);
+void vec_vsx_st (vector signed short, int, vector signed short *);
+void vec_vsx_st (vector signed short, int, short *);
+void vec_vsx_st (vector unsigned short, int, vector unsigned short *);
+void vec_vsx_st (vector unsigned short, int, unsigned short *);
+void vec_vsx_st (vector bool short, int, vector bool short *);
+void vec_vsx_st (vector bool short, int, unsigned short *);
+void vec_vsx_st (vector pixel, int, vector pixel *);
+void vec_vsx_st (vector pixel, int, unsigned short *);
+void vec_vsx_st (vector pixel, int, short *);
+void vec_vsx_st (vector bool short, int, short *);
+void vec_vsx_st (vector signed char, int, vector signed char *);
+void vec_vsx_st (vector signed char, int, signed char *);
+void vec_vsx_st (vector unsigned char, int, vector unsigned char *);
+void vec_vsx_st (vector unsigned char, int, unsigned char *);
+void vec_vsx_st (vector bool char, int, vector bool char *);
+void vec_vsx_st (vector bool char, int, unsigned char *);
+void vec_vsx_st (vector bool char, int, signed char *);
+@end smallexample
+
+Note that the @samp{vec_ld} and @samp{vec_st} builtins will always
+generate the AltiVec @samp{LVX} and @samp{STVX} instructions even
+if the VSX instruction set is available. The @samp{vec_vsx_ld} and
+@samp{vec_vsx_st} builtins will always generate the VSX @samp{LXVD2X},
+@samp{LXVW4X}, @samp{STXVD2X}, and @samp{STXVW4X} instructions.
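+
+For example, here is a rough sketch (the function names are invented
+for illustration) of the difference on a VSX target:
+
+@smallexample
+/* Always LVX: the low-order bits of the address are silently
+   ignored, forcing the access to a 16-byte boundary.  */
+vector double load_lvx (const vector double *p)
+@{
+  return vec_ld (0, p);
+@}
+
+/* Always LXVD2X: the address is used exactly as given, so a
+   misaligned pointer is honored.  */
+vector double load_lxvd2x (const double *p)
+@{
+  return vec_vsx_ld (0, p);
+@}
+@end smallexample
+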
GCC provides a few other builtins on PowerPC to access certain instructions:
@smallexample
===================================================================
@@ -0,0 +1,97 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O3 -mcpu=power7" } */
+
+/* Test the various load/store variants. */
+
+#include <altivec.h>
+
+#define TEST_COPY(NAME, TYPE) \
+void NAME ## _copy_native (vector TYPE *a, vector TYPE *b) \
+{ \
+ *a = *b; \
+} \
+ \
+void NAME ## _copy_vec (vector TYPE *a, vector TYPE *b) \
+{ \
+ vector TYPE x = vec_ld (0, b); \
+ vec_st (x, 0, a); \
+} \
+
+#define TEST_COPYL(NAME, TYPE) \
+void NAME ## _lvxl (vector TYPE *a, vector TYPE *b) \
+{ \
+ vector TYPE x = vec_ldl (0, b); \
+ vec_stl (x, 0, a); \
+} \
+
+#define TEST_VSX_COPY(NAME, TYPE) \
+void NAME ## _copy_vsx (vector TYPE *a, vector TYPE *b) \
+{ \
+ vector TYPE x = vec_vsx_ld (0, b); \
+ vec_vsx_st (x, 0, a); \
+} \
+
+#define TEST_ALIGN(NAME, TYPE) \
+void NAME ## _align (vector unsigned char *a, TYPE *b) \
+{ \
+ vector unsigned char x = vec_lvsl (0, b); \
+ vector unsigned char y = vec_lvsr (0, b); \
+ vec_st (x, 0, a); \
+ vec_st (y, 8, a); \
+}
+
+#ifndef NO_COPY
+TEST_COPY(uchar, unsigned char)
+TEST_COPY(schar, signed char)
+TEST_COPY(bchar, bool char)
+TEST_COPY(ushort, unsigned short)
+TEST_COPY(sshort, signed short)
+TEST_COPY(bshort, bool short)
+TEST_COPY(uint, unsigned int)
+TEST_COPY(sint, signed int)
+TEST_COPY(bint, bool int)
+TEST_COPY(float, float)
+TEST_COPY(double, double)
+#endif /* NO_COPY */
+
+#ifndef NO_COPYL
+TEST_COPYL(uchar, unsigned char)
+TEST_COPYL(schar, signed char)
+TEST_COPYL(bchar, bool char)
+TEST_COPYL(ushort, unsigned short)
+TEST_COPYL(sshort, signed short)
+TEST_COPYL(bshort, bool short)
+TEST_COPYL(uint, unsigned int)
+TEST_COPYL(sint, signed int)
+TEST_COPYL(bint, bool int)
+TEST_COPYL(float, float)
+TEST_COPYL(double, double)
+#endif /* NO_COPYL */
+
+#ifndef NO_ALIGN
+TEST_ALIGN(uchar, unsigned char)
+TEST_ALIGN(schar, signed char)
+TEST_ALIGN(ushort, unsigned short)
+TEST_ALIGN(sshort, signed short)
+TEST_ALIGN(uint, unsigned int)
+TEST_ALIGN(sint, signed int)
+TEST_ALIGN(float, float)
+TEST_ALIGN(double, double)
+#endif /* NO_ALIGN */
+
+
+#ifndef NO_VSX_COPY
+TEST_VSX_COPY(uchar, unsigned char)
+TEST_VSX_COPY(schar, signed char)
+TEST_VSX_COPY(bchar, bool char)
+TEST_VSX_COPY(ushort, unsigned short)
+TEST_VSX_COPY(sshort, signed short)
+TEST_VSX_COPY(bshort, bool short)
+TEST_VSX_COPY(uint, unsigned int)
+TEST_VSX_COPY(sint, signed int)
+TEST_VSX_COPY(bint, bool int)
+TEST_VSX_COPY(float, float)
+TEST_VSX_COPY(double, double)
+#endif /* NO_VSX_COPY */
===================================================================
@@ -1,4 +1,5 @@
/* { dg-do run { target { powerpc64-*-* && { lp64 && dfprt } } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-options "-std=gnu99 -O2 -fno-strict-aliasing" } */
/* Testcase to check for ABI compliance of parameter passing
@@ -31,60 +32,42 @@ typedef struct
reg_parms_t gparms;
-/* Testcase could break on future gcc's, if parameter regs
- are changed before this asm. */
-
-#ifndef __MACH__
-#define save_parms(lparms) \
- asm volatile ("ld 11,gparms@got(2)\n\t" \
- "std 3,0(11)\n\t" \
- "std 4,8(11)\n\t" \
- "std 5,16(11)\n\t" \
- "std 6,24(11)\n\t" \
- "std 7,32(11)\n\t" \
- "std 8,40(11)\n\t" \
- "std 9,48(11)\n\t" \
- "std 10,56(11)\n\t" \
- "stfd 1,64(11)\n\t" \
- "stfd 2,72(11)\n\t" \
- "stfd 3,80(11)\n\t" \
- "stfd 4,88(11)\n\t" \
- "stfd 5,96(11)\n\t" \
- "stfd 6,104(11)\n\t" \
- "stfd 7,112(11)\n\t" \
- "stfd 8,120(11)\n\t" \
- "stfd 9,128(11)\n\t" \
- "stfd 10,136(11)\n\t" \
- "stfd 11,144(11)\n\t" \
- "stfd 12,152(11)\n\t" \
- "stfd 13,160(11)\n\t":::"11", "memory"); \
- lparms = gparms;
-#else
-#define save_parms(lparms) \
- asm volatile ("ld r11,gparms@got(r2)\n\t" \
- "std r3,0(r11)\n\t" \
- "std r4,8(r11)\n\t" \
- "std r5,16(r11)\n\t" \
- "std r6,24(r11)\n\t" \
- "std r7,32(r11)\n\t" \
- "std r8,40(r11)\n\t" \
- "std r9,48(r11)\n\t" \
- "std r10,56(r11)\n\t" \
- "stfd f1,64(r11)\n\t" \
- "stfd f2,72(r11)\n\t" \
- "stfd f3,80(r11)\n\t" \
- "stfd f4,88(r11)\n\t" \
- "stfd f5,96(r11)\n\t" \
- "stfd f6,104(r11)\n\t" \
- "stfd f7,112(r11)\n\t" \
- "stfd f8,120(r11)\n\t" \
- "stfd f9,128(r11)\n\t" \
- "stfd f10,136(r11)\n\t" \
- "stfd f11,144(r11)\n\t" \
- "stfd f12,152(r11)\n\t" \
- "stfd f13,160(r11)\n\t":::"r11", "memory"); \
- lparms = gparms;
-#endif
+/* Wrapper to save the GPRs and FPRs and then jump to the real function. */
+#define WRAPPER(NAME) \
+__asm__ ("\t.globl\t" #NAME "_asm\n\t" \
+ ".section \".opd\",\"aw\"\n\t" \
+ ".align 3\n" \
+ #NAME "_asm:\n\t" \
+ ".quad .L." #NAME "_asm,.TOC.@tocbase,0\n\t" \
+ ".text\n\t" \
+ ".type " #NAME "_asm, @function\n" \
+ ".L." #NAME "_asm:\n\t" \
+ "ld 11,gparms@got(2)\n\t" \
+ "std 3,0(11)\n\t" \
+ "std 4,8(11)\n\t" \
+ "std 5,16(11)\n\t" \
+ "std 6,24(11)\n\t" \
+ "std 7,32(11)\n\t" \
+ "std 8,40(11)\n\t" \
+ "std 9,48(11)\n\t" \
+ "std 10,56(11)\n\t" \
+ "stfd 1,64(11)\n\t" \
+ "stfd 2,72(11)\n\t" \
+ "stfd 3,80(11)\n\t" \
+ "stfd 4,88(11)\n\t" \
+ "stfd 5,96(11)\n\t" \
+ "stfd 6,104(11)\n\t" \
+ "stfd 7,112(11)\n\t" \
+ "stfd 8,120(11)\n\t" \
+ "stfd 9,128(11)\n\t" \
+ "stfd 10,136(11)\n\t" \
+ "stfd 11,144(11)\n\t" \
+ "stfd 12,152(11)\n\t" \
+ "stfd 13,160(11)\n\t" \
+ "b " #NAME "\n\t" \
+ ".long 0\n\t" \
+ ".byte 0,0,0,0,0,0,0,0\n\t" \
+ ".size " #NAME ",.-" #NAME "\n")
typedef struct sf
{
@@ -97,6 +80,13 @@ typedef struct sf
unsigned long slot[100];
} stack_frame_t;
+extern void func0_asm (double, double, double, double, double, double,
+ double, double, double, double, double, double,
+ double, double,
+ _Decimal64, _Decimal128, _Decimal64);
+
+WRAPPER(func0);
+
/* Fill up floating point registers with double arguments, forcing
decimal float arguments into the parameter save area. */
void __attribute__ ((noinline))
@@ -105,186 +95,209 @@ func0 (double a1, double a2, double a3,
double a13, double a14,
_Decimal64 a15, _Decimal128 a16, _Decimal64 a17)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != lparms.fprs[0]) FAILURE
- if (a2 != lparms.fprs[1]) FAILURE
- if (a3 != lparms.fprs[2]) FAILURE
- if (a4 != lparms.fprs[3]) FAILURE
- if (a5 != lparms.fprs[4]) FAILURE
- if (a6 != lparms.fprs[5]) FAILURE
- if (a7 != lparms.fprs[6]) FAILURE
- if (a8 != lparms.fprs[7]) FAILURE
- if (a9 != lparms.fprs[8]) FAILURE
- if (a10 != lparms.fprs[9]) FAILURE
- if (a11 != lparms.fprs[10]) FAILURE
- if (a12 != lparms.fprs[11]) FAILURE
- if (a13 != lparms.fprs[12]) FAILURE
+ if (a1 != gparms.fprs[0]) FAILURE
+ if (a2 != gparms.fprs[1]) FAILURE
+ if (a3 != gparms.fprs[2]) FAILURE
+ if (a4 != gparms.fprs[3]) FAILURE
+ if (a5 != gparms.fprs[4]) FAILURE
+ if (a6 != gparms.fprs[5]) FAILURE
+ if (a7 != gparms.fprs[6]) FAILURE
+ if (a8 != gparms.fprs[7]) FAILURE
+ if (a9 != gparms.fprs[8]) FAILURE
+ if (a10 != gparms.fprs[9]) FAILURE
+ if (a11 != gparms.fprs[10]) FAILURE
+ if (a12 != gparms.fprs[11]) FAILURE
+ if (a13 != gparms.fprs[12]) FAILURE
if (a14 != *(double *)&sp->slot[13]) FAILURE
if (a15 != *(_Decimal64 *)&sp->slot[14]) FAILURE
if (a16 != *(_Decimal128 *)&sp->slot[15]) FAILURE
if (a17 != *(_Decimal64 *)&sp->slot[17]) FAILURE
}
+extern void func1_asm (double, double, double, double, double, double,
+ double, double, double, double, double, double,
+ double, _Decimal128 );
+
+WRAPPER(func1);
+
void __attribute__ ((noinline))
func1 (double a1, double a2, double a3, double a4, double a5, double a6,
double a7, double a8, double a9, double a10, double a11, double a12,
double a13, _Decimal128 a14)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != lparms.fprs[0]) FAILURE
- if (a2 != lparms.fprs[1]) FAILURE
- if (a3 != lparms.fprs[2]) FAILURE
- if (a4 != lparms.fprs[3]) FAILURE
- if (a5 != lparms.fprs[4]) FAILURE
- if (a6 != lparms.fprs[5]) FAILURE
- if (a7 != lparms.fprs[6]) FAILURE
- if (a8 != lparms.fprs[7]) FAILURE
- if (a9 != lparms.fprs[8]) FAILURE
- if (a10 != lparms.fprs[9]) FAILURE
- if (a11 != lparms.fprs[10]) FAILURE
- if (a12 != lparms.fprs[11]) FAILURE
- if (a13 != lparms.fprs[12]) FAILURE
+ if (a1 != gparms.fprs[0]) FAILURE
+ if (a2 != gparms.fprs[1]) FAILURE
+ if (a3 != gparms.fprs[2]) FAILURE
+ if (a4 != gparms.fprs[3]) FAILURE
+ if (a5 != gparms.fprs[4]) FAILURE
+ if (a6 != gparms.fprs[5]) FAILURE
+ if (a7 != gparms.fprs[6]) FAILURE
+ if (a8 != gparms.fprs[7]) FAILURE
+ if (a9 != gparms.fprs[8]) FAILURE
+ if (a10 != gparms.fprs[9]) FAILURE
+ if (a11 != gparms.fprs[10]) FAILURE
+ if (a12 != gparms.fprs[11]) FAILURE
+ if (a13 != gparms.fprs[12]) FAILURE
if (a14 != *(_Decimal128 *)&sp->slot[13]) FAILURE
}
+extern void func2_asm (double, double, double, double, double, double,
+ double, double, double, double, double, double,
+ _Decimal128);
+
+WRAPPER(func2);
+
void __attribute__ ((noinline))
func2 (double a1, double a2, double a3, double a4, double a5, double a6,
double a7, double a8, double a9, double a10, double a11, double a12,
_Decimal128 a13)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != lparms.fprs[0]) FAILURE
- if (a2 != lparms.fprs[1]) FAILURE
- if (a3 != lparms.fprs[2]) FAILURE
- if (a4 != lparms.fprs[3]) FAILURE
- if (a5 != lparms.fprs[4]) FAILURE
- if (a6 != lparms.fprs[5]) FAILURE
- if (a7 != lparms.fprs[6]) FAILURE
- if (a8 != lparms.fprs[7]) FAILURE
- if (a9 != lparms.fprs[8]) FAILURE
- if (a10 != lparms.fprs[9]) FAILURE
- if (a11 != lparms.fprs[10]) FAILURE
- if (a12 != lparms.fprs[11]) FAILURE
+ if (a1 != gparms.fprs[0]) FAILURE
+ if (a2 != gparms.fprs[1]) FAILURE
+ if (a3 != gparms.fprs[2]) FAILURE
+ if (a4 != gparms.fprs[3]) FAILURE
+ if (a5 != gparms.fprs[4]) FAILURE
+ if (a6 != gparms.fprs[5]) FAILURE
+ if (a7 != gparms.fprs[6]) FAILURE
+ if (a8 != gparms.fprs[7]) FAILURE
+ if (a9 != gparms.fprs[8]) FAILURE
+ if (a10 != gparms.fprs[9]) FAILURE
+ if (a11 != gparms.fprs[10]) FAILURE
+ if (a12 != gparms.fprs[11]) FAILURE
if (a13 != *(_Decimal128 *)&sp->slot[12]) FAILURE
}
+extern void func3_asm (_Decimal64, _Decimal128, _Decimal64, _Decimal128,
+ _Decimal64, _Decimal128, _Decimal64, _Decimal128,
+ _Decimal64, _Decimal128);
+
+WRAPPER(func3);
+
void __attribute__ ((noinline))
func3 (_Decimal64 a1, _Decimal128 a2, _Decimal64 a3, _Decimal128 a4,
_Decimal64 a5, _Decimal128 a6, _Decimal64 a7, _Decimal128 a8,
_Decimal64 a9, _Decimal128 a10)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != *(_Decimal64 *)&lparms.fprs[0]) FAILURE /* f1 */
- if (a2 != *(_Decimal128 *)&lparms.fprs[1]) FAILURE /* f2 & f3 */
- if (a3 != *(_Decimal64 *)&lparms.fprs[3]) FAILURE /* f4 */
- if (a4 != *(_Decimal128 *)&lparms.fprs[5]) FAILURE /* f6 & f7 */
- if (a5 != *(_Decimal64 *)&lparms.fprs[7]) FAILURE /* f8 */
- if (a6 != *(_Decimal128 *)&lparms.fprs[9]) FAILURE /* f10 & f11 */
- if (a7 != *(_Decimal64 *)&lparms.fprs[11]) FAILURE /* f12 */
+ if (a1 != *(_Decimal64 *)&gparms.fprs[0]) FAILURE /* f1 */
+ if (a2 != *(_Decimal128 *)&gparms.fprs[1]) FAILURE /* f2 & f3 */
+ if (a3 != *(_Decimal64 *)&gparms.fprs[3]) FAILURE /* f4 */
+ if (a4 != *(_Decimal128 *)&gparms.fprs[5]) FAILURE /* f6 & f7 */
+ if (a5 != *(_Decimal64 *)&gparms.fprs[7]) FAILURE /* f8 */
+ if (a6 != *(_Decimal128 *)&gparms.fprs[9]) FAILURE /* f10 & f11 */
+ if (a7 != *(_Decimal64 *)&gparms.fprs[11]) FAILURE /* f12 */
if (a8 != *(_Decimal128 *)&sp->slot[10]) FAILURE
if (a9 != *(_Decimal64 *)&sp->slot[12]) FAILURE
if (a10 != *(_Decimal128 *)&sp->slot[13]) FAILURE
}
+extern void func4_asm (_Decimal128, _Decimal64, _Decimal128, _Decimal64,
+ _Decimal128, _Decimal64, _Decimal128, _Decimal64);
+
+WRAPPER(func4);
+
void __attribute__ ((noinline))
func4 (_Decimal128 a1, _Decimal64 a2, _Decimal128 a3, _Decimal64 a4,
_Decimal128 a5, _Decimal64 a6, _Decimal128 a7, _Decimal64 a8)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != *(_Decimal128 *)&lparms.fprs[1]) FAILURE /* f2 & f3 */
- if (a2 != *(_Decimal64 *)&lparms.fprs[3]) FAILURE /* f4 */
- if (a3 != *(_Decimal128 *)&lparms.fprs[5]) FAILURE /* f6 & f7 */
- if (a4 != *(_Decimal64 *)&lparms.fprs[7]) FAILURE /* f8 */
- if (a5 != *(_Decimal128 *)&lparms.fprs[9]) FAILURE /* f10 & f11 */
- if (a6 != *(_Decimal64 *)&lparms.fprs[11]) FAILURE /* f12 */
+ if (a1 != *(_Decimal128 *)&gparms.fprs[1]) FAILURE /* f2 & f3 */
+ if (a2 != *(_Decimal64 *)&gparms.fprs[3]) FAILURE /* f4 */
+ if (a3 != *(_Decimal128 *)&gparms.fprs[5]) FAILURE /* f6 & f7 */
+ if (a4 != *(_Decimal64 *)&gparms.fprs[7]) FAILURE /* f8 */
+ if (a5 != *(_Decimal128 *)&gparms.fprs[9]) FAILURE /* f10 & f11 */
+ if (a6 != *(_Decimal64 *)&gparms.fprs[11]) FAILURE /* f12 */
if (a7 != *(_Decimal128 *)&sp->slot[9]) FAILURE
if (a8 != *(_Decimal64 *)&sp->slot[11]) FAILURE
}
+extern void func5_asm (_Decimal32, _Decimal32, _Decimal32, _Decimal32,
+ _Decimal32, _Decimal32, _Decimal32, _Decimal32,
+ _Decimal32, _Decimal32, _Decimal32, _Decimal32,
+ _Decimal32, _Decimal32, _Decimal32, _Decimal32);
+
+WRAPPER(func5);
+
void __attribute__ ((noinline))
func5 (_Decimal32 a1, _Decimal32 a2, _Decimal32 a3, _Decimal32 a4,
_Decimal32 a5, _Decimal32 a6, _Decimal32 a7, _Decimal32 a8,
_Decimal32 a9, _Decimal32 a10, _Decimal32 a11, _Decimal32 a12,
_Decimal32 a13, _Decimal32 a14, _Decimal32 a15, _Decimal32 a16)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
/* _Decimal32 is passed in the lower half of an FPR or parameter slot. */
- if (a1 != ((d32parm_t *)&lparms.fprs[0])->d) FAILURE /* f1 */
- if (a2 != ((d32parm_t *)&lparms.fprs[1])->d) FAILURE /* f2 */
- if (a3 != ((d32parm_t *)&lparms.fprs[2])->d) FAILURE /* f3 */
- if (a4 != ((d32parm_t *)&lparms.fprs[3])->d) FAILURE /* f4 */
- if (a5 != ((d32parm_t *)&lparms.fprs[4])->d) FAILURE /* f5 */
- if (a6 != ((d32parm_t *)&lparms.fprs[5])->d) FAILURE /* f6 */
- if (a7 != ((d32parm_t *)&lparms.fprs[6])->d) FAILURE /* f7 */
- if (a8 != ((d32parm_t *)&lparms.fprs[7])->d) FAILURE /* f8 */
- if (a9 != ((d32parm_t *)&lparms.fprs[8])->d) FAILURE /* f9 */
- if (a10 != ((d32parm_t *)&lparms.fprs[9])->d) FAILURE /* f10 */
- if (a11 != ((d32parm_t *)&lparms.fprs[10])->d) FAILURE /* f11 */
- if (a12 != ((d32parm_t *)&lparms.fprs[11])->d) FAILURE /* f12 */
- if (a13 != ((d32parm_t *)&lparms.fprs[12])->d) FAILURE /* f13 */
+ if (a1 != ((d32parm_t *)&gparms.fprs[0])->d) FAILURE /* f1 */
+ if (a2 != ((d32parm_t *)&gparms.fprs[1])->d) FAILURE /* f2 */
+ if (a3 != ((d32parm_t *)&gparms.fprs[2])->d) FAILURE /* f3 */
+ if (a4 != ((d32parm_t *)&gparms.fprs[3])->d) FAILURE /* f4 */
+ if (a5 != ((d32parm_t *)&gparms.fprs[4])->d) FAILURE /* f5 */
+ if (a6 != ((d32parm_t *)&gparms.fprs[5])->d) FAILURE /* f6 */
+ if (a7 != ((d32parm_t *)&gparms.fprs[6])->d) FAILURE /* f7 */
+ if (a8 != ((d32parm_t *)&gparms.fprs[7])->d) FAILURE /* f8 */
+ if (a9 != ((d32parm_t *)&gparms.fprs[8])->d) FAILURE /* f9 */
+ if (a10 != ((d32parm_t *)&gparms.fprs[9])->d) FAILURE /* f10 */
+ if (a11 != ((d32parm_t *)&gparms.fprs[10])->d) FAILURE /* f11 */
+ if (a12 != ((d32parm_t *)&gparms.fprs[11])->d) FAILURE /* f12 */
+ if (a13 != ((d32parm_t *)&gparms.fprs[12])->d) FAILURE /* f13 */
if (a14 != ((d32parm_t *)&sp->slot[13])->d) FAILURE
if (a15 != ((d32parm_t *)&sp->slot[14])->d) FAILURE
if (a16 != ((d32parm_t *)&sp->slot[15])->d) FAILURE
}
+extern void func6_asm (_Decimal32, _Decimal64, _Decimal128,
+ _Decimal32, _Decimal64, _Decimal128,
+ _Decimal32, _Decimal64, _Decimal128,
+ _Decimal32, _Decimal64, _Decimal128);
+
+WRAPPER(func6);
+
void __attribute__ ((noinline))
func6 (_Decimal32 a1, _Decimal64 a2, _Decimal128 a3,
_Decimal32 a4, _Decimal64 a5, _Decimal128 a6,
_Decimal32 a7, _Decimal64 a8, _Decimal128 a9,
_Decimal32 a10, _Decimal64 a11, _Decimal128 a12)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != ((d32parm_t *)&lparms.fprs[0])->d) FAILURE /* f1 */
- if (a2 != *(_Decimal64 *)&lparms.fprs[1]) FAILURE /* f2 */
- if (a3 != *(_Decimal128 *)&lparms.fprs[3]) FAILURE /* f4 & f5 */
- if (a4 != ((d32parm_t *)&lparms.fprs[5])->d) FAILURE /* f6 */
- if (a5 != *(_Decimal64 *)&lparms.fprs[6]) FAILURE /* f7 */
- if (a6 != *(_Decimal128 *)&lparms.fprs[7]) FAILURE /* f8 & f9 */
- if (a7 != ((d32parm_t *)&lparms.fprs[9])->d) FAILURE /* f10 */
- if (a8 != *(_Decimal64 *)&lparms.fprs[10]) FAILURE /* f11 */
- if (a9 != *(_Decimal128 *)&lparms.fprs[11]) FAILURE /* f12 & f13 */
+ if (a1 != ((d32parm_t *)&gparms.fprs[0])->d) FAILURE /* f1 */
+ if (a2 != *(_Decimal64 *)&gparms.fprs[1]) FAILURE /* f2 */
+ if (a3 != *(_Decimal128 *)&gparms.fprs[3]) FAILURE /* f4 & f5 */
+ if (a4 != ((d32parm_t *)&gparms.fprs[5])->d) FAILURE /* f6 */
+ if (a5 != *(_Decimal64 *)&gparms.fprs[6]) FAILURE /* f7 */
+ if (a6 != *(_Decimal128 *)&gparms.fprs[7]) FAILURE /* f8 & f9 */
+ if (a7 != ((d32parm_t *)&gparms.fprs[9])->d) FAILURE /* f10 */
+ if (a8 != *(_Decimal64 *)&gparms.fprs[10]) FAILURE /* f11 */
+ if (a9 != *(_Decimal128 *)&gparms.fprs[11]) FAILURE /* f12 & f13 */
if (a10 != ((d32parm_t *)&sp->slot[12])->d) FAILURE
if (a11 != *(_Decimal64 *)&sp->slot[13]) FAILURE
}
@@ -292,23 +305,23 @@ func6 (_Decimal32 a1, _Decimal64 a2, _De
int
main (void)
{
- func0 (1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5,
- 14.5, 15.2dd, 16.2dl, 17.2dd);
- func1 (101.5, 102.5, 103.5, 104.5, 105.5, 106.5, 107.5, 108.5, 109.5,
- 110.5, 111.5, 112.5, 113.5, 114.2dd);
- func2 (201.5, 202.5, 203.5, 204.5, 205.5, 206.5, 207.5, 208.5, 209.5,
- 210.5, 211.5, 212.5, 213.2dd);
- func3 (301.2dd, 302.2dl, 303.2dd, 304.2dl, 305.2dd, 306.2dl, 307.2dd,
- 308.2dl, 309.2dd, 310.2dl);
- func4 (401.2dl, 402.2dd, 403.2dl, 404.2dd, 405.2dl, 406.2dd, 407.2dl,
- 408.2dd);
+ func0_asm (1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5,
+ 14.5, 15.2dd, 16.2dl, 17.2dd);
+ func1_asm (101.5, 102.5, 103.5, 104.5, 105.5, 106.5, 107.5, 108.5, 109.5,
+ 110.5, 111.5, 112.5, 113.5, 114.2dd);
+ func2_asm (201.5, 202.5, 203.5, 204.5, 205.5, 206.5, 207.5, 208.5, 209.5,
+ 210.5, 211.5, 212.5, 213.2dd);
+ func3_asm (301.2dd, 302.2dl, 303.2dd, 304.2dl, 305.2dd, 306.2dl, 307.2dd,
+ 308.2dl, 309.2dd, 310.2dl);
+ func4_asm (401.2dl, 402.2dd, 403.2dl, 404.2dd, 405.2dl, 406.2dd, 407.2dl,
+ 408.2dd);
#if 0
/* _Decimal32 doesn't yet follow the ABI; enable this when it does. */
- func5 (501.2df, 502.2df, 503.2df, 504.2df, 505.2df, 506.2df, 507.2df,
- 508.2df, 509.2df, 510.2df, 511.2df, 512.2df, 513.2df, 514.2df,
- 515.2df, 516.2df);
- func6 (601.2df, 602.2dd, 603.2dl, 604.2df, 605.2dd, 606.2dl,
- 607.2df, 608.2dd, 609.2dl, 610.2df, 611.2dd, 612.2dl);
+ func5_asm (501.2df, 502.2df, 503.2df, 504.2df, 505.2df, 506.2df, 507.2df,
+ 508.2df, 509.2df, 510.2df, 511.2df, 512.2df, 513.2df, 514.2df,
+ 515.2df, 516.2df);
+ func6_asm (601.2df, 602.2dd, 603.2dl, 604.2df, 605.2dd, 606.2dl,
+ 607.2df, 608.2dd, 609.2dl, 610.2df, 611.2dd, 612.2dl);
#endif
if (failcnt != 0)
===================================================================
@@ -30,31 +30,6 @@ typedef struct
reg_parms_t gparms;
-
-/* Testcase could break on future gcc's, if parameter regs
- are changed before this asm. */
-
-#define save_parms(lparms) \
- asm volatile ("lis 11,gparms@ha\n\t" \
- "la 11,gparms@l(11)\n\t" \
- "st 3,0(11)\n\t" \
- "st 4,4(11)\n\t" \
- "st 5,8(11)\n\t" \
- "st 6,12(11)\n\t" \
- "st 7,16(11)\n\t" \
- "st 8,20(11)\n\t" \
- "st 9,24(11)\n\t" \
- "st 10,28(11)\n\t" \
- "stfd 1,32(11)\n\t" \
- "stfd 2,40(11)\n\t" \
- "stfd 3,48(11)\n\t" \
- "stfd 4,56(11)\n\t" \
- "stfd 5,64(11)\n\t" \
- "stfd 6,72(11)\n\t" \
- "stfd 7,80(11)\n\t" \
- "stfd 8,88(11)\n\t":::"11", "memory"); \
- lparms = gparms;
-
typedef struct sf
{
struct sf *backchain;
@@ -62,115 +37,159 @@ typedef struct sf
unsigned int slot[200];
} stack_frame_t;
+/* Wrapper to save the GPRs and FPRs and then jump to the real function. */
+#define WRAPPER(NAME) \
+__asm__ ("\t.globl\t" #NAME "_asm\n\t" \
+ ".text\n\t" \
+ ".type " #NAME "_asm, @function\n" \
+ #NAME "_asm:\n\t" \
+ "lis 11,gparms@ha\n\t" \
+ "la 11,gparms@l(11)\n\t" \
+ "st 3,0(11)\n\t" \
+ "st 4,4(11)\n\t" \
+ "st 5,8(11)\n\t" \
+ "st 6,12(11)\n\t" \
+ "st 7,16(11)\n\t" \
+ "st 8,20(11)\n\t" \
+ "st 9,24(11)\n\t" \
+ "st 10,28(11)\n\t" \
+ "stfd 1,32(11)\n\t" \
+ "stfd 2,40(11)\n\t" \
+ "stfd 3,48(11)\n\t" \
+ "stfd 4,56(11)\n\t" \
+ "stfd 5,64(11)\n\t" \
+ "stfd 6,72(11)\n\t" \
+ "stfd 7,80(11)\n\t" \
+ "stfd 8,88(11)\n\t" \
+ "b " #NAME "\n\t" \
+ ".size " #NAME ",.-" #NAME "\n")
+
/* Fill up floating point registers with double arguments, forcing
decimal float arguments into the parameter save area. */
+extern void func0_asm (double, double, double, double, double,
+ double, double, double, _Decimal64, _Decimal128);
+
+WRAPPER(func0);
+
void __attribute__ ((noinline))
func0 (double a1, double a2, double a3, double a4, double a5,
double a6, double a7, double a8, _Decimal64 a9, _Decimal128 a10)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != lparms.fprs[0]) FAILURE
- if (a2 != lparms.fprs[1]) FAILURE
- if (a3 != lparms.fprs[2]) FAILURE
- if (a4 != lparms.fprs[3]) FAILURE
- if (a5 != lparms.fprs[4]) FAILURE
- if (a6 != lparms.fprs[5]) FAILURE
- if (a7 != lparms.fprs[6]) FAILURE
- if (a8 != lparms.fprs[7]) FAILURE
+ if (a1 != gparms.fprs[0]) FAILURE
+ if (a2 != gparms.fprs[1]) FAILURE
+ if (a3 != gparms.fprs[2]) FAILURE
+ if (a4 != gparms.fprs[3]) FAILURE
+ if (a5 != gparms.fprs[4]) FAILURE
+ if (a6 != gparms.fprs[5]) FAILURE
+ if (a7 != gparms.fprs[6]) FAILURE
+ if (a8 != gparms.fprs[7]) FAILURE
if (a9 != *(_Decimal64 *)&sp->slot[0]) FAILURE
if (a10 != *(_Decimal128 *)&sp->slot[2]) FAILURE
}
/* Alternate 64-bit and 128-bit decimal float arguments, checking that
_Decimal128 is always passed in even/odd register pairs. */
+extern void func1_asm (_Decimal64, _Decimal128, _Decimal64, _Decimal128,
+ _Decimal64, _Decimal128, _Decimal64, _Decimal128);
+
+WRAPPER(func1);
+
void __attribute__ ((noinline))
func1 (_Decimal64 a1, _Decimal128 a2, _Decimal64 a3, _Decimal128 a4,
_Decimal64 a5, _Decimal128 a6, _Decimal64 a7, _Decimal128 a8)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != *(_Decimal64 *)&lparms.fprs[0]) FAILURE /* f1 */
- if (a2 != *(_Decimal128 *)&lparms.fprs[1]) FAILURE /* f2 & f3 */
- if (a3 != *(_Decimal64 *)&lparms.fprs[3]) FAILURE /* f4 */
- if (a4 != *(_Decimal128 *)&lparms.fprs[5]) FAILURE /* f6 & f7 */
- if (a5 != *(_Decimal64 *)&lparms.fprs[7]) FAILURE /* f8 */
+ if (a1 != *(_Decimal64 *)&gparms.fprs[0]) FAILURE /* f1 */
+ if (a2 != *(_Decimal128 *)&gparms.fprs[1]) FAILURE /* f2 & f3 */
+ if (a3 != *(_Decimal64 *)&gparms.fprs[3]) FAILURE /* f4 */
+ if (a4 != *(_Decimal128 *)&gparms.fprs[5]) FAILURE /* f6 & f7 */
+ if (a5 != *(_Decimal64 *)&gparms.fprs[7]) FAILURE /* f8 */
if (a6 != *(_Decimal128 *)&sp->slot[0]) FAILURE
if (a7 != *(_Decimal64 *)&sp->slot[4]) FAILURE
if (a8 != *(_Decimal128 *)&sp->slot[6]) FAILURE
}
+extern void func2_asm (_Decimal128, _Decimal64, _Decimal128, _Decimal64,
+ _Decimal128, _Decimal64, _Decimal128, _Decimal64);
+
+WRAPPER(func2);
+
void __attribute__ ((noinline))
func2 (_Decimal128 a1, _Decimal64 a2, _Decimal128 a3, _Decimal64 a4,
_Decimal128 a5, _Decimal64 a6, _Decimal128 a7, _Decimal64 a8)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != *(_Decimal128 *)&lparms.fprs[1]) FAILURE /* f2 & f3 */
- if (a2 != *(_Decimal64 *)&lparms.fprs[3]) FAILURE /* f4 */
- if (a3 != *(_Decimal128 *)&lparms.fprs[5]) FAILURE /* f6 & f7 */
- if (a4 != *(_Decimal64 *)&lparms.fprs[7]) FAILURE /* f8 */
+ if (a1 != *(_Decimal128 *)&gparms.fprs[1]) FAILURE /* f2 & f3 */
+ if (a2 != *(_Decimal64 *)&gparms.fprs[3]) FAILURE /* f4 */
+ if (a3 != *(_Decimal128 *)&gparms.fprs[5]) FAILURE /* f6 & f7 */
+ if (a4 != *(_Decimal64 *)&gparms.fprs[7]) FAILURE /* f8 */
if (a5 != *(_Decimal128 *)&sp->slot[0]) FAILURE
if (a6 != *(_Decimal64 *)&sp->slot[4]) FAILURE
if (a7 != *(_Decimal128 *)&sp->slot[6]) FAILURE
if (a8 != *(_Decimal64 *)&sp->slot[10]) FAILURE
}
+extern void func3_asm (_Decimal64, _Decimal128, _Decimal64, _Decimal128,
+ _Decimal64);
+
+WRAPPER(func3);
+
void __attribute__ ((noinline))
func3 (_Decimal64 a1, _Decimal128 a2, _Decimal64 a3, _Decimal128 a4,
_Decimal64 a5)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != *(_Decimal64 *)&lparms.fprs[0]) FAILURE /* f1 */
- if (a2 != *(_Decimal128 *)&lparms.fprs[1]) FAILURE /* f2 & f3 */
- if (a3 != *(_Decimal64 *)&lparms.fprs[3]) FAILURE /* f4 */
- if (a4 != *(_Decimal128 *)&lparms.fprs[5]) FAILURE /* f6 & f7 */
+ if (a1 != *(_Decimal64 *)&gparms.fprs[0]) FAILURE /* f1 */
+ if (a2 != *(_Decimal128 *)&gparms.fprs[1]) FAILURE /* f2 & f3 */
+ if (a3 != *(_Decimal64 *)&gparms.fprs[3]) FAILURE /* f4 */
+ if (a4 != *(_Decimal128 *)&gparms.fprs[5]) FAILURE /* f6 & f7 */
if (a5 != *(_Decimal128 *)&sp->slot[0]) FAILURE
}
+extern void func4_asm (_Decimal32, _Decimal32, _Decimal32, _Decimal32,
+ _Decimal32, _Decimal32, _Decimal32, _Decimal32,
+ _Decimal32, _Decimal32, _Decimal32, _Decimal32,
+ _Decimal32, _Decimal32, _Decimal32, _Decimal32);
+
+WRAPPER(func4);
+
void __attribute__ ((noinline))
func4 (_Decimal32 a1, _Decimal32 a2, _Decimal32 a3, _Decimal32 a4,
_Decimal32 a5, _Decimal32 a6, _Decimal32 a7, _Decimal32 a8,
_Decimal32 a9, _Decimal32 a10, _Decimal32 a11, _Decimal32 a12,
_Decimal32 a13, _Decimal32 a14, _Decimal32 a15, _Decimal32 a16)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
   /* _Decimal32 is passed in the lower half of an FPR, or in a parameter slot. */
- if (a1 != ((d32parm_t *)&lparms.fprs[0])->d) FAILURE /* f1 */
- if (a2 != ((d32parm_t *)&lparms.fprs[1])->d) FAILURE /* f2 */
- if (a3 != ((d32parm_t *)&lparms.fprs[2])->d) FAILURE /* f3 */
- if (a4 != ((d32parm_t *)&lparms.fprs[3])->d) FAILURE /* f4 */
- if (a5 != ((d32parm_t *)&lparms.fprs[4])->d) FAILURE /* f5 */
- if (a6 != ((d32parm_t *)&lparms.fprs[5])->d) FAILURE /* f6 */
- if (a7 != ((d32parm_t *)&lparms.fprs[6])->d) FAILURE /* f7 */
- if (a8 != ((d32parm_t *)&lparms.fprs[7])->d) FAILURE /* f8 */
+ if (a1 != ((d32parm_t *)&gparms.fprs[0])->d) FAILURE /* f1 */
+ if (a2 != ((d32parm_t *)&gparms.fprs[1])->d) FAILURE /* f2 */
+ if (a3 != ((d32parm_t *)&gparms.fprs[2])->d) FAILURE /* f3 */
+ if (a4 != ((d32parm_t *)&gparms.fprs[3])->d) FAILURE /* f4 */
+ if (a5 != ((d32parm_t *)&gparms.fprs[4])->d) FAILURE /* f5 */
+ if (a6 != ((d32parm_t *)&gparms.fprs[5])->d) FAILURE /* f6 */
+ if (a7 != ((d32parm_t *)&gparms.fprs[6])->d) FAILURE /* f7 */
+ if (a8 != ((d32parm_t *)&gparms.fprs[7])->d) FAILURE /* f8 */
if (a9 != *(_Decimal32 *)&sp->slot[0]) FAILURE
if (a10 != *(_Decimal32 *)&sp->slot[1]) FAILURE
if (a11 != *(_Decimal32 *)&sp->slot[2]) FAILURE
@@ -181,24 +200,29 @@ func4 (_Decimal32 a1, _Decimal32 a2, _De
if (a16 != *(_Decimal32 *)&sp->slot[7]) FAILURE
}
+extern void func5_asm (_Decimal32, _Decimal64, _Decimal128,
+ _Decimal32, _Decimal64, _Decimal128,
+ _Decimal32, _Decimal64, _Decimal128,
+ _Decimal32, _Decimal64, _Decimal128);
+
+WRAPPER(func5);
+
void __attribute__ ((noinline))
func5 (_Decimal32 a1, _Decimal64 a2, _Decimal128 a3,
_Decimal32 a4, _Decimal64 a5, _Decimal128 a6,
_Decimal32 a7, _Decimal64 a8, _Decimal128 a9,
_Decimal32 a10, _Decimal64 a11, _Decimal128 a12)
{
- reg_parms_t lparms;
stack_frame_t *sp;
- save_parms (lparms);
sp = __builtin_frame_address (0);
sp = sp->backchain;
- if (a1 != ((d32parm_t *)&lparms.fprs[0])->d) FAILURE /* f1 */
- if (a2 != *(_Decimal64 *)&lparms.fprs[1]) FAILURE /* f2 */
- if (a3 != *(_Decimal128 *)&lparms.fprs[3]) FAILURE /* f4 & f5 */
- if (a4 != ((d32parm_t *)&lparms.fprs[5])->d) FAILURE /* f6 */
- if (a5 != *(_Decimal64 *)&lparms.fprs[6]) FAILURE /* f7 */
+ if (a1 != ((d32parm_t *)&gparms.fprs[0])->d) FAILURE /* f1 */
+ if (a2 != *(_Decimal64 *)&gparms.fprs[1]) FAILURE /* f2 */
+ if (a3 != *(_Decimal128 *)&gparms.fprs[3]) FAILURE /* f4 & f5 */
+ if (a4 != ((d32parm_t *)&gparms.fprs[5])->d) FAILURE /* f6 */
+ if (a5 != *(_Decimal64 *)&gparms.fprs[6]) FAILURE /* f7 */
if (a6 != *(_Decimal128 *)&sp->slot[0]) FAILURE
if (a7 != *(_Decimal32 *)&sp->slot[4]) FAILURE
@@ -212,15 +236,15 @@ func5 (_Decimal32 a1, _Decimal64 a2, _De
int
main ()
{
- func0 (1., 2., 3., 4., 5., 6., 7., 8., 9.dd, 10.dl);
- func1 (1.dd, 2.dl, 3.dd, 4.dl, 5.dd, 6.dl, 7.dd, 8.dl);
- func2 (1.dl, 2.dd, 3.dl, 4.dd, 5.dl, 6.dd, 7.dl, 8.dd);
- func3 (1.dd, 2.dl, 3.dd, 4.dl, 5.dl);
- func4 (501.2df, 502.2df, 503.2df, 504.2df, 505.2df, 506.2df, 507.2df,
- 508.2df, 509.2df, 510.2df, 511.2df, 512.2df, 513.2df, 514.2df,
- 515.2df, 516.2df);
- func5 (601.2df, 602.2dd, 603.2dl, 604.2df, 605.2dd, 606.2dl,
- 607.2df, 608.2dd, 609.2dl, 610.2df, 611.2dd, 612.2dl);
+ func0_asm (1., 2., 3., 4., 5., 6., 7., 8., 9.dd, 10.dl);
+ func1_asm (1.dd, 2.dl, 3.dd, 4.dl, 5.dd, 6.dl, 7.dd, 8.dl);
+ func2_asm (1.dl, 2.dd, 3.dl, 4.dd, 5.dl, 6.dd, 7.dl, 8.dd);
+ func3_asm (1.dd, 2.dl, 3.dd, 4.dl, 5.dl);
+ func4_asm (501.2df, 502.2df, 503.2df, 504.2df, 505.2df, 506.2df, 507.2df,
+ 508.2df, 509.2df, 510.2df, 511.2df, 512.2df, 513.2df, 514.2df,
+ 515.2df, 516.2df);
+ func5_asm (601.2df, 602.2dd, 603.2dl, 604.2df, 605.2dd, 606.2dl,
+ 607.2df, 608.2dd, 609.2dl, 610.2df, 611.2dd, 612.2dl);
if (failcnt != 0)
abort ();
===================================================================
@@ -1,5 +1,5 @@
/* { dg-do compile { target { powerpc*-*-* } } } */
-/* { dg-options "-O2 -mavoid-indexed-addresses" } */
+/* { dg-options "-O2 -mavoid-indexed-addresses -mno-altivec -mno-vsx" } */
/* { dg-final { scan-assembler-not "lbzx" } }
===================================================================
@@ -123,6 +123,43 @@ (define_split
DONE;
})
+;; Vector floating point load/store expanders that use the Altivec
+;; instructions even if we are compiling for VSX, since the Altivec
+;; instructions silently ignore the bottom 4 bits of the address (forcing
+;; 16-byte alignment) and VSX does not.
+(define_expand "vector_altivec_load_<mode>"
+ [(set (match_operand:VEC_M 0 "vfloat_operand" "")
+ (match_operand:VEC_M 1 "memory_operand" ""))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ gcc_assert (VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode));
+
+ if (VECTOR_MEM_VSX_P (<MODE>mode))
+ {
+ operands[1] = rs6000_address_for_altivec (operands[1]);
+ emit_insn (gen_altivec_lvx_<mode> (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+(define_expand "vector_altivec_store_<mode>"
+ [(set (match_operand:VEC_M 0 "memory_operand" "")
+ (match_operand:VEC_M 1 "vfloat_operand" ""))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ gcc_assert (VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode));
+
+ if (VECTOR_MEM_VSX_P (<MODE>mode))
+ {
+ operands[0] = rs6000_address_for_altivec (operands[0]);
+ emit_insn (gen_altivec_stvx_<mode> (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+
;; Reload patterns for vector operations.  We may need an additional base
;; register to convert the reg+offset addressing to reg+reg for vector
===================================================================
@@ -129,6 +129,7 @@ extern void rs6000_emit_parity (rtx, rtx
extern rtx rs6000_machopic_legitimize_pic_address (rtx, enum machine_mode,
rtx);
extern rtx rs6000_address_for_fpconvert (rtx);
+extern rtx rs6000_address_for_altivec (rtx);
extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool);
extern int rs6000_loop_align (rtx);
#endif /* RTX_CODE */
===================================================================
@@ -1000,6 +1000,14 @@ const struct altivec_builtin_types altiv
{ VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
@@ -1115,6 +1123,14 @@ const struct altivec_builtin_types altiv
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
{ ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
@@ -1133,6 +1149,16 @@ const struct altivec_builtin_types altiv
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
{ ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 },
{ ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
{ ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
@@ -1151,6 +1177,16 @@ const struct altivec_builtin_types altiv
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
{ ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 },
{ ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
@@ -2644,6 +2680,14 @@ const struct altivec_builtin_types altiv
{ ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI,
RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_NOT_OPAQUE },
{ ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
{ ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
@@ -2809,6 +2853,16 @@ const struct altivec_builtin_types altiv
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
{ ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI },
{ ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
{ ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
@@ -3002,6 +3056,112 @@ const struct altivec_builtin_types altiv
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V2DI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF,
+ RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
+ RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V2DI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
+ RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SF,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SF,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
+
/* Predicates. */
{ ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI },
===================================================================
@@ -37,6 +37,10 @@ RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERN
RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_16qi, RS6000_BTC_MEM)
RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_4sf, RS6000_BTC_MEM)
RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_4sf, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_2df, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_2df, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_2di, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_2di, RS6000_BTC_MEM)
RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUBM, RS6000_BTC_CONST)
RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUHM, RS6000_BTC_CONST)
RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUWM, RS6000_BTC_CONST)
@@ -778,12 +782,20 @@ RS6000_BUILTIN(PAIRED_BUILTIN_CMPU1, R
/* VSX builtins. */
RS6000_BUILTIN(VSX_BUILTIN_LXSDX, RS6000_BTC_MEM)
-RS6000_BUILTIN(VSX_BUILTIN_LXVD2X, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVD2X_V2DF, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTC_MEM)
RS6000_BUILTIN(VSX_BUILTIN_LXVDSX, RS6000_BTC_MEM)
-RS6000_BUILTIN(VSX_BUILTIN_LXVW4X, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V4SF, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTC_MEM)
RS6000_BUILTIN(VSX_BUILTIN_STXSDX, RS6000_BTC_MEM)
-RS6000_BUILTIN(VSX_BUILTIN_STXVD2X, RS6000_BTC_MEM)
-RS6000_BUILTIN(VSX_BUILTIN_STXVW4X, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVD2X_V2DF, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVD2X_V2DI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V4SF, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V4SI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V8HI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V16QI, RS6000_BTC_MEM)
RS6000_BUILTIN(VSX_BUILTIN_XSABSDP, RS6000_BTC_CONST)
RS6000_BUILTIN(VSX_BUILTIN_XSADDDP, RS6000_BTC_FP_PURE)
RS6000_BUILTIN(VSX_BUILTIN_XSCMPODP, RS6000_BTC_FP_PURE)
@@ -983,8 +995,10 @@ RS6000_BUILTIN(VSX_BUILTIN_VEC_XXPERMDI,
RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSLDWI, RS6000_BTC_MISC)
RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSPLTD, RS6000_BTC_MISC)
RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSPLTW, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_LD, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_ST, RS6000_BTC_MISC)
RS6000_BUILTIN_EQUATE(VSX_BUILTIN_OVERLOADED_LAST,
- VSX_BUILTIN_VEC_XXSPLTW)
+ VSX_BUILTIN_VEC_ST)
/* Combined VSX/Altivec builtins. */
RS6000_BUILTIN(VECTOR_BUILTIN_FLOAT_V4SI_V4SF, RS6000_BTC_FP_PURE)
===================================================================
@@ -3316,9 +3316,12 @@ rs6000_option_override_internal (bool gl
/* If not explicitly specified via option, decide whether to generate indexed
load/store instructions. */
if (TARGET_AVOID_XFORM == -1)
- /* Avoid indexed addressing when targeting Power6 in order to avoid
- the DERAT mispredict penalty. */
- TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB);
+ /* Avoid indexed addressing when targeting Power6 in order to avoid the
+       DERAT mispredict penalty.  However, the Altivec LVE and STVE instructions
+       need indexed accesses, and the type used is the scalar type of the element
+       being loaded or stored.  */
+ TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
+ && !TARGET_ALTIVEC);
/* Set the -mrecip options. */
if (rs6000_recip_name)
@@ -11263,16 +11266,22 @@ altivec_expand_ld_builtin (tree exp, rtx
switch (fcode)
{
case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
- icode = CODE_FOR_vector_load_v16qi;
+ icode = CODE_FOR_vector_altivec_load_v16qi;
break;
case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
- icode = CODE_FOR_vector_load_v8hi;
+ icode = CODE_FOR_vector_altivec_load_v8hi;
break;
case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
- icode = CODE_FOR_vector_load_v4si;
+ icode = CODE_FOR_vector_altivec_load_v4si;
break;
case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
- icode = CODE_FOR_vector_load_v4sf;
+ icode = CODE_FOR_vector_altivec_load_v4sf;
+ break;
+ case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
+ icode = CODE_FOR_vector_altivec_load_v2df;
+ break;
+ case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
+ icode = CODE_FOR_vector_altivec_load_v2di;
break;
default:
*expandedp = false;
@@ -11316,16 +11325,22 @@ altivec_expand_st_builtin (tree exp, rtx
switch (fcode)
{
case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
- icode = CODE_FOR_vector_store_v16qi;
+ icode = CODE_FOR_vector_altivec_store_v16qi;
break;
case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
- icode = CODE_FOR_vector_store_v8hi;
+ icode = CODE_FOR_vector_altivec_store_v8hi;
break;
case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
- icode = CODE_FOR_vector_store_v4si;
+ icode = CODE_FOR_vector_altivec_store_v4si;
break;
case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
- icode = CODE_FOR_vector_store_v4sf;
+ icode = CODE_FOR_vector_altivec_store_v4sf;
+ break;
+ case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
+ icode = CODE_FOR_vector_altivec_store_v2df;
+ break;
+ case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
+ icode = CODE_FOR_vector_altivec_store_v2di;
break;
default:
*expandedp = false;
@@ -11557,7 +11572,7 @@ altivec_expand_builtin (tree exp, rtx ta
switch (fcode)
{
case ALTIVEC_BUILTIN_STVX:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx, exp);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
case ALTIVEC_BUILTIN_STVEBX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
case ALTIVEC_BUILTIN_STVEHX:
@@ -11576,6 +11591,19 @@ altivec_expand_builtin (tree exp, rtx ta
case ALTIVEC_BUILTIN_STVRXL:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
+ case VSX_BUILTIN_STXVD2X_V2DF:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
+ case VSX_BUILTIN_STXVD2X_V2DI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
+ case VSX_BUILTIN_STXVW4X_V4SF:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
+ case VSX_BUILTIN_STXVW4X_V4SI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
+ case VSX_BUILTIN_STXVW4X_V8HI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
+ case VSX_BUILTIN_STXVW4X_V16QI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
+
case ALTIVEC_BUILTIN_MFVSCR:
icode = CODE_FOR_altivec_mfvscr;
tmode = insn_data[icode].operand[0].mode;
@@ -11700,7 +11728,7 @@ altivec_expand_builtin (tree exp, rtx ta
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl,
exp, target, false);
case ALTIVEC_BUILTIN_LVX:
- return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx,
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
exp, target, false);
case ALTIVEC_BUILTIN_LVLX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
@@ -11714,6 +11742,25 @@ altivec_expand_builtin (tree exp, rtx ta
case ALTIVEC_BUILTIN_LVRXL:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
exp, target, true);
+ case VSX_BUILTIN_LXVD2X_V2DF:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
+ exp, target, false);
+ case VSX_BUILTIN_LXVD2X_V2DI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
+ exp, target, false);
+ case VSX_BUILTIN_LXVW4X_V4SF:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
+ exp, target, false);
+ case VSX_BUILTIN_LXVW4X_V4SI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
+ exp, target, false);
+ case VSX_BUILTIN_LXVW4X_V8HI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
+ exp, target, false);
+ case VSX_BUILTIN_LXVW4X_V16QI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
+ exp, target, false);
default:
break;
/* Fall through. */
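
These cases route the overloaded vec_vsx_ld/vec_vsx_st builtins to the new
vsx_load_<mode>/vsx_store_<mode> expanders.  A minimal usage sketch, assuming
<altivec.h> and -mvsx (the function name is illustrative, not part of the
patch):

    #include <altivec.h>

    /* Unlike vec_ld/vec_st, the VSX forms do not AND the address with
       -16, so SRC and DST need not be 16-byte aligned.  */
    vector double
    copy2 (const double *src, double *dst)
    {
      vector double v = vec_vsx_ld (0, src);   /* lxvd2x-style load   */
      vec_vsx_st (v, 0, dst);                  /* stxvd2x-style store */
      return v;
    }
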
@@ -12331,6 +12378,8 @@ rs6000_init_builtins (void)
long_integer_type_internal_node = long_integer_type_node;
long_unsigned_type_internal_node = long_unsigned_type_node;
+ long_long_integer_type_internal_node = long_long_integer_type_node;
+ long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
intQI_type_internal_node = intQI_type_node;
uintQI_type_internal_node = unsigned_intQI_type_node;
intHI_type_internal_node = intHI_type_node;
@@ -12340,7 +12389,7 @@ rs6000_init_builtins (void)
intDI_type_internal_node = intDI_type_node;
uintDI_type_internal_node = unsigned_intDI_type_node;
float_type_internal_node = float_type_node;
- double_type_internal_node = float_type_node;
+ double_type_internal_node = double_type_node;
void_type_internal_node = void_type_node;
/* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -12872,19 +12921,11 @@ altivec_init_builtins (void)
size_t i;
tree ftype;
- tree pfloat_type_node = build_pointer_type (float_type_node);
- tree pint_type_node = build_pointer_type (integer_type_node);
- tree pshort_type_node = build_pointer_type (short_integer_type_node);
- tree pchar_type_node = build_pointer_type (char_type_node);
-
tree pvoid_type_node = build_pointer_type (void_type_node);
- tree pcfloat_type_node = build_pointer_type (build_qualified_type (float_type_node, TYPE_QUAL_CONST));
- tree pcint_type_node = build_pointer_type (build_qualified_type (integer_type_node, TYPE_QUAL_CONST));
- tree pcshort_type_node = build_pointer_type (build_qualified_type (short_integer_type_node, TYPE_QUAL_CONST));
- tree pcchar_type_node = build_pointer_type (build_qualified_type (char_type_node, TYPE_QUAL_CONST));
-
- tree pcvoid_type_node = build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_CONST));
+ tree pcvoid_type_node
+ = build_pointer_type (build_qualified_type (void_type_node,
+ TYPE_QUAL_CONST));
tree int_ftype_opaque
= build_function_type_list (integer_type_node,
@@ -12907,26 +12948,6 @@ altivec_init_builtins (void)
= build_function_type_list (integer_type_node,
integer_type_node, V4SI_type_node,
V4SI_type_node, NULL_TREE);
- tree v4sf_ftype_pcfloat
- = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
- tree void_ftype_pfloat_v4sf
- = build_function_type_list (void_type_node,
- pfloat_type_node, V4SF_type_node, NULL_TREE);
- tree v4si_ftype_pcint
- = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
- tree void_ftype_pint_v4si
- = build_function_type_list (void_type_node,
- pint_type_node, V4SI_type_node, NULL_TREE);
- tree v8hi_ftype_pcshort
- = build_function_type_list (V8HI_type_node, pcshort_type_node, NULL_TREE);
- tree void_ftype_pshort_v8hi
- = build_function_type_list (void_type_node,
- pshort_type_node, V8HI_type_node, NULL_TREE);
- tree v16qi_ftype_pcchar
- = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
- tree void_ftype_pchar_v16qi
- = build_function_type_list (void_type_node,
- pchar_type_node, V16QI_type_node, NULL_TREE);
tree void_ftype_v4si
= build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
tree v8hi_ftype_void
@@ -12948,6 +12969,15 @@ altivec_init_builtins (void)
tree v4si_ftype_long_pcvoid
= build_function_type_list (V4SI_type_node,
long_integer_type_node, pcvoid_type_node, NULL_TREE);
+ tree v4sf_ftype_long_pcvoid
+ = build_function_type_list (V4SF_type_node,
+ long_integer_type_node, pcvoid_type_node, NULL_TREE);
+ tree v2df_ftype_long_pcvoid
+ = build_function_type_list (V2DF_type_node,
+ long_integer_type_node, pcvoid_type_node, NULL_TREE);
+ tree v2di_ftype_long_pcvoid
+ = build_function_type_list (V2DI_type_node,
+ long_integer_type_node, pcvoid_type_node, NULL_TREE);
tree void_ftype_opaque_long_pvoid
= build_function_type_list (void_type_node,
@@ -12965,6 +12995,18 @@ altivec_init_builtins (void)
= build_function_type_list (void_type_node,
V8HI_type_node, long_integer_type_node,
pvoid_type_node, NULL_TREE);
+ tree void_ftype_v4sf_long_pvoid
+ = build_function_type_list (void_type_node,
+ V4SF_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ tree void_ftype_v2df_long_pvoid
+ = build_function_type_list (void_type_node,
+ V2DF_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ tree void_ftype_v2di_long_pvoid
+ = build_function_type_list (void_type_node,
+ V2DI_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
tree int_ftype_int_v8hi_v8hi
= build_function_type_list (integer_type_node,
integer_type_node, V8HI_type_node,
@@ -12996,22 +13038,6 @@ altivec_init_builtins (void)
pcvoid_type_node, integer_type_node,
integer_type_node, NULL_TREE);
- def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_4sf", v4sf_ftype_pcfloat,
- ALTIVEC_BUILTIN_LD_INTERNAL_4sf);
- def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_4sf", void_ftype_pfloat_v4sf,
- ALTIVEC_BUILTIN_ST_INTERNAL_4sf);
- def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_4si", v4si_ftype_pcint,
- ALTIVEC_BUILTIN_LD_INTERNAL_4si);
- def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_4si", void_ftype_pint_v4si,
- ALTIVEC_BUILTIN_ST_INTERNAL_4si);
- def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_8hi", v8hi_ftype_pcshort,
- ALTIVEC_BUILTIN_LD_INTERNAL_8hi);
- def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_8hi", void_ftype_pshort_v8hi,
- ALTIVEC_BUILTIN_ST_INTERNAL_8hi);
- def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_16qi", v16qi_ftype_pcchar,
- ALTIVEC_BUILTIN_LD_INTERNAL_16qi);
- def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_16qi", void_ftype_pchar_v16qi,
- ALTIVEC_BUILTIN_ST_INTERNAL_16qi);
def_builtin (MASK_ALTIVEC, "__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
def_builtin (MASK_ALTIVEC, "__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
def_builtin (MASK_ALTIVEC, "__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
@@ -13043,6 +13069,21 @@ altivec_init_builtins (void)
def_builtin (MASK_ALTIVEC, "__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
def_builtin (MASK_ALTIVEC, "__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid, VSX_BUILTIN_LXVD2X_V2DF);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid, VSX_BUILTIN_LXVD2X_V2DI);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid, VSX_BUILTIN_LXVW4X_V4SF);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid, VSX_BUILTIN_LXVW4X_V4SI);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid, VSX_BUILTIN_LXVW4X_V8HI);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid, VSX_BUILTIN_LXVW4X_V16QI);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid, VSX_BUILTIN_STXVD2X_V2DF);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid, VSX_BUILTIN_STXVD2X_V2DI);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid, VSX_BUILTIN_STXVW4X_V4SF);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid, VSX_BUILTIN_STXVW4X_V4SI);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid, VSX_BUILTIN_STXVW4X_V8HI);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid, VSX_BUILTIN_STXVW4X_V16QI);
+ def_builtin (MASK_VSX, "__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid, VSX_BUILTIN_VEC_LD);
+ def_builtin (MASK_VSX, "__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid, VSX_BUILTIN_VEC_ST);
+
if (rs6000_cpu == PROCESSOR_CELL)
{
def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
@@ -27925,4 +27966,29 @@ rs6000_address_for_fpconvert (rtx x)
return x;
}
+/* Given a memory reference, if it is not in the form the Altivec memory
+ reference instructions require (i.e. indirect or indexed addressing with
+ the address ANDed with -16), convert it to that form.  */
+
+rtx
+rs6000_address_for_altivec (rtx x)
+{
+ gcc_assert (MEM_P (x));
+ if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
+ {
+ rtx addr = XEXP (x, 0);
+ int strict_p = (reload_in_progress || reload_completed);
+
+ if (!legitimate_indexed_address_p (addr, strict_p)
+ && !legitimate_indirect_address_p (addr, strict_p))
+ addr = copy_to_mode_reg (Pmode, addr);
+
+ addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
+ x = change_address (x, GET_MODE (x), addr);
+ }
+
+ return x;
+}
+
+
#include "gt-rs6000.h"
===================================================================
@@ -308,6 +308,19 @@ (define_insn "*vsx_movti"
}
[(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecstore,vecload")])
+;; Explicit load/store expanders for the builtin functions
+(define_expand "vsx_load_<mode>"
+ [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
+ (match_operand:VSX_M 1 "memory_operand" ""))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vsx_store_<mode>"
+ [(set (match_operand:VSX_M 0 "memory_operand" "")
+ (match_operand:VSX_M 1 "vsx_register_operand" ""))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "")
+
;; VSX scalar and vector floating point arithmetic instructions
(define_insn "*vsx_add<mode>3"
===================================================================
@@ -2368,6 +2368,8 @@ enum rs6000_builtin_type_index
RS6000_BTI_pixel_V8HI, /* __vector __pixel */
RS6000_BTI_long, /* long_integer_type_node */
RS6000_BTI_unsigned_long, /* long_unsigned_type_node */
+ RS6000_BTI_long_long, /* long_long_integer_type_node */
+ RS6000_BTI_unsigned_long_long, /* long_long_unsigned_type_node */
RS6000_BTI_INTQI, /* intQI_type_node */
RS6000_BTI_UINTQI, /* unsigned_intQI_type_node */
RS6000_BTI_INTHI, /* intHI_type_node */
@@ -2411,6 +2413,8 @@ enum rs6000_builtin_type_index
#define bool_V2DI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V2DI])
#define pixel_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_pixel_V8HI])
+#define long_long_integer_type_internal_node (rs6000_builtin_types[RS6000_BTI_long_long])
+#define long_long_unsigned_type_internal_node (rs6000_builtin_types[RS6000_BTI_unsigned_long_long])
#define long_integer_type_internal_node (rs6000_builtin_types[RS6000_BTI_long])
#define long_unsigned_type_internal_node (rs6000_builtin_types[RS6000_BTI_unsigned_long])
#define intQI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTQI])
===================================================================
@@ -96,7 +96,7 @@ (define_constants
(UNSPEC_STVE 203)
(UNSPEC_SET_VSCR 213)
(UNSPEC_GET_VRSAVE 214)
- ;; 215 deleted
+ (UNSPEC_LVX 215)
(UNSPEC_REDUC_PLUS 217)
(UNSPEC_VECSH 219)
(UNSPEC_EXTEVEN_V4SI 220)
@@ -1750,17 +1750,19 @@ (define_insn "altivec_lvxl"
"lvxl %0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "altivec_lvx"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (match_operand:V4SI 1 "memory_operand" "Z"))]
+(define_insn "altivec_lvx_<mode>"
+ [(parallel
+ [(set (match_operand:VM2 0 "register_operand" "=v")
+ (match_operand:VM2 1 "memory_operand" "Z"))
+ (unspec [(const_int 0)] UNSPEC_LVX)])]
"TARGET_ALTIVEC"
"lvx %0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "altivec_stvx"
+(define_insn "altivec_stvx_<mode>"
[(parallel
- [(set (match_operand:V4SI 0 "memory_operand" "=Z")
- (match_operand:V4SI 1 "register_operand" "v"))
+ [(set (match_operand:VM2 0 "memory_operand" "=Z")
+ (match_operand:VM2 1 "register_operand" "v"))
(unspec [(const_int 0)] UNSPEC_STVX)])]
"TARGET_ALTIVEC"
"stvx %1,%y0"
===================================================================
@@ -318,6 +318,8 @@
#define vec_nearbyint __builtin_vec_nearbyint
#define vec_rint __builtin_vec_rint
#define vec_sqrt __builtin_vec_sqrt
+#define vec_vsx_ld __builtin_vec_vsx_ld
+#define vec_vsx_st __builtin_vec_vsx_st
#endif
/* Predicates.
===================================================================
@@ -547,6 +547,11 @@ search_line_fast (const uchar *s, const
const vc zero = { 0 };
vc data, mask, t;
+ const uchar *unaligned_s = s;
+
+ /* While altivec loads mask addresses, we still need to align S so
+ that the offset we compute at the end is correct. */
+ s = (const uchar *)((uintptr_t)s & -16);
/* Altivec loads automatically mask addresses with -16. This lets us
issue the first load as early as possible. */
@@ -555,15 +560,20 @@ search_line_fast (const uchar *s, const
/* Discard bytes before the beginning of the buffer. Do this by
beginning with all ones and shifting in zeros according to the
mis-alignment. The LVSR instruction pulls the exact shift we
- want from the address. */
- mask = __builtin_vec_lvsr(0, s);
+ want from the address.
+
+ Originally, we passed S to the lvsr and did the alignment afterwards,
+ which worked on systems where vec_ld generates the Altivec LVX
+ instruction.  With the introduction of VSX support in GCC 4.5, the load
+ became LXVW4X.  LVX ignores the bottom four bits of the address, but
+ LXVW4X does not.  While GCC 4.6 reverts vec_ld/vec_st to producing only
+ Altivec instructions, the possibility exists that the stage1 compiler
+ was built with a compiler that generated LXVW4X.  This code works on
+ either system. */
+ mask = __builtin_vec_lvsr(0, unaligned_s);
mask = __builtin_vec_perm(zero, ones, mask);
data &= mask;
- /* While altivec loads mask addresses, we still need to align S so
- that the offset we compute at the end is correct. */
- s = (const uchar *)((uintptr_t)s & -16);
-
/* Main loop processing 16 bytes at a time. */
goto start;
do