===================================================================
@@ -887,7 +887,11 @@ _mm_cvtpd_epi32 (__m128d __A)
: );
#ifdef _ARCH_PWR8
+#ifdef __LITTLE_ENDIAN__
temp = vec_mergeo (temp, temp);
+#else
+ temp = vec_mergee (temp, temp);
+#endif
result = (__v4si) vec_vpkudum ((__vector long long) temp,
(__vector long long) vzero);
#else
@@ -922,7 +926,11 @@ _mm_cvtpd_ps (__m128d __A)
: );
#ifdef _ARCH_PWR8
+#ifdef __LITTLE_ENDIAN__
temp = vec_mergeo (temp, temp);
+#else
+ temp = vec_mergee (temp, temp);
+#endif
result = (__v4sf) vec_vpkudum ((__vector long long) temp,
(__vector long long) vzero);
#else
@@ -951,7 +959,11 @@ _mm_cvttpd_epi32 (__m128d __A)
: );
#ifdef _ARCH_PWR8
+#ifdef __LITTLE_ENDIAN__
temp = vec_mergeo (temp, temp);
+#else
+ temp = vec_mergee (temp, temp);
+#endif
result = (__v4si) vec_vpkudum ((__vector long long) temp,
(__vector long long) vzero);
#else
===================================================================
@@ -905,7 +905,7 @@ _mm_cvtss_f32 (__m128 __A)
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si32 (__m128 __A)
{
- __m64 res = 0;
+ int res;
#ifdef _ARCH_PWR8
double dtmp;
__asm__(
@@ -938,8 +938,8 @@ _mm_cvt_ss2si (__m128 __A)
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_si64 (__m128 __A)
{
- __m64 res = 0;
-#ifdef _ARCH_PWR8
+ long long res;
+#if defined (_ARCH_PWR8) && defined (__powerpc64__)
double dtmp;
__asm__(
#ifdef __LITTLE_ENDIAN__
@@ -1577,6 +1577,7 @@ _m_pminub (__m64 __A, __m64 __B)
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pi8 (__m64 __A)
{
+#ifdef __powerpc64__
unsigned long long p =
#ifdef __LITTLE_ENDIAN__
0x0008101820283038UL; // permute control for sign bits
@@ -1584,6 +1585,18 @@ _mm_movemask_pi8 (__m64 __A)
0x3830282018100800UL; // permute control for sign bits
#endif
return __builtin_bpermd (p, __A);
+#else
+ vector unsigned char A = (vector unsigned char)
+ (vector unsigned long long) { 0, __A };
+ vector unsigned char mask = {
+ 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00,
+ 0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40
+ };
+ vector unsigned long long r = (vector unsigned long long)
+ vec_bperm (A, mask);
+ return r[0];
+#endif
+
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))