diff mbox

[rs6000] Add support for vec_rlnm and vec_rlmi

Message ID 26e01065-a7b2-9a11-96e9-579ad3e1614b@linux.vnet.ibm.com
State New
Headers show

Commit Message

Bill Schmidt Jan. 16, 2017, 6:12 p.m. UTC
Hi,

ISA 3.0 introduces new instructions vrlwmi, vrldmi, vrlwnm, and vrldnm.
This patch provides access to them via built-ins, including the vec_rlmi
and vec_rlnm built-ins mandated by Appendix A of the ELFv2 ABI document.
I also added a vec_vrlnm built-in, which is a more direct translation of
the vrlwnm and vrldnm instructions that some users might prefer.

This has been bootstrapped and tested on powerpc64le-unknown-linux-gnu
with no regressions.  I am in the process of testing them on a big-endian
system as well.  Provided there are no problems there, is this ok for
trunk?

Thanks,
Bill


[gcc]

2017-01-16  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/altivec.h (vec_rlmi): New #define.
	(vec_vrlnm): Likewise.
	(vec_rlnm): Likewise.
	* config/rs6000/altivec.md (UNSPEC_VRLMI): New UNSPEC enum value.
	(UNSPEC_VRLNM): Likewise.
	(VIlong): New mode iterator.
	(altivec_vrl<VI_char>mi): New define_insn.
	(altivec_vrl<VI_char>nm): Likewise.
	* config/rs6000/rs6000-builtin.def (VRLWNM): New monomorphic
	function entry.
	(VRLDNM): Likewise.
	(RLNM): New polymorphic function entry.
	(VRLWMI): New monomorphic function entry.
	(VRLDMI): Likewise.
	(RLMI): New polymorphic function entry.
	* config/rs6000/rs6000-c.c (altivec_overloaded_builtin_table): Add
	new entries for P9V_BUILTIN_VEC_RLMI and P9V_BUILTIN_VEC_RLNM.
	* doc/extend.texi: Add description of vec_rlmi, vec_rlnm, and
	vec_vrlnm.

[gcc/testsuite]

2017-01-16  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* vec-rlmi-rlnm.c: New file.

Comments

Segher Boessenkool Jan. 16, 2017, 10:24 p.m. UTC | #1
Hi Bill,

A few comments:

On Mon, Jan 16, 2017 at 12:12:22PM -0600, Bill Schmidt wrote:
> 	* config/rs6000/rs6000-builtin.def (VRLWNM): New monomorphic
> 	function entry.

I had to look up if "monomorphic" is an existing word in this context.
Unfortunately it is, sigh (it clashes hard with all the more common
meanings).

> --- gcc/config/rs6000/altivec.h	(revision 244498)
> +++ gcc/config/rs6000/altivec.h	(working copy)
> @@ -168,6 +168,9 @@
>  #define vec_re __builtin_vec_re
>  #define vec_round __builtin_vec_round
>  #define vec_recipdiv __builtin_vec_recipdiv
> +#define vec_rlmi __builtin_vec_rlmi
> +#define vec_vrlnm __builtin_vec_rlnm
> +#define vec_rlnm(a,b,c) (__builtin_vec_rlnm(a,(b<<8)|c))

This needs parens around the arguments.

> +The result of @code{vec_rlmi} is obtained by rotating each element of
> +the first argument vector left and inserting it under mask into the
> +second argument vector.

Did you swap first and second here?

Okay for trunk with those points addressed.  Thanks!


Segher
Bill Schmidt Jan. 16, 2017, 11:18 p.m. UTC | #2
> On Jan 16, 2017, at 4:24 PM, Segher Boessenkool <segher@kernel.crashing.org> wrote:
> 
> Hi Bill,
> 
> A few comments:
> 
> On Mon, Jan 16, 2017 at 12:12:22PM -0600, Bill Schmidt wrote:
>> 	* config/rs6000/rs6000-builtin.def (VRLWNM): New monomorphic
>> 	function entry.
> 
> I had to look up if "monomorphic" is an existing word in this context.
> Unfortunately it is, sigh (it clashes hard with all the more common
> meanings).

Eh, "more common" if you are an algebra or biology bigot. :P

> 
>> --- gcc/config/rs6000/altivec.h	(revision 244498)
>> +++ gcc/config/rs6000/altivec.h	(working copy)
>> @@ -168,6 +168,9 @@
>> #define vec_re __builtin_vec_re
>> #define vec_round __builtin_vec_round
>> #define vec_recipdiv __builtin_vec_recipdiv
>> +#define vec_rlmi __builtin_vec_rlmi
>> +#define vec_vrlnm __builtin_vec_rlnm
>> +#define vec_rlnm(a,b,c) (__builtin_vec_rlnm(a,(b<<8)|c))
> 
> This needs parens around the arguments.

Oops, fixing.

> 
>> +The result of @code{vec_rlmi} is obtained by rotating each element of
>> +the first argument vector left and inserting it under mask into the
>> +second argument vector.
> 
> Did you swap first and second here?

No -- this is the way the vec_rlmi interface is defined in the appendix.

Thanks for the review!
Bill

> 
> Okay for trunk with those points addressed.  Thanks!
> 
> 
> Segher
>
diff mbox

Patch

Index: gcc/config/rs6000/altivec.h
===================================================================
--- gcc/config/rs6000/altivec.h	(revision 244498)
+++ gcc/config/rs6000/altivec.h	(working copy)
@@ -168,6 +168,9 @@ 
 #define vec_re __builtin_vec_re
 #define vec_round __builtin_vec_round
 #define vec_recipdiv __builtin_vec_recipdiv
+#define vec_rlmi __builtin_vec_rlmi
+#define vec_vrlnm __builtin_vec_rlnm
+#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((c)<<8)|(b)))
 #define vec_rsqrt __builtin_vec_rsqrt
 #define vec_rsqrte __builtin_vec_rsqrte
 #define vec_vsubfp __builtin_vec_vsubfp
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md	(revision 244498)
+++ gcc/config/rs6000/altivec.md	(working copy)
@@ -156,6 +156,8 @@ 
    UNSPEC_CMPRB
    UNSPEC_CMPRB2
    UNSPEC_CMPEQB
+   UNSPEC_VRLMI
+   UNSPEC_VRLNM
 ])
 
 (define_c_enum "unspecv"
@@ -168,8 +170,10 @@ 
 
 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
 (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
-;; Short vec in modes
+;; Short vec int modes
 (define_mode_iterator VIshort [V8HI V16QI])
+;; Longer vec int modes for rotate/mask ops
+(define_mode_iterator VIlong [V2DI V4SI])
 ;; Vec float modes
 (define_mode_iterator VF [V4SF])
 ;; Vec modes, pity mode iterators are not composable
@@ -1627,6 +1631,25 @@ 
   "vrl<VI_char> %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "altivec_vrl<VI_char>mi"
+  [(set (match_operand:VIlong 0 "register_operand" "=v")
+        (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v")
+	                (match_operand:VIlong 2 "register_operand" "0")
+		        (match_operand:VIlong 3 "register_operand" "v")]
+		       UNSPEC_VRLMI))]
+  "TARGET_P9_VECTOR"
+  "vrl<VI_char>mi %0,%1,%3"
+  [(set_attr "type" "veclogical")])
+
+(define_insn "altivec_vrl<VI_char>nm"
+  [(set (match_operand:VIlong 0 "register_operand" "=v")
+        (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v")
+		        (match_operand:VIlong 2 "register_operand" "v")]
+		       UNSPEC_VRLNM))]
+  "TARGET_P9_VECTOR"
+  "vrl<VI_char>nm %0,%1,%2"
+  [(set_attr "type" "veclogical")])
+
 (define_insn "altivec_vsl"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
Index: gcc/config/rs6000/rs6000-builtin.def
===================================================================
--- gcc/config/rs6000/rs6000-builtin.def	(revision 244498)
+++ gcc/config/rs6000/rs6000-builtin.def	(working copy)
@@ -1918,6 +1918,8 @@  BU_P9V_OVERLOAD_2 (VSRV,	"vsrv")
 BU_P9V_AV_2 (VADUB,		"vadub",		CONST,  vaduv16qi3)
 BU_P9V_AV_2 (VADUH,		"vaduh",		CONST,  vaduv8hi3)
 BU_P9V_AV_2 (VADUW,		"vaduw",		CONST,  vaduv4si3)
+BU_P9V_AV_2 (VRLWNM,		"vrlwnm",		CONST,	altivec_vrlwnm)
+BU_P9V_AV_2 (VRLDNM,		"vrldnm",		CONST,	altivec_vrldnm)
 
 /* ISA 3.0 vector overloaded 2 argument functions. */
 BU_P9V_OVERLOAD_2 (VADU,	"vadu")
@@ -1924,7 +1926,15 @@  BU_P9V_OVERLOAD_2 (VADU,	"vadu")
 BU_P9V_OVERLOAD_2 (VADUB,	"vadub")
 BU_P9V_OVERLOAD_2 (VADUH,	"vaduh")
 BU_P9V_OVERLOAD_2 (VADUW,	"vaduw")
+BU_P9V_OVERLOAD_2 (RLNM,	"rlnm")
 
+/* ISA 3.0 3-argument vector functions.  */
+BU_P9V_AV_3 (VRLWMI,		"vrlwmi",		CONST,	altivec_vrlwmi)
+BU_P9V_AV_3 (VRLDMI,		"vrldmi",		CONST,	altivec_vrldmi)
+
+/* ISA 3.0 vector overloaded 3-argument functions.  */
+BU_P9V_OVERLOAD_3 (RLMI,	"rlmi")
+
 /* 1 argument vsx scalar functions added in ISA 3.0 (power9).  */
 BU_P9V_64BIT_VSX_1 (VSEEDP,	"scalar_extract_exp",	CONST,	xsxexpdp)
 BU_P9V_64BIT_VSX_1 (VSESDP,	"scalar_extract_sig",	CONST,	xsxsigdp)
Index: gcc/config/rs6000/rs6000-c.c
===================================================================
--- gcc/config/rs6000/rs6000-c.c	(revision 244498)
+++ gcc/config/rs6000/rs6000-c.c	(working copy)
@@ -2202,6 +2202,18 @@  const struct altivec_builtin_types altivec_overloa
     RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_VRLB, ALTIVEC_BUILTIN_VRLB,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+  { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLWMI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+  { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLDMI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+  { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLWNM,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V4SI, 0 },
+  { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLDNM,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+    RS6000_BTI_unsigned_V2DI, 0 },
   { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
     RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi	(revision 244498)
+++ gcc/doc/extend.texi	(working copy)
@@ -18179,6 +18179,43 @@  If any of the enabled test conditions is true, the
 in the result vector is -1.  Otherwise (all of the enabled test
 conditions are false), the corresponding entry of the result vector is 0.
 
+The following built-in functions are available for the PowerPC family
+of processors, starting with ISA 3.0 (@option{-mcpu=power9}):
+@smallexample
+vector unsigned int vec_rlmi (vector unsigned int, vector unsigned int,
+                              vector unsigned int);
+vector unsigned long long vec_rlmi (vector unsigned long long,
+                                    vector unsigned long long,
+                                    vector unsigned long long);
+vector unsigned int vec_rlnm (vector unsigned int, vector unsigned int,
+                              vector unsigned int);
+vector unsigned long long vec_rlnm (vector unsigned long long,
+                                    vector unsigned long long,
+                                    vector unsigned long long);
+vector unsigned int vec_vrlnm (vector unsigned int, vector unsigned int);
+vector unsigned long long vec_vrlnm (vector unsigned long long,
+                                     vector unsigned long long);
+@end smallexample
+
+The result of @code{vec_rlmi} is obtained by rotating each element of
+the first argument vector left and inserting it under mask into the
+second argument vector.  The third argument vector contains the mask
+beginning in bits 11:15, the mask end in bits 19:23, and the shift
+count in bits 27:31, of each element.
+
+The result of @code{vec_rlnm} is obtained by rotating each element of
+the first argument vector left and ANDing it with a mask specified by
+the second and third argument vectors.  The second argument vector
+contains the shift count for each element in the low-order byte.  The
+third argument vector contains the mask end for each element in the
+low-order byte, with the mask begin in the next higher byte.
+
+The result of @code{vec_vrlnm} is obtained by rotating each element
+of the first argument vector left and ANDing it with a mask.  The
+second argument vector contains the mask beginning in bits 11:15,
+the mask end in bits 19:23, and the shift count in bits 27:31,
+of each element.
+
 If the cryptographic instructions are enabled (@option{-mcrypto} or
 @option{-mcpu=power8}), the following builtins are enabled.
 
Index: gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c	(working copy)
@@ -0,0 +1,69 @@ 
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mcpu=power9" } */
+
+#include <altivec.h>
+
+vector unsigned int
+rlmi_test_1 (vector unsigned int x, vector unsigned int y,
+	     vector unsigned int z)
+{
+  return vec_rlmi (x, y, z);
+}
+
+vector unsigned long long
+rlmi_test_2 (vector unsigned long long x, vector unsigned long long y,
+	     vector unsigned long long z)
+{
+  return vec_rlmi (x, y, z);
+}
+
+vector unsigned int
+vrlnm_test_1 (vector unsigned int x, vector unsigned int y)
+{
+  return vec_vrlnm (x, y);
+}
+
+vector unsigned long long
+vrlnm_test_2 (vector unsigned long long x, vector unsigned long long y)
+{
+  return vec_vrlnm (x, y);
+}
+
+vector unsigned int
+rlnm_test_1 (vector unsigned int x, vector unsigned int y,
+	     vector unsigned int z)
+{
+  return vec_rlnm (x, y, z);
+}
+
+vector unsigned long long
+rlnm_test_2 (vector unsigned long long x, vector unsigned long long y,
+	     vector unsigned long long z)
+{
+  return vec_rlnm (x, y, z);
+}
+
+/* Expected code generation for rlmi_test_1 is vrlwmi.
+   Expected code generation for rlmi_test_2 is vrldmi.
+   Expected code generation for vrlnm_test_1 is vrlwnm.
+   Expected code generation for vrlnm_test_2 is vrldnm.
+   Expected code generation for the others is more complex, because
+   the second and third arguments are combined by a shift and OR,
+   and because there is no splat-immediate doubleword.
+    - For rlnm_test_1: vspltisw, vslw, xxlor, vrlwnm.
+    - For rlnm_test_2: xxspltib, vextsb2d, vsld, xxlor, vrldnm.
+   There is a choice of splat instructions in both cases, so we
+   just check for "splt".  */
+
+/* { dg-final { scan-assembler-times "vrlwmi" 1 } } */
+/* { dg-final { scan-assembler-times "vrldmi" 1 } } */
+/* { dg-final { scan-assembler-times "splt" 2 } } */
+/* { dg-final { scan-assembler-times "vextsb2d" 1 } } */
+/* { dg-final { scan-assembler-times "vslw" 1 } } */
+/* { dg-final { scan-assembler-times "vsld" 1 } } */
+/* { dg-final { scan-assembler-times "xxlor" 2 } } */
+/* { dg-final { scan-assembler-times "vrlwnm" 2 } } */
+/* { dg-final { scan-assembler-times "vrldnm" 2 } } */