diff mbox series

[09/18] rs6000: Builtin expansion, part 4

Message ID 7c71697caa84bc60b5342711ac2ef29042b8835a.1630511335.git.wschmidt@linux.ibm.com
State New
Headers show
Series Replace the Power target-specific builtin machinery | expand

Commit Message

Bill Schmidt Sept. 1, 2021, 4:13 p.m. UTC
Consolidate into elemrev_icode some logic that is scattered throughout
the old altivec_expand_builtin.  Also replace functions for handling
special load and store built-ins:
= ldv_expand_builtin replaces altivec_expand_lv_builtin
= lxvrse_expand_builtin and lxvrze_expand_builtin replace
  altivec_expand_lxvr_builtin
= stv_expand builtin replaces altivec_expand_stv_builtin

In all cases, there are no logic changes except that some code was
already factored out into rs6000_expand_new_builtin.

2021-09-01  Bill Schmidt  <wschmidt@linux.ibm.com>

gcc/
	* config/rs6000/rs6000-call.c (elemrev_icode): Implement.
	(ldv_expand_builtin): Likewise.
	(lxvrse_expand_builtin): Likewise.
	(lxvrze_expand_builtin): Likewise.
	(stv_expand_builtin): Likewise.
---
 gcc/config/rs6000/rs6000-call.c | 245 ++++++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)

Comments

Segher Boessenkool Nov. 3, 2021, 1:52 a.m. UTC | #1
Hi!

On Wed, Sep 01, 2021 at 11:13:45AM -0500, Bill Schmidt wrote:
>  static insn_code
>  elemrev_icode (rs6000_gen_builtins fcode)
>  {
> +  switch (fcode)
> +    {
> +    default:
> +      gcc_unreachable ();

default: goes at the end.

> +    case RS6000_BIF_ST_ELEMREV_V1TI:
> +      return BYTES_BIG_ENDIAN
> +	? CODE_FOR_vsx_store_v1ti
> +	: CODE_FOR_vsx_st_elemrev_v1ti;

That fits on one or two lines.  Many more like that, I won't point them
all out.

Alternatively, maybe nicer, put an "if (BYTES_BIG_ENDIAN)" before the
switch, and duplicate the switch.  It probably is more readable that
way, easier to spot if you missed or typoedsomething.

> +    }
> +  gcc_unreachable ();

So the default: has no use at all!  We do the same after the switch
already.

>    return (insn_code) 0;

And neither does this line.  Leave it out, the gcc_unreachable tells GCC
that this path does not have to return a value (because this path can
never be taken).  Put a blank line before the gcc_unreachable though,
nice and dramatic right before the function-closing curly ;-)

>  static rtx
>  ldv_expand_builtin (rtx target, insn_code icode, rtx *op, machine_mode tmode)
>  {
> +  rtx pat, addr;
> +  bool blk = (icode == CODE_FOR_altivec_lvlx
> +	      || icode == CODE_FOR_altivec_lvlxl
> +	      || icode == CODE_FOR_altivec_lvrx
> +	      || icode == CODE_FOR_altivec_lvrxl);

"blk" is used 32 lines later.  Maybe a better name would help?  Maybe
you can declare (and init) the variable later?  Maybe a tiny comment
will help?

Apparently it means to use BLKmode instead of tmode, but why is that?

> +  if (target == 0
> +      || GET_MODE (target) != tmode
> +      || !insn_data[icode].operand[0].predicate (target, tmode))
> +    target = gen_reg_rtx (tmode);

And here it uses tmode anyway.  Hrm.

> +  /* For LVX, express the RTL accurately by ANDing the address with -16.
> +     LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
> +     so the raw address is fine.  */

In the case of lvxl the unspec is not around the memory address, so this
is not true.

Oh, for lve* the same:

(define_insn "altivec_lve<VI_char>x"
  [(parallel
    [(set (match_operand:VI 0 "register_operand" "=v")
          (match_operand:VI 1 "memory_operand" "Z"))
     (unspec [(const_int 0)] UNSPEC_LVE)])]
  "TARGET_ALTIVEC"
  "lve<VI_char>x %0,%y1"
  [(set_attr "type" "vecload")])

The "set" is just plain.  It is paralleled with an unspec so it is not
the same RTL as some other load, but it is perfectly visible to all RTL
optimisers.

There needs to be an unspec in the "set" itself, like already done for
stve*: lve* leaves most of the vector undefined, but the RTL does not
express that currently, and then things can go wrong.  I cannot think of
an example where it *will* go wrong, but that does not say much :-)

So, all VMX-style loads and stores need the &-16 .

We survived this for ages, and it is not like lve* is such a hotly used
builtin these days, so we'll survive things, but: put it on a to-do
list somewhere?  :-)

> +  /* Emit the lxvr*x insn.  */
> +  pat = GEN_FCN (icode) (tiscratch, addr);

(declare it here, "rtx pat", not much earlier)

Okay for trunk with whatever tidyings you feel you can make now, and
leave the rest for a later day.  Thanks!


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 5032e947a8e..33153a5657c 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -14754,12 +14754,142 @@  new_cpu_expand_builtin (enum rs6000_gen_builtins fcode,
 static insn_code
 elemrev_icode (rs6000_gen_builtins fcode)
 {
+  switch (fcode)
+    {
+    default:
+      gcc_unreachable ();
+
+    case RS6000_BIF_ST_ELEMREV_V1TI:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_store_v1ti
+	: CODE_FOR_vsx_st_elemrev_v1ti;
+
+    case RS6000_BIF_ST_ELEMREV_V2DF:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_store_v2df
+	: CODE_FOR_vsx_st_elemrev_v2df;
+
+    case RS6000_BIF_ST_ELEMREV_V2DI:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_store_v2di
+	: CODE_FOR_vsx_st_elemrev_v2di;
+
+    case RS6000_BIF_ST_ELEMREV_V4SF:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_store_v4sf
+	: CODE_FOR_vsx_st_elemrev_v4sf;
+
+    case RS6000_BIF_ST_ELEMREV_V4SI:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_store_v4si
+	: CODE_FOR_vsx_st_elemrev_v4si;
+
+    case RS6000_BIF_ST_ELEMREV_V8HI:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_store_v8hi
+	: CODE_FOR_vsx_st_elemrev_v8hi;
+
+    case RS6000_BIF_ST_ELEMREV_V16QI:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_store_v16qi
+	: CODE_FOR_vsx_st_elemrev_v16qi;
+
+    case RS6000_BIF_LD_ELEMREV_V2DF:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_load_v2df
+	: CODE_FOR_vsx_ld_elemrev_v2df;
+
+    case RS6000_BIF_LD_ELEMREV_V1TI:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_load_v1ti
+	: CODE_FOR_vsx_ld_elemrev_v1ti;
+
+    case RS6000_BIF_LD_ELEMREV_V2DI:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_load_v2di
+	: CODE_FOR_vsx_ld_elemrev_v2di;
+
+    case RS6000_BIF_LD_ELEMREV_V4SF:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_load_v4sf
+	: CODE_FOR_vsx_ld_elemrev_v4sf;
+
+    case RS6000_BIF_LD_ELEMREV_V4SI:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_load_v4si
+	: CODE_FOR_vsx_ld_elemrev_v4si;
+
+    case RS6000_BIF_LD_ELEMREV_V8HI:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_load_v8hi
+	: CODE_FOR_vsx_ld_elemrev_v8hi;
+
+    case RS6000_BIF_LD_ELEMREV_V16QI:
+      return BYTES_BIG_ENDIAN
+	? CODE_FOR_vsx_load_v16qi
+	: CODE_FOR_vsx_ld_elemrev_v16qi;
+    }
+  gcc_unreachable ();
   return (insn_code) 0;
 }
 
 static rtx
 ldv_expand_builtin (rtx target, insn_code icode, rtx *op, machine_mode tmode)
 {
+  rtx pat, addr;
+  bool blk = (icode == CODE_FOR_altivec_lvlx
+	      || icode == CODE_FOR_altivec_lvlxl
+	      || icode == CODE_FOR_altivec_lvrx
+	      || icode == CODE_FOR_altivec_lvrxl);
+
+  if (target == 0
+      || GET_MODE (target) != tmode
+      || !insn_data[icode].operand[0].predicate (target, tmode))
+    target = gen_reg_rtx (tmode);
+
+  op[1] = copy_to_mode_reg (Pmode, op[1]);
+
+  /* For LVX, express the RTL accurately by ANDing the address with -16.
+     LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
+     so the raw address is fine.  */
+  if (icode == CODE_FOR_altivec_lvx_v1ti
+      || icode == CODE_FOR_altivec_lvx_v2df
+      || icode == CODE_FOR_altivec_lvx_v2di
+      || icode == CODE_FOR_altivec_lvx_v4sf
+      || icode == CODE_FOR_altivec_lvx_v4si
+      || icode == CODE_FOR_altivec_lvx_v8hi
+      || icode == CODE_FOR_altivec_lvx_v16qi)
+    {
+      rtx rawaddr;
+      if (op[0] == const0_rtx)
+	rawaddr = op[1];
+      else
+	{
+	  op[0] = copy_to_mode_reg (Pmode, op[0]);
+	  rawaddr = gen_rtx_PLUS (Pmode, op[1], op[0]);
+	}
+      addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
+      addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
+
+      emit_insn (gen_rtx_SET (target, addr));
+    }
+  else
+    {
+      if (op[0] == const0_rtx)
+	addr = gen_rtx_MEM (blk ? BLKmode : tmode, op[1]);
+      else
+	{
+	  op[0] = copy_to_mode_reg (Pmode, op[0]);
+	  addr = gen_rtx_MEM (blk ? BLKmode : tmode,
+			      gen_rtx_PLUS (Pmode, op[1], op[0]));
+	}
+
+      pat = GEN_FCN (icode) (target, addr);
+      if (!pat)
+	return 0;
+      emit_insn (pat);
+    }
+
   return target;
 }
 
@@ -14767,6 +14897,42 @@  static rtx
 lxvrse_expand_builtin (rtx target, insn_code icode, rtx *op,
 		       machine_mode tmode, machine_mode smode)
 {
+  rtx pat, addr;
+  op[1] = copy_to_mode_reg (Pmode, op[1]);
+
+  if (op[0] == const0_rtx)
+    addr = gen_rtx_MEM (tmode, op[1]);
+  else
+    {
+      op[0] = copy_to_mode_reg (Pmode, op[0]);
+      addr = gen_rtx_MEM (smode,
+			  gen_rtx_PLUS (Pmode, op[1], op[0]));
+    }
+
+  rtx discratch = gen_reg_rtx (DImode);
+  rtx tiscratch = gen_reg_rtx (TImode);
+
+  /* Emit the lxvr*x insn.  */
+  pat = GEN_FCN (icode) (tiscratch, addr);
+  if (!pat)
+    return 0;
+  emit_insn (pat);
+
+  /* Emit a sign extension from QI,HI,WI to double (DI).  */
+  rtx scratch = gen_lowpart (smode, tiscratch);
+  if (icode == CODE_FOR_vsx_lxvrbx)
+    emit_insn (gen_extendqidi2 (discratch, scratch));
+  else if (icode == CODE_FOR_vsx_lxvrhx)
+    emit_insn (gen_extendhidi2 (discratch, scratch));
+  else if (icode == CODE_FOR_vsx_lxvrwx)
+    emit_insn (gen_extendsidi2 (discratch, scratch));
+  /*  Assign discratch directly if scratch is already DI.  */
+  if (icode == CODE_FOR_vsx_lxvrdx)
+    discratch = scratch;
+
+  /* Emit the sign extension from DI (double) to TI (quad).  */
+  emit_insn (gen_extendditi2 (target, discratch));
+
   return target;
 }
 
@@ -14774,6 +14940,22 @@  static rtx
 lxvrze_expand_builtin (rtx target, insn_code icode, rtx *op,
 		       machine_mode tmode, machine_mode smode)
 {
+  rtx pat, addr;
+  op[1] = copy_to_mode_reg (Pmode, op[1]);
+
+  if (op[0] == const0_rtx)
+    addr = gen_rtx_MEM (tmode, op[1]);
+  else
+    {
+      op[0] = copy_to_mode_reg (Pmode, op[0]);
+      addr = gen_rtx_MEM (smode,
+			  gen_rtx_PLUS (Pmode, op[1], op[0]));
+    }
+
+  pat = GEN_FCN (icode) (target, addr);
+  if (!pat)
+    return 0;
+  emit_insn (pat);
   return target;
 }
 
@@ -14781,6 +14963,69 @@  static rtx
 stv_expand_builtin (insn_code icode, rtx *op,
 		    machine_mode tmode, machine_mode smode)
 {
+  rtx pat, addr, rawaddr, truncrtx;
+  op[2] = copy_to_mode_reg (Pmode, op[2]);
+
+  /* For STVX, express the RTL accurately by ANDing the address with -16.
+     STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
+     so the raw address is fine.  */
+  if (icode == CODE_FOR_altivec_stvx_v2df
+      || icode == CODE_FOR_altivec_stvx_v2di
+      || icode == CODE_FOR_altivec_stvx_v4sf
+      || icode == CODE_FOR_altivec_stvx_v4si
+      || icode == CODE_FOR_altivec_stvx_v8hi
+      || icode == CODE_FOR_altivec_stvx_v16qi)
+    {
+      if (op[1] == const0_rtx)
+	rawaddr = op[2];
+      else
+	{
+	  op[1] = copy_to_mode_reg (Pmode, op[1]);
+	  rawaddr = gen_rtx_PLUS (Pmode, op[2], op[1]);
+	}
+
+      addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
+      addr = gen_rtx_MEM (tmode, addr);
+      op[0] = copy_to_mode_reg (tmode, op[0]);
+      emit_insn (gen_rtx_SET (addr, op[0]));
+    }
+  else if (icode == CODE_FOR_vsx_stxvrbx
+	   || icode == CODE_FOR_vsx_stxvrhx
+	   || icode == CODE_FOR_vsx_stxvrwx
+	   || icode == CODE_FOR_vsx_stxvrdx)
+    {
+      truncrtx = gen_rtx_TRUNCATE (tmode, op[0]);
+      op[0] = copy_to_mode_reg (E_TImode, truncrtx);
+
+      if (op[1] == const0_rtx)
+	addr = gen_rtx_MEM (Pmode, op[2]);
+      else
+	{
+	  op[1] = copy_to_mode_reg (Pmode, op[1]);
+	  addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1]));
+	}
+      pat = GEN_FCN (icode) (addr, op[0]);
+      if (pat)
+	emit_insn (pat);
+    }
+  else
+    {
+      if (!insn_data[icode].operand[1].predicate (op[0], smode))
+	op[0] = copy_to_mode_reg (smode, op[0]);
+
+      if (op[1] == const0_rtx)
+	addr = gen_rtx_MEM (tmode, op[2]);
+      else
+	{
+	  op[1] = copy_to_mode_reg (Pmode, op[1]);
+	  addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1]));
+	}
+
+      pat = GEN_FCN (icode) (addr, op[0]);
+      if (pat)
+	emit_insn (pat);
+    }
+
   return NULL_RTX;
 }