diff mbox

[0/6] More vector permutation work

Message ID 4EA82EF7.50903@redhat.com
State New
Headers show

Commit Message

Richard Henderson Oct. 26, 2011, 4:01 p.m. UTC
On 10/26/2011 07:30 AM, Ulrich Weigand wrote:
> This fails since for u == 4 and mode == V4SFmode it attempts to expand
> a V4SFmode shift, which is unsupported.
> 
> Shouldn't this be using the mode of the selector rather than the mode
> of the result in any case?

Yes, it should use the mode of the selector.  And doing just that is
enough to fix the bug.  But I noticed that the actual results for a
constant permutation were much nastier than they ought to be.

Try this.

Iain, this might solve your case too.  No, I can't test it myself from a
cross-compiler, because libgfortran needs the whole cross environment --
system headers and everything.


r~

Comments

Iain Sandoe Oct. 26, 2011, 7:53 p.m. UTC | #1
On 26 Oct 2011, at 17:01, Richard Henderson wrote:

> On 10/26/2011 07:30 AM, Ulrich Weigand wrote:
>> This fails since for u == 4 and mode == V4SFmode it attempts to expand
>> a V4SFmode shift, which is unsupported.
>>
>> Shouldn't this be using the mode of the selector rather than the mode
>> of the result in any case?
>
> Yes, it should use the mode of the selector.  And doing just that is
> enough to fix the bug.  But I noticed that the actual results for a
> constant permutation were much nastier than they ought to be.
>
> Try this.
>
> Iain, this might solve your case too;

yes - bootstrap incl fortran just finished (not the quickest machine ;))

> no, I can't test myself off cross,
> because libgfortran needs the whole cross-env -- system headers and everything.

Ah yes .. as does Ada .. and ObjC.

I recently (when trying to figure out what was broken with ppc Ada) did
some Darwin crosses from Linux; it was not too painful to set up (but
then I have the stuff to hand), and in essence it is probably no worse than
downloading a bunch of packages or whatever to set up a foreign Linux cross-root.

cheers
Iain
diff mbox

Patch

diff --git a/gcc/optabs.c b/gcc/optabs.c
index 9afc911..736d826 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6912,7 +6912,7 @@  expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   enum insn_code icode;
   enum machine_mode qimode;
   unsigned int i, w, e, u;
-  rtx tmp, sel_qi;
+  rtx tmp, sel_qi = NULL;
   rtvec vec;
 
   if (!target || GET_MODE (target) != mode)
@@ -6946,23 +6946,23 @@  expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
       /* Fall back to a constant byte-based permutation.  */
       if (qimode != VOIDmode)
 	{
-	  icode = direct_optab_handler (vec_perm_const_optab, qimode);
-	  if (icode != CODE_FOR_nothing)
+	  vec = rtvec_alloc (w);
+	  for (i = 0; i < e; ++i)
 	    {
-	      vec = rtvec_alloc (w);
-	      for (i = 0; i < e; ++i)
-		{
-		  unsigned int j, this_e;
+	      unsigned int j, this_e;
 
-		  this_e = INTVAL (XVECEXP (sel, 0, i));
-		  this_e &= 2 * e - 1;
-		  this_e *= u;
+	      this_e = INTVAL (XVECEXP (sel, 0, i));
+	      this_e &= 2 * e - 1;
+	      this_e *= u;
 
-		  for (j = 0; j < u; ++j)
-		    RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
-		}
-	      sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
+	      for (j = 0; j < u; ++j)
+		RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
+	    }
+	  sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
 
+	  icode = direct_optab_handler (vec_perm_const_optab, qimode);
+	  if (icode != CODE_FOR_nothing)
+	    {
 	      tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
 				       gen_lowpart (qimode, v0),
 				       gen_lowpart (qimode, v1), sel_qi);
@@ -6989,47 +6989,53 @@  expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   if (icode == CODE_FOR_nothing)
     return NULL_RTX;
 
-  /* Multiply each element by its byte size.  */
-  if (u == 2)
-    sel = expand_simple_binop (mode, PLUS, sel, sel, sel, 0, OPTAB_DIRECT);
-  else
-    sel = expand_simple_binop (mode, ASHIFT, sel, GEN_INT (exact_log2 (u)),
-			       sel, 0, OPTAB_DIRECT);
-  gcc_assert (sel != NULL);
-
-  /* Broadcast the low byte each element into each of its bytes.  */
-  vec = rtvec_alloc (w);
-  for (i = 0; i < w; ++i)
-    {
-      int this_e = i / u * u;
-      if (BYTES_BIG_ENDIAN)
-	this_e += u - 1;
-      RTVEC_ELT (vec, i) = GEN_INT (this_e);
-    }
-  tmp = gen_rtx_CONST_VECTOR (qimode, vec);
-  sel = gen_lowpart (qimode, sel);
-  sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
-  gcc_assert (sel != NULL);
-
-  /* Add the byte offset to each byte element.  */
-  /* Note that the definition of the indicies here is memory ordering,
-     so there should be no difference between big and little endian.  */
-  vec = rtvec_alloc (w);
-  for (i = 0; i < w; ++i)
-    RTVEC_ELT (vec, i) = GEN_INT (i % u);
-  tmp = gen_rtx_CONST_VECTOR (qimode, vec);
-  sel = expand_simple_binop (qimode, PLUS, sel, tmp, sel, 0, OPTAB_DIRECT);
-  gcc_assert (sel != NULL);
+  if (sel_qi == NULL)
+    {
+      /* Multiply each element by its byte size.  */
+      enum machine_mode selmode = GET_MODE (sel);
+      if (u == 2)
+	sel = expand_simple_binop (selmode, PLUS, sel, sel,
+				   sel, 0, OPTAB_DIRECT);
+      else
+	sel = expand_simple_binop (selmode, ASHIFT, sel,
+				   GEN_INT (exact_log2 (u)),
+				   sel, 0, OPTAB_DIRECT);
+      gcc_assert (sel != NULL);
+
+      /* Broadcast the low byte each element into each of its bytes.  */
+      vec = rtvec_alloc (w);
+      for (i = 0; i < w; ++i)
+	{
+	  int this_e = i / u * u;
+	  if (BYTES_BIG_ENDIAN)
+	    this_e += u - 1;
+	  RTVEC_ELT (vec, i) = GEN_INT (this_e);
+	}
+      tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+      sel = gen_lowpart (qimode, sel);
+      sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
+      gcc_assert (sel != NULL);
+
+      /* Add the byte offset to each byte element.  */
+      /* Note that the definition of the indicies here is memory ordering,
+	 so there should be no difference between big and little endian.  */
+      vec = rtvec_alloc (w);
+      for (i = 0; i < w; ++i)
+	RTVEC_ELT (vec, i) = GEN_INT (i % u);
+      tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+      sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp,
+				    sel, 0, OPTAB_DIRECT);
+      gcc_assert (sel_qi != NULL);
+    }
 
   tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
 			   gen_lowpart (qimode, v0),
-			   gen_lowpart (qimode, v1), sel);
+			   gen_lowpart (qimode, v1), sel_qi);
   if (tmp)
     tmp = gen_lowpart (mode, tmp);
   return tmp;
 }
 
-
 /* Return insn code for a conditional operator with a comparison in
    mode CMODE, unsigned if UNS is true, resulting in a value of mode VMODE.  */