diff mbox

[powerpc] PR target/58673, Fix power8 quad memory/no-vsx-timode interaction

Message ID 20131011182537.GA6220@ibm-tiger.the-meissners.org
State New
Headers show

Commit Message

Michael Meissner Oct. 11, 2013, 6:25 p.m. UTC
When I patched the compiler for PR 58587 to not set -mvsx-timode automatically
for ISA 2.06 sytems (power7/power8), there was an unexpected problem between
ISA 2.07 quad memory support and not allowing TImode variables in VSX
registers.

The movti insn in the -mno-vsx-timode case, did not have quad memory support
enabled, and issued a split for quad memory load/stores, while the insn
splitter said the insn could be done by a quad memory load/store, and did not
split the insn.

This patch allows TImode values to have register + offset form in the
-mno-vsx-timode (-mvsx-timode currently limits TImode addresses to be a single
register, so that it can be used with both quad memory and VSX load -- this is
one more thing that needs to be addressed with secondary reload support), and
it fixes the non VSX-timode insn to add quad memory support.

I have done a bootstrap and run make check with no regressions.  I have built
all of Spec 2006 which showed the initial problem and it all built
successfully.  The only code changes that are due to these patches are due to
load and store quad allowing reg+offset addressing on 64-bit power8.  Are these
patches ok to install?

[gcc]
2013-10-11  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/58673
	* config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only
	restrict TImode addresses to single indirect registers if both
	-mquad-memory and -mvsx-timode are used.
	(rs6000_output_move_128bit): Use quad_load_store_p to determine if
	we should emit load/store quad.  Remove using %y for quad memory
	addresses.

	* config/rs6000/rs6000.md (mov<mode>_ppc64, TI/PTImode): Add
	constraints to allow load/store quad on machines where TImode is
	not allowed in VSX registers.

[gcc/testsuite]
2013-10-11  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/58673
	* gcc.target/powerpc/pr58673-1.c: New file to test whether
	-mquad-word + -mno-vsx-timode causes errors.
	* gcc.target/powerpc/pr58673-2.c: Likewise.
diff mbox

Patch

Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 203389)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -7182,12 +7182,12 @@  rs6000_legitimate_address_p (enum machin
   if (reg_offset_p
       && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
     return 1;
-  /* For TImode, if we have load/store quad, only allow register indirect
-     addresses.  This will allow the values to go in either GPRs or VSX
-     registers without reloading.  The vector types would tend to go into VSX
-     registers, so we allow REG+REG, while TImode seems somewhat split, in that
-     some uses are GPR based, and some VSX based.  */
-  if (mode == TImode && TARGET_QUAD_MEMORY)
+  /* For TImode, if we have load/store quad and TImode in VSX registers, only
+     allow register indirect addresses.  This will allow the values to go in
+     either GPRs or VSX registers without reloading.  The vector types would
+     tend to go into VSX registers, so we allow REG+REG, while TImode seems
+     somewhat split, in that some uses are GPR based, and some VSX based.  */
+  if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
     return 0;
   /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
   if (! reg_ok_strict
@@ -16060,12 +16060,11 @@  rs6000_output_move_128bit (rtx operands[
     {
       if (dest_gpr_p)
 	{
-	  if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0
-	      && quad_memory_operand (src, mode)
-	      && !reg_overlap_mentioned_p (dest, src))
+	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
 	    {
 	      /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
-	      return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1";
+	      /* return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1"; */
+	      return "lq %0,%1";
 	    }
 	  else
 	    return "#";
@@ -16095,11 +16094,11 @@  rs6000_output_move_128bit (rtx operands[
     {
       if (src_gpr_p)
 	{
-	  if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0
-	      && quad_memory_operand (dest, mode))
+ 	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
 	    {
 	      /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
-	      return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0";
+	      /* return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0"; */
+      	      return "stq %1,%0";
 	    }
 	  else
 	    return "#";
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 203389)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -10312,15 +10312,15 @@  (define_insn "*mov<mode>_string"
 					  (const_string "conditional")))])
 
 (define_insn "*mov<mode>_ppc64"
-  [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r")
-	(match_operand:TI2 1 "input_operand" "r,Y,r,F"))]
+  [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r")
+	(match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,F"))]
   "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode)))"
 {
   return rs6000_output_move_128bit (operands);
 }
-  [(set_attr "type" "store,load,*,*")
+  [(set_attr "type" "store,store,load,load,*,*")
    (set_attr "length" "8")])
 
 (define_split
Index: gcc/testsuite/gcc.target/powerpc/pr58673-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr58673-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr58673-1.c	(revision 0)
@@ -0,0 +1,78 @@ 
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -m64 -O1" } */
+
+enum typecode
+{
+  QIcode, QUcode, HIcode, HUcode, SIcode, SUcode, DIcode, DUcode, SFcode,
+    DFcode, XFcode, Pcode, Tcode, LAST_AND_UNUSED_TYPECODE
+};
+enum bytecode_opcode
+{
+  neverneverland, drop, duplicate, over, setstackSI, adjstackSI, constQI,
+    constHI, constSI, constDI, constSF, constDF, constXF, constP, loadQI,
+    loadHI, loadSI, loadDI, loadSF, loadDF, loadXF, loadP, storeQI, storeHI,
+    storeSI, storeDI, storeSF, storeDF, storeXF, storeP, storeBLK, clearBLK,
+    addconstPSI, newlocalSI, localP, argP, convertQIHI, convertHISI,
+    convertSIDI, convertQISI, convertQUHU, convertHUSU, convertSUDU,
+    convertQUSU, convertSFDF, convertDFXF, convertHIQI, convertSIHI,
+    convertDISI, convertSIQI, convertSUQU, convertDFSF, convertXFDF,
+    convertSISF, convertSIDF, convertSIXF, convertSUSF, convertSUDF,
+    convertSUXF, convertDISF, convertDIDF, convertDIXF, convertDUSF,
+    convertDUDF, convertDUXF, convertSFSI, convertDFSI, convertXFSI,
+    convertSFSU, convertDFSU, convertXFSU, convertSFDI, convertDFDI,
+    convertXFDI, convertSFDU, convertDFDU, convertXFDU, convertPSI,
+    convertSIP, convertSIT, convertDIT, convertSFT, convertDFT, convertXFT,
+    convertPT, zxloadBI, sxloadBI, sstoreBI, addSI, addDI, addSF, addDF,
+    addXF, addPSI, subSI, subDI, subSF, subDF, subXF, subPP, mulSI, mulDI,
+    mulSU, mulDU, mulSF, mulDF, mulXF, divSI, divDI, divSU, divDU, divSF,
+    divDF, divXF, modSI, modDI, modSU, modDU, andSI, andDI, iorSI, iorDI,
+    xorSI, xorDI, lshiftSI, lshiftSU, lshiftDI, lshiftDU, rshiftSI, rshiftSU,
+    rshiftDI, rshiftDU, ltSI, ltSU, ltDI, ltDU, ltSF, ltDF, ltXF, ltP, leSI,
+    leSU, leDI, leDU, leSF, leDF, leXF, leP, geSI, geSU, geDI, geDU, geSF,
+    geDF, geXF, geP, gtSI, gtSU, gtDI, gtDU, gtSF, gtDF, gtXF, gtP, eqSI,
+    eqDI, eqSF, eqDF, eqXF, eqP, neSI, neDI, neSF, neDF, neXF, neP, negSI,
+    negDI, negSF, negDF, negXF, notSI, notDI, notT, predecQI, predecHI,
+    predecSI, predecDI, predecP, predecSF, predecDF, predecXF, predecBI,
+    preincQI, preincHI, preincSI, preincDI, preincP, preincSF, preincDF,
+    preincXF, preincBI, postdecQI, postdecHI, postdecSI, postdecDI, postdecP,
+    postdecSF, postdecDF, postdecXF, postdecBI, postincQI, postincHI,
+    postincSI, postincDI, postincP, postincSF, postincDF, postincXF,
+    postincBI, xjumpif, xjumpifnot, jump, jumpP, caseSI, caseSU, caseDI,
+    caseDU, call, returnP, ret, linenote, LAST_AND_UNUSED_OPCODE
+};
+struct binary_operator
+{
+  enum bytecode_opcode opcode;
+  enum typecode arg0;
+};
+static struct conversion_recipe
+{
+  unsigned char *opcodes;
+  int cost;
+}
+conversion_recipe[((int) LAST_AND_UNUSED_TYPECODE)][((int)
+						     LAST_AND_UNUSED_TYPECODE)];
+static struct conversion_recipe
+deduce_conversion (from, to)
+     enum typecode from, to;
+{
+  (conversion_recipe[(int) from][(int) to].
+   opcodes ? 0 : (conversion_recipe[(int) from][(int) to] =
+		  deduce_conversion (from, to), 0));
+}
+
+void
+bc_expand_binary_operation (optab, resulttype, arg0, arg1)
+     struct binary_operator optab[];
+{
+  int i, besti, cost, bestcost;
+  enum typecode resultcode, arg0code;
+  for (i = 0; optab[i].opcode != -1; ++i)
+    {
+      (conversion_recipe[(int) arg0code][(int) optab[i].arg0].
+       opcodes ? 0 : (conversion_recipe[(int) arg0code][(int) optab[i].arg0] =
+		      deduce_conversion (arg0code, optab[i].arg0), 0));
+    }
+}
Index: gcc/testsuite/gcc.target/powerpc/pr58673-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr58673-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr58673-2.c	(revision 0)
@@ -0,0 +1,217 @@ 
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O3 -m64 -funroll-loops" } */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+typedef long unsigned int size_t;
+typedef struct _IO_FILE FILE;
+typedef float real;
+typedef real rvec[3];
+typedef real matrix[3][3];
+typedef real tensor[3][3];
+enum
+{
+  F_BONDS, F_G96BONDS, F_MORSE, F_CUBICBONDS, F_CONNBONDS, F_HARMONIC,
+    F_ANGLES, F_G96ANGLES, F_PDIHS, F_RBDIHS, F_IDIHS, F_LJ14, F_COUL14, F_LJ,
+    F_BHAM, F_LJLR, F_DISPCORR, F_SR, F_LR, F_WPOL, F_POSRES, F_DISRES,
+    F_DISRESVIOL, F_ORIRES, F_ORIRESDEV, F_ANGRES, F_ANGRESZ, F_SHAKE,
+    F_SHAKENC, F_SETTLE, F_DUMMY2, F_DUMMY3, F_DUMMY3FD, F_DUMMY3FAD,
+    F_DUMMY3OUT, F_DUMMY4FD, F_EQM, F_EPOT, F_EKIN, F_ETOT, F_TEMP, F_PRES,
+    F_DVDL, F_DVDLKIN, F_NRE
+};
+typedef union
+{
+  struct
+  {
+  }
+  bham;
+  struct
+  {
+    real rA, krA, rB, krB;
+  }
+  harmonic;
+}
+t_iparams;
+typedef struct
+{
+  t_iparams *iparams;
+}
+t_idef;
+typedef struct
+{
+}
+t_inputrec;
+typedef struct
+{
+}
+t_commrec;
+typedef struct
+{
+}
+t_forcerec;
+typedef struct
+{
+}
+t_mdatoms;
+typedef struct
+{
+}
+t_filenm;
+enum
+{
+  eoPres, eoEpot, eoVir, eoDist, eoMu, eoForce, eoFx, eoFy, eoFz, eoPx, eoPy,
+    eoPz, eoPolarizability, eoDipole, eoObsNR, eoMemory =
+    eoObsNR, eoInter, eoUseVirial, eoNR
+};
+extern char *eoNames[eoNR];
+typedef struct
+{
+  int bPrint;
+}
+t_coupl_LJ;
+typedef struct
+{
+  int eObs;
+  t_iparams xi;
+}
+t_coupl_iparams;
+typedef struct
+{
+  real act_value[eoObsNR];
+  real av_value[eoObsNR];
+  real ref_value[eoObsNR];
+  int bObsUsed[eoObsNR];
+  int nLJ, nBU, nQ, nIP;
+  t_coupl_LJ *tcLJ;
+}
+t_coupl_rec;
+static void
+pr_ff (t_coupl_rec * tcr, real time, t_idef * idef, t_commrec * cr, int nfile,
+       t_filenm fnm[])
+{
+  static FILE *prop;
+  static FILE **out = ((void *) 0);
+  static FILE **qq = ((void *) 0);
+  static FILE **ip = ((void *) 0);
+  char buf[256];
+  char *leg[] = {
+    "C12", "C6"
+  };
+  char **raleg;
+  int i, j, index;
+  if ((prop == ((void *) 0)) && (out == ((void *) 0)) && (qq == ((void *) 0))
+      && (ip == ((void *) 0)))
+    {
+      for (i = j = 0; (i < eoObsNR); i++)
+	{
+	  if (tcr->bObsUsed[i])
+	    {
+	      raleg[j++] =
+		(__extension__
+		 (__builtin_constant_p (eoNames[i])
+		  && ((size_t) (const void *) ((eoNames[i]) + 1) -
+		      (size_t) (const void *) (eoNames[i]) ==
+		      1) ? (((const char *) (eoNames[i]))[0] ==
+			    '\0' ? (char *) calloc ((size_t) 1,
+						    (size_t) 1) : (
+									   {
+									   size_t
+									   __len
+									   =
+									   strlen
+									   (eoNames
+									    [i])
+									   +
+									   1;
+									   char
+									   *__retval
+									   =
+									   (char
+									    *)
+									   malloc
+									   (__len);
+									   __retval;}
+	    )):	    __strdup (eoNames[i])));
+	      raleg[j++] =
+		(__extension__
+		 (__builtin_constant_p (buf)
+		  && ((size_t) (const void *) ((buf) + 1) -
+		      (size_t) (const void *) (buf) ==
+		      1) ? (((const char *) (buf))[0] ==
+			    '\0' ? (char *) calloc ((size_t) 1,
+						    (size_t) 1) : (
+									   {
+									   size_t
+									   __len
+									   =
+									   strlen
+									   (buf)
+									   +
+									   1;
+									   char
+									   *__retval
+									   =
+									   (char
+									    *)
+									   malloc
+									   (__len);
+									   __retval;}
+	    )):	    __strdup (buf)));
+	    }
+	}
+      if (tcr->nLJ)
+	{
+	  for (i = 0; (i < tcr->nLJ); i++)
+	    {
+	      if (tcr->tcLJ[i].bPrint)
+		{
+		  xvgr_legend (out[i], (sizeof (leg) / sizeof ((leg)[0])),
+			       leg);
+		}
+	    }
+	}
+    }
+}
+
+void
+do_coupling (FILE * log, int nfile, t_filenm fnm[], t_coupl_rec * tcr, real t,
+	     int step, real ener[], t_forcerec * fr, t_inputrec * ir,
+	     int bMaster, t_mdatoms * md, t_idef * idef, real mu_aver,
+	     int nmols, t_commrec * cr, matrix box, tensor virial,
+	     tensor pres, rvec mu_tot, rvec x[], rvec f[], int bDoIt)
+{
+  int i, j, ati, atj, atnr2, type, ftype;
+  real deviation[eoObsNR], prdev[eoObsNR], epot0, dist, rmsf;
+  real ff6, ff12, ffa, ffb, ffc, ffq, factor, dt, mu_ind;
+  int bTest, bPrint;
+  t_coupl_iparams *tip;
+  if (bPrint)
+    {
+      pr_ff (tcr, t, idef, cr, nfile, fnm);
+    }
+  for (i = 0; (i < eoObsNR); i++)
+    {
+      deviation[i] =
+	calc_deviation (tcr->av_value[i], tcr->act_value[i],
+			tcr->ref_value[i]);
+      prdev[i] = tcr->ref_value[i] - tcr->act_value[i];
+    }
+  if (bPrint)
+    pr_dev (tcr, t, prdev, cr, nfile, fnm);
+  for (i = 0; (i < atnr2); i++)
+    {
+      factor = dt * deviation[tip->eObs];
+      switch (ftype)
+	{
+	case F_BONDS:
+	  if (fabs (tip->xi.harmonic.krA) > 1.2e-38)
+	    idef->iparams[type].harmonic.krA *=
+	      (1 + factor / tip->xi.harmonic.krA);
+	}
+    }
+}