Message ID | ZZiTiojbYNzVvJEV@cowardly-lion.the-meissners.org |
---|---|
State | New |
Headers | show |
Series | Repost [PATCH 5/6] PowerPC: Switch to dense math names for all MMA operations. | expand |
Ping | Date: Fri, 5 Jan 2024 18:40:58 -0500 | From: Michael Meissner <meissner@linux.ibm.com> | Subject: Repost [PATCH 5/6] PowerPC: Switch to dense math names for all MMA operations. | Message-ID: <ZZiTiojbYNzVvJEV@cowardly-lion.the-meissners.org> https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641965.html
Hi Mike, on 2024/1/6 07:40, Michael Meissner wrote: > This patch changes the assembler instruction names for MMA instructions from > the original name used in power10 to the new name when used with the dense math > system. I.e. xvf64gerpp becomes dmxvf64gerpp. The assembler will emit the > same bits for either spelling. > > The patches have been tested on both little and big endian systems. Can I check > it into the master branch? > > 2024-01-05 Michael Meissner <meissner@linux.ibm.com> > > gcc/ > > * config/rs6000/mma.md (vvi4i4i8_dm): New int attribute. > (avvi4i4i8_dm): Likewise. > (vvi4i4i2_dm): Likewise. > (avvi4i4i2_dm): Likewise. > (vvi4i4_dm): Likewise. > (avvi4i4_dm): Likewise. > (pvi4i2_dm): Likewise. > (apvi4i2_dm): Likewise. > (vvi4i4i4_dm): Likewise. > (avvi4i4i4_dm): Likewise. > (mma_<vv>): Add support for running on DMF systems, generating the dense > math instruction and using the dense math accumulators. > (mma_<avv>): Likewise. > (mma_<pv>): Likewise. > (mma_<apv>): Likewise. > (mma_<vvi4i4i8>): Likewise. > (mma_<avvi4i4i8>): Likewise. > (mma_<vvi4i4i2>): Likewise. > (mma_<avvi4i4i2>): Likewise. > (mma_<vvi4i4>): Likewise. > (mma_<avvi4i4>): Likewise. > (mma_<pvi4i2>): Likewise. > (mma_<apvi4i2>): Likewise. > (mma_<vvi4i4i4>): Likewise. > (mma_<avvi4i4i4>): Likewise. > > gcc/testsuite/ > > * gcc.target/powerpc/dm-double-test.c: New test. > * lib/target-supports.exp (check_effective_target_powerpc_dense_math_ok): New > target test. 
> --- > gcc/config/rs6000/mma.md | 98 +++++++-- > .../gcc.target/powerpc/dm-double-test.c | 194 ++++++++++++++++++ > gcc/testsuite/lib/target-supports.exp | 19 ++ > 3 files changed, 299 insertions(+), 12 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/dm-double-test.c > > diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md > index 525a85146ff..f06e6bbb184 100644 > --- a/gcc/config/rs6000/mma.md > +++ b/gcc/config/rs6000/mma.md > @@ -227,13 +227,22 @@ (define_int_attr apv [(UNSPEC_MMA_XVF64GERPP "xvf64gerpp") > > (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")]) > > +(define_int_attr vvi4i4i8_dm [(UNSPEC_MMA_PMXVI4GER8 "pmdmxvi4ger8")]) Can we update vvi4i4i8 to (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")]) by avoiding to introduce vvi4i4i8_dm, then its use places would be like: - "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5" + "@ + pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5 + pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5 + pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5" and - define_insn "mma_<vvi4i4i8>" + define_insn "mma_pm<vvi4i4i8>" (or updating its use in corresponding bif expander field) ? This comment is also applied for the other iterators changes. 
> + > (define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "pmxvi4ger8pp")]) > > +(define_int_attr avvi4i4i8_dm [(UNSPEC_MMA_PMXVI4GER8PP "pmdmxvi4ger8pp")]) > + > (define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2 "pmxvi16ger2") > (UNSPEC_MMA_PMXVI16GER2S "pmxvi16ger2s") > (UNSPEC_MMA_PMXVF16GER2 "pmxvf16ger2") > (UNSPEC_MMA_PMXVBF16GER2 "pmxvbf16ger2")]) > > +(define_int_attr vvi4i4i2_dm [(UNSPEC_MMA_PMXVI16GER2 "pmdmxvi16ger2") > + (UNSPEC_MMA_PMXVI16GER2S "pmdmxvi16ger2s") > + (UNSPEC_MMA_PMXVF16GER2 "pmdmxvf16ger2") > + (UNSPEC_MMA_PMXVBF16GER2 "pmdmxvbf16ger2")]) > + > (define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "pmxvi16ger2pp") > (UNSPEC_MMA_PMXVI16GER2SPP "pmxvi16ger2spp") > (UNSPEC_MMA_PMXVF16GER2PP "pmxvf16ger2pp") > @@ -245,25 +254,54 @@ (define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "pmxvi16ger2pp") > (UNSPEC_MMA_PMXVBF16GER2NP "pmxvbf16ger2np") > (UNSPEC_MMA_PMXVBF16GER2NN "pmxvbf16ger2nn")]) > > +(define_int_attr avvi4i4i2_dm [(UNSPEC_MMA_PMXVI16GER2PP "pmdmxvi16ger2pp") > + (UNSPEC_MMA_PMXVI16GER2SPP "pmdmxvi16ger2spp") > + (UNSPEC_MMA_PMXVF16GER2PP "pmdmxvf16ger2pp") > + (UNSPEC_MMA_PMXVF16GER2PN "pmdmxvf16ger2pn") > + (UNSPEC_MMA_PMXVF16GER2NP "pmdmxvf16ger2np") > + (UNSPEC_MMA_PMXVF16GER2NN "pmdmxvf16ger2nn") > + (UNSPEC_MMA_PMXVBF16GER2PP "pmdmxvbf16ger2pp") > + (UNSPEC_MMA_PMXVBF16GER2PN "pmdmxvbf16ger2pn") > + (UNSPEC_MMA_PMXVBF16GER2NP "pmdmxvbf16ger2np") > + (UNSPEC_MMA_PMXVBF16GER2NN "pmdmxvbf16ger2nn")]) > + > (define_int_attr vvi4i4 [(UNSPEC_MMA_PMXVF32GER "pmxvf32ger")]) > > +(define_int_attr vvi4i4_dm [(UNSPEC_MMA_PMXVF32GER "pmdmxvf32ger")]) > + > (define_int_attr avvi4i4 [(UNSPEC_MMA_PMXVF32GERPP "pmxvf32gerpp") > (UNSPEC_MMA_PMXVF32GERPN "pmxvf32gerpn") > (UNSPEC_MMA_PMXVF32GERNP "pmxvf32gernp") > (UNSPEC_MMA_PMXVF32GERNN "pmxvf32gernn")]) > > +(define_int_attr avvi4i4_dm [(UNSPEC_MMA_PMXVF32GERPP "pmdmxvf32gerpp") > + (UNSPEC_MMA_PMXVF32GERPN "pmdmxvf32gerpn") > + (UNSPEC_MMA_PMXVF32GERNP "pmdmxvf32gernp") > + 
(UNSPEC_MMA_PMXVF32GERNN "pmdmxvf32gernn")]) > + > (define_int_attr pvi4i2 [(UNSPEC_MMA_PMXVF64GER "pmxvf64ger")]) > > +(define_int_attr pvi4i2_dm [(UNSPEC_MMA_PMXVF64GER "pmdmxvf64ger")]) > + > (define_int_attr apvi4i2 [(UNSPEC_MMA_PMXVF64GERPP "pmxvf64gerpp") > (UNSPEC_MMA_PMXVF64GERPN "pmxvf64gerpn") > (UNSPEC_MMA_PMXVF64GERNP "pmxvf64gernp") > (UNSPEC_MMA_PMXVF64GERNN "pmxvf64gernn")]) > > +(define_int_attr apvi4i2_dm [(UNSPEC_MMA_PMXVF64GERPP "pmdmxvf64gerpp") > + (UNSPEC_MMA_PMXVF64GERPN "pmdmxvf64gerpn") > + (UNSPEC_MMA_PMXVF64GERNP "pmdmxvf64gernp") > + (UNSPEC_MMA_PMXVF64GERNN "pmdmxvf64gernn")]) > + > (define_int_attr vvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4 "pmxvi8ger4")]) > > +(define_int_attr vvi4i4i4_dm [(UNSPEC_MMA_PMXVI8GER4 "pmdmxvi8ger4")]) > + > (define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "pmxvi8ger4pp") > (UNSPEC_MMA_PMXVI8GER4SPP "pmxvi8ger4spp")]) > > +(define_int_attr avvi4i4i4_dm [(UNSPEC_MMA_PMXVI8GER4PP "pmdmxvi8ger4pp") > + (UNSPEC_MMA_PMXVI8GER4SPP "pmdmxvi8ger4spp")]) > > ;; Vector pair support. OOmode can only live in VSRs. 
> (define_expand "movoo" > @@ -629,7 +667,10 @@ (define_insn "mma_<vv>" > (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")] > MMA_VV))] > "TARGET_MMA" > - "<vv> %A0,%x1,%x2" > + "@ > + dm<vv> %A0,%x1,%x2 > + <vv> %A0,%x1,%x2 > + <vv> %A0,%x1,%x2" > [(set_attr "type" "mma") > (set_attr "isa" "dm,not_dm,not_dm")]) > > @@ -650,7 +691,10 @@ (define_insn "mma_<pv>" > (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")] > MMA_PV))] > "TARGET_MMA" > - "<pv> %A0,%x1,%x2" > + "@ > + dm<pv> %A0,%x1,%x2 > + <pv> %A0,%x1,%x2 > + <pv> %A0,%x1,%x2" > [(set_attr "type" "mma") > (set_attr "isa" "dm,not_dm,not_dm")]) > > @@ -661,7 +705,10 @@ (define_insn "mma_<apv>" > (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")] > MMA_APV))] > "TARGET_MMA" > - "<apv> %A0,%x2,%x3" > + "@ > + dm<apv> %A0,%x2,%x3 > + <apv> %A0,%x2,%x3 > + <apv> %A0,%x2,%x3" > [(set_attr "type" "mma") > (set_attr "isa" "dm,not_dm,not_dm")]) > > @@ -674,7 +721,10 @@ (define_insn "mma_<vvi4i4i8>" > (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")] > MMA_VVI4I4I8))] > "TARGET_MMA" > - "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5" > + "@ > + dm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5 typo? I think you meant <vvi4i4i8_dm>, but it doesn't matter any more with the above suggestion. 
> + <vvi4i4i8> %A0,%x1,%x2,%3,%4,%5 > + <vvi4i4i8> %A0,%x1,%x2,%3,%4,%5" > [(set_attr "type" "mma") > (set_attr "prefixed" "yes") > (set_attr "isa" "dm,not_dm,not_dm")]) > @@ -703,7 +753,10 @@ (define_insn "mma_<vvi4i4i2>" > (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")] > MMA_VVI4I4I2))] > "TARGET_MMA" > - "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5" > + "@ > + <vvi4i4i2_dm> %A0,%x1,%x2,%3,%4,%5 > + <vvi4i4i2> %A0,%x1,%x2,%3,%4,%5 > + <vvi4i4i2> %A0,%x1,%x2,%3,%4,%5" > [(set_attr "type" "mma") > (set_attr "prefixed" "yes") > (set_attr "isa" "dm,not_dm,not_dm")]) > @@ -718,7 +771,10 @@ (define_insn "mma_<avvi4i4i2>" > (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")] > MMA_AVVI4I4I2))] > "TARGET_MMA" > - "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6" > + "@ > + <avvi4i4i2_dm> %A0,%x2,%x3,%4,%5,%6 > + <avvi4i4i2> %A0,%x2,%x3,%4,%5,%6 > + <avvi4i4i2> %A0,%x2,%x3,%4,%5,%6" > [(set_attr "type" "mma") > (set_attr "prefixed" "yes") > (set_attr "isa" "dm,not_dm,not_dm")]) > @@ -731,7 +787,10 @@ (define_insn "mma_<vvi4i4>" > (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")] > MMA_VVI4I4))] > "TARGET_MMA" > - "<vvi4i4> %A0,%x1,%x2,%3,%4" > + "@ > + <vvi4i4_dm> %A0,%x1,%x2,%3,%4 > + <vvi4i4> %A0,%x1,%x2,%3,%4 > + <vvi4i4> %A0,%x1,%x2,%3,%4" > [(set_attr "type" "mma") > (set_attr "prefixed" "yes") > (set_attr "isa" "dm,not_dm,not_dm")]) > @@ -745,7 +804,10 @@ (define_insn "mma_<avvi4i4>" > (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")] > MMA_AVVI4I4))] > "TARGET_MMA" > - "<avvi4i4> %A0,%x2,%x3,%4,%5" > + "@ > + <avvi4i4_dm> %A0,%x2,%x3,%4,%5 > + <avvi4i4> %A0,%x2,%x3,%4,%5 > + <avvi4i4> %A0,%x2,%x3,%4,%5" > [(set_attr "type" "mma") > (set_attr "prefixed" "yes") > (set_attr "isa" "dm,not_dm,not_dm")]) > @@ -758,7 +820,10 @@ (define_insn "mma_<pvi4i2>" > (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")] > MMA_PVI4I2))] > "TARGET_MMA" > - "<pvi4i2> %A0,%x1,%x2,%3,%4" > + "@ > + <pvi4i2_dm> %A0,%x1,%x2,%3,%4 > + <pvi4i2> %A0,%x1,%x2,%3,%4 > + <pvi4i2> %A0,%x1,%x2,%3,%4" > 
[(set_attr "type" "mma") > (set_attr "prefixed" "yes") > (set_attr "isa" "dm,not_dm,not_dm")]) > @@ -772,7 +837,10 @@ (define_insn "mma_<apvi4i2>" > (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")] > MMA_APVI4I2))] > "TARGET_MMA" > - "<apvi4i2> %A0,%x2,%x3,%4,%5" > + "@ > + <apvi4i2_dm> %A0,%x2,%x3,%4,%5 > + <apvi4i2> %A0,%x2,%x3,%4,%5 > + <apvi4i2> %A0,%x2,%x3,%4,%5" > [(set_attr "type" "mma") > (set_attr "prefixed" "yes") > (set_attr "isa" "dm,not_dm,not_dm")]) > @@ -786,7 +854,10 @@ (define_insn "mma_<vvi4i4i4>" > (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")] > MMA_VVI4I4I4))] > "TARGET_MMA" > - "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5" > + "@ > + <vvi4i4i4_dm> %A0,%x1,%x2,%3,%4,%5 > + <vvi4i4i4> %A0,%x1,%x2,%3,%4,%5 > + <vvi4i4i4> %A0,%x1,%x2,%3,%4,%5" > [(set_attr "type" "mma") > (set_attr "prefixed" "yes") > (set_attr "isa" "dm,not_dm,not_dm")]) > @@ -801,7 +872,10 @@ (define_insn "mma_<avvi4i4i4>" > (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")] > MMA_AVVI4I4I4))] > "TARGET_MMA" > - "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6" > + "@ > + <avvi4i4i4_dm> %A0,%x2,%x3,%4,%5,%6 > + <avvi4i4i4> %A0,%x2,%x3,%4,%5,%6 > + <avvi4i4i4> %A0,%x2,%x3,%4,%5,%6" > [(set_attr "type" "mma") > (set_attr "prefixed" "yes") > (set_attr "isa" "dm,not_dm,not_dm")]) > diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c > new file mode 100644 > index 00000000000..66c19779585 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c > @@ -0,0 +1,194 @@ > +/* Test derived from mma-double-1.c, modified for dense math. 
*/ > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_dense_math_ok } */ > +/* { dg-options "-mdejagnu-cpu=future -O2" } */ > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <altivec.h> > + > +typedef unsigned char vec_t __attribute__ ((vector_size (16))); > +typedef double v4sf_t __attribute__ ((vector_size (16))); > +#define SAVE_ACC(ACC, ldc, J) \ > + __builtin_mma_disassemble_acc (result, ACC); \ > + rowC = (v4sf_t *) &CO[0*ldc+J]; \ > + rowC[0] += result[0]; \ > + rowC = (v4sf_t *) &CO[1*ldc+J]; \ > + rowC[0] += result[1]; \ > + rowC = (v4sf_t *) &CO[2*ldc+J]; \ > + rowC[0] += result[2]; \ > + rowC = (v4sf_t *) &CO[3*ldc+J]; \ > + rowC[0] += result[3]; > + > +void > +DM (int m, int n, int k, double *A, double *B, double *C) > +{ > + __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; > + v4sf_t result[4]; > + v4sf_t *rowC; > + for (int l = 0; l < n; l += 4) > + { > + double *CO; > + double *AO; > + AO = A; > + CO = C; > + C += m * 4; > + for (int j = 0; j < m; j += 16) > + { > + double *BO = B; > + __builtin_mma_xxsetaccz (&acc0); > + __builtin_mma_xxsetaccz (&acc1); > + __builtin_mma_xxsetaccz (&acc2); > + __builtin_mma_xxsetaccz (&acc3); > + __builtin_mma_xxsetaccz (&acc4); > + __builtin_mma_xxsetaccz (&acc5); > + __builtin_mma_xxsetaccz (&acc6); > + __builtin_mma_xxsetaccz (&acc7); > + unsigned long i; > + > + for (i = 0; i < k; i++) > + { > + vec_t *rowA = (vec_t *) & AO[i * 16]; > + __vector_pair rowB; > + vec_t *rb = (vec_t *) & BO[i * 4]; > + __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); > + __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); > + __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); > + __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]); > + __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]); > + __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]); > + __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]); > + __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]); > + __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]); > + 
} > + SAVE_ACC (&acc0, m, 0); > + SAVE_ACC (&acc2, m, 4); > + SAVE_ACC (&acc1, m, 2); > + SAVE_ACC (&acc3, m, 6); > + SAVE_ACC (&acc4, m, 8); > + SAVE_ACC (&acc6, m, 12); > + SAVE_ACC (&acc5, m, 10); > + SAVE_ACC (&acc7, m, 14); > + AO += k * 16; > + BO += k * 4; > + CO += 16; > + } > + B += k * 4; > + } > +} > + > +void > +init (double *matrix, int row, int column) > +{ > + for (int j = 0; j < column; j++) > + { > + for (int i = 0; i < row; i++) > + { > + matrix[j * row + i] = (i * 16 + 2 + j) / 0.123; > + } > + } > +} > + > +void > +init0 (double *matrix, double *matrix1, int row, int column) > +{ > + for (int j = 0; j < column; j++) > + for (int i = 0; i < row; i++) > + matrix[j * row + i] = matrix1[j * row + i] = 0; > +} > + > + > +void > +print (const char *name, const double *matrix, int row, int column) > +{ > + printf ("Matrix %s has %d rows and %d columns:\n", name, row, column); > + for (int i = 0; i < row; i++) > + { > + for (int j = 0; j < column; j++) > + { > + printf ("%f ", matrix[j * row + i]); > + } > + printf ("\n"); > + } > + printf ("\n"); > +} > + > +int > +main (int argc, char *argv[]) > +{ > + int rowsA, colsB, common; > + int i, j, k; > + int ret = 0; > + > + for (int t = 16; t <= 128; t += 16) > + { > + for (int t1 = 4; t1 <= 16; t1 += 4) > + { > + rowsA = t; > + colsB = t1; > + common = 1; > + /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */ > + double A[rowsA * common]; > + double B[common * colsB]; > + double C[rowsA * colsB]; > + double D[rowsA * colsB]; > + > + > + init (A, rowsA, common); > + init (B, common, colsB); > + init0 (C, D, rowsA, colsB); > + DM (rowsA, colsB, common, A, B, C); > + > + for (i = 0; i < colsB; i++) > + { > + for (j = 0; j < rowsA; j++) > + { > + D[i * rowsA + j] = 0; > + for (k = 0; k < common; k++) > + { > + D[i * rowsA + j] += > + A[k * rowsA + j] * B[k + common * i]; > + } > + } > + } > + for (i = 0; i < colsB; i++) > + { > + for (j = 0; j < rowsA; j++) > + { > + for (k = 0; k < common; k++) 
> + { > + if (D[i * rowsA + j] != C[i * rowsA + j]) > + { > + printf ("Error %d,%d,%d\n",i,j,k); > + ret++; > + } > + } > + } > + } > + if (ret) > + { > + print ("A", A, rowsA, common); > + print ("B", B, common, colsB); > + print ("C", C, rowsA, colsB); > + print ("D", D, rowsA, colsB); > + } > + } > + } > + > +#ifdef VERBOSE > + if (ret) > + printf ("DM double test fail: %d errors\n",ret); > + else > + printf ("DM double test success: 0 DM errors\n"); > +#else > + if (ret) > + abort(); > +#endif > + > + return ret; > +} > + > +/* { dg-final { scan-assembler {\mdmsetdmrz\M} } } */ > +/* { dg-final { scan-assembler {\mdmxvf64gerpp\M} } } */ > +/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */ > + > diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp > index 1b4a3fb18df..2dec3682a2f 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -7101,6 +7101,25 @@ proc check_effective_target_power10_ok { } { > } > } > > +# Return 1 if this is a PowerPC target supporting -mcpu=future or -mdense-math s/ or -mdense-math// The others look good to me, thanks! BR, Kewen > +# which enables the dense math operations. > +proc check_effective_target_powerpc_dense_math_ok { } { > + return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly { > + __vector_quad vq; > + void test (void) > + { > + #ifndef __PPC_DMR__ > + #error "target does not have dense math support." > + #else > + /* Make sure we have dense math support. */ > + __vector_quad dmr; > + __asm__ ("dmsetaccz %A0" : "=wD" (dmr)); > + vq = dmr; > + #endif > + } > + } "-mcpu=future"] > +} > + > # Return 1 if this is a PowerPC target supporting -mfloat128 via either > # software emulation on power7/power8 systems or hardware support on power9. >
On Sun, Feb 04, 2024 at 01:47:12PM +0800, Kewen.Lin wrote: > > diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md > > index 525a85146ff..f06e6bbb184 100644 > > --- a/gcc/config/rs6000/mma.md > > +++ b/gcc/config/rs6000/mma.md > > @@ -227,13 +227,22 @@ (define_int_attr apv [(UNSPEC_MMA_XVF64GERPP "xvf64gerpp") > > > > (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")]) > > > > +(define_int_attr vvi4i4i8_dm [(UNSPEC_MMA_PMXVI4GER8 "pmdmxvi4ger8")]) > > Can we update vvi4i4i8 to > > (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")]) > > by avoiding to introduce vvi4i4i8_dm, then its use places would be like: > > - "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5" > + "@ > + pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5 > + pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5 > + pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5" > > and > > - define_insn "mma_<vvi4i4i8>" > + define_insn "mma_pm<vvi4i4i8>" > > (or updating its use in corresponding bif expander field) Yes, I can do that.
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 525a85146ff..f06e6bbb184 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -227,13 +227,22 @@ (define_int_attr apv [(UNSPEC_MMA_XVF64GERPP "xvf64gerpp") (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")]) +(define_int_attr vvi4i4i8_dm [(UNSPEC_MMA_PMXVI4GER8 "pmdmxvi4ger8")]) + (define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "pmxvi4ger8pp")]) +(define_int_attr avvi4i4i8_dm [(UNSPEC_MMA_PMXVI4GER8PP "pmdmxvi4ger8pp")]) + (define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2 "pmxvi16ger2") (UNSPEC_MMA_PMXVI16GER2S "pmxvi16ger2s") (UNSPEC_MMA_PMXVF16GER2 "pmxvf16ger2") (UNSPEC_MMA_PMXVBF16GER2 "pmxvbf16ger2")]) +(define_int_attr vvi4i4i2_dm [(UNSPEC_MMA_PMXVI16GER2 "pmdmxvi16ger2") + (UNSPEC_MMA_PMXVI16GER2S "pmdmxvi16ger2s") + (UNSPEC_MMA_PMXVF16GER2 "pmdmxvf16ger2") + (UNSPEC_MMA_PMXVBF16GER2 "pmdmxvbf16ger2")]) + (define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "pmxvi16ger2pp") (UNSPEC_MMA_PMXVI16GER2SPP "pmxvi16ger2spp") (UNSPEC_MMA_PMXVF16GER2PP "pmxvf16ger2pp") @@ -245,25 +254,54 @@ (define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "pmxvi16ger2pp") (UNSPEC_MMA_PMXVBF16GER2NP "pmxvbf16ger2np") (UNSPEC_MMA_PMXVBF16GER2NN "pmxvbf16ger2nn")]) +(define_int_attr avvi4i4i2_dm [(UNSPEC_MMA_PMXVI16GER2PP "pmdmxvi16ger2pp") + (UNSPEC_MMA_PMXVI16GER2SPP "pmdmxvi16ger2spp") + (UNSPEC_MMA_PMXVF16GER2PP "pmdmxvf16ger2pp") + (UNSPEC_MMA_PMXVF16GER2PN "pmdmxvf16ger2pn") + (UNSPEC_MMA_PMXVF16GER2NP "pmdmxvf16ger2np") + (UNSPEC_MMA_PMXVF16GER2NN "pmdmxvf16ger2nn") + (UNSPEC_MMA_PMXVBF16GER2PP "pmdmxvbf16ger2pp") + (UNSPEC_MMA_PMXVBF16GER2PN "pmdmxvbf16ger2pn") + (UNSPEC_MMA_PMXVBF16GER2NP "pmdmxvbf16ger2np") + (UNSPEC_MMA_PMXVBF16GER2NN "pmdmxvbf16ger2nn")]) + (define_int_attr vvi4i4 [(UNSPEC_MMA_PMXVF32GER "pmxvf32ger")]) +(define_int_attr vvi4i4_dm [(UNSPEC_MMA_PMXVF32GER "pmdmxvf32ger")]) + (define_int_attr avvi4i4 [(UNSPEC_MMA_PMXVF32GERPP "pmxvf32gerpp") 
(UNSPEC_MMA_PMXVF32GERPN "pmxvf32gerpn") (UNSPEC_MMA_PMXVF32GERNP "pmxvf32gernp") (UNSPEC_MMA_PMXVF32GERNN "pmxvf32gernn")]) +(define_int_attr avvi4i4_dm [(UNSPEC_MMA_PMXVF32GERPP "pmdmxvf32gerpp") + (UNSPEC_MMA_PMXVF32GERPN "pmdmxvf32gerpn") + (UNSPEC_MMA_PMXVF32GERNP "pmdmxvf32gernp") + (UNSPEC_MMA_PMXVF32GERNN "pmdmxvf32gernn")]) + (define_int_attr pvi4i2 [(UNSPEC_MMA_PMXVF64GER "pmxvf64ger")]) +(define_int_attr pvi4i2_dm [(UNSPEC_MMA_PMXVF64GER "pmdmxvf64ger")]) + (define_int_attr apvi4i2 [(UNSPEC_MMA_PMXVF64GERPP "pmxvf64gerpp") (UNSPEC_MMA_PMXVF64GERPN "pmxvf64gerpn") (UNSPEC_MMA_PMXVF64GERNP "pmxvf64gernp") (UNSPEC_MMA_PMXVF64GERNN "pmxvf64gernn")]) +(define_int_attr apvi4i2_dm [(UNSPEC_MMA_PMXVF64GERPP "pmdmxvf64gerpp") + (UNSPEC_MMA_PMXVF64GERPN "pmdmxvf64gerpn") + (UNSPEC_MMA_PMXVF64GERNP "pmdmxvf64gernp") + (UNSPEC_MMA_PMXVF64GERNN "pmdmxvf64gernn")]) + (define_int_attr vvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4 "pmxvi8ger4")]) +(define_int_attr vvi4i4i4_dm [(UNSPEC_MMA_PMXVI8GER4 "pmdmxvi8ger4")]) + (define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "pmxvi8ger4pp") (UNSPEC_MMA_PMXVI8GER4SPP "pmxvi8ger4spp")]) +(define_int_attr avvi4i4i4_dm [(UNSPEC_MMA_PMXVI8GER4PP "pmdmxvi8ger4pp") + (UNSPEC_MMA_PMXVI8GER4SPP "pmdmxvi8ger4spp")]) ;; Vector pair support. OOmode can only live in VSRs. 
(define_expand "movoo" @@ -629,7 +667,10 @@ (define_insn "mma_<vv>" (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")] MMA_VV))] "TARGET_MMA" - "<vv> %A0,%x1,%x2" + "@ + dm<vv> %A0,%x1,%x2 + <vv> %A0,%x1,%x2 + <vv> %A0,%x1,%x2" [(set_attr "type" "mma") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -650,7 +691,10 @@ (define_insn "mma_<pv>" (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")] MMA_PV))] "TARGET_MMA" - "<pv> %A0,%x1,%x2" + "@ + dm<pv> %A0,%x1,%x2 + <pv> %A0,%x1,%x2 + <pv> %A0,%x1,%x2" [(set_attr "type" "mma") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -661,7 +705,10 @@ (define_insn "mma_<apv>" (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")] MMA_APV))] "TARGET_MMA" - "<apv> %A0,%x2,%x3" + "@ + dm<apv> %A0,%x2,%x3 + <apv> %A0,%x2,%x3 + <apv> %A0,%x2,%x3" [(set_attr "type" "mma") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -674,7 +721,10 @@ (define_insn "mma_<vvi4i4i8>" (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")] MMA_VVI4I4I8))] "TARGET_MMA" - "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5" + "@ + dm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5 + <vvi4i4i8> %A0,%x1,%x2,%3,%4,%5 + <vvi4i4i8> %A0,%x1,%x2,%3,%4,%5" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -703,7 +753,10 @@ (define_insn "mma_<vvi4i4i2>" (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")] MMA_VVI4I4I2))] "TARGET_MMA" - "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5" + "@ + <vvi4i4i2_dm> %A0,%x1,%x2,%3,%4,%5 + <vvi4i4i2> %A0,%x1,%x2,%3,%4,%5 + <vvi4i4i2> %A0,%x1,%x2,%3,%4,%5" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -718,7 +771,10 @@ (define_insn "mma_<avvi4i4i2>" (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")] MMA_AVVI4I4I2))] "TARGET_MMA" - "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6" + "@ + <avvi4i4i2_dm> %A0,%x2,%x3,%4,%5,%6 + <avvi4i4i2> %A0,%x2,%x3,%4,%5,%6 + <avvi4i4i2> %A0,%x2,%x3,%4,%5,%6" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -731,7 +787,10 @@ 
(define_insn "mma_<vvi4i4>" (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")] MMA_VVI4I4))] "TARGET_MMA" - "<vvi4i4> %A0,%x1,%x2,%3,%4" + "@ + <vvi4i4_dm> %A0,%x1,%x2,%3,%4 + <vvi4i4> %A0,%x1,%x2,%3,%4 + <vvi4i4> %A0,%x1,%x2,%3,%4" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -745,7 +804,10 @@ (define_insn "mma_<avvi4i4>" (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")] MMA_AVVI4I4))] "TARGET_MMA" - "<avvi4i4> %A0,%x2,%x3,%4,%5" + "@ + <avvi4i4_dm> %A0,%x2,%x3,%4,%5 + <avvi4i4> %A0,%x2,%x3,%4,%5 + <avvi4i4> %A0,%x2,%x3,%4,%5" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -758,7 +820,10 @@ (define_insn "mma_<pvi4i2>" (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")] MMA_PVI4I2))] "TARGET_MMA" - "<pvi4i2> %A0,%x1,%x2,%3,%4" + "@ + <pvi4i2_dm> %A0,%x1,%x2,%3,%4 + <pvi4i2> %A0,%x1,%x2,%3,%4 + <pvi4i2> %A0,%x1,%x2,%3,%4" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -772,7 +837,10 @@ (define_insn "mma_<apvi4i2>" (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")] MMA_APVI4I2))] "TARGET_MMA" - "<apvi4i2> %A0,%x2,%x3,%4,%5" + "@ + <apvi4i2_dm> %A0,%x2,%x3,%4,%5 + <apvi4i2> %A0,%x2,%x3,%4,%5 + <apvi4i2> %A0,%x2,%x3,%4,%5" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -786,7 +854,10 @@ (define_insn "mma_<vvi4i4i4>" (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")] MMA_VVI4I4I4))] "TARGET_MMA" - "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5" + "@ + <vvi4i4i4_dm> %A0,%x1,%x2,%3,%4,%5 + <vvi4i4i4> %A0,%x1,%x2,%3,%4,%5 + <vvi4i4i4> %A0,%x1,%x2,%3,%4,%5" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) @@ -801,7 +872,10 @@ (define_insn "mma_<avvi4i4i4>" (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")] MMA_AVVI4I4I4))] "TARGET_MMA" - "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6" + "@ + <avvi4i4i4_dm> %A0,%x2,%x3,%4,%5,%6 + <avvi4i4i4> 
%A0,%x2,%x3,%4,%5,%6 + <avvi4i4i4> %A0,%x2,%x3,%4,%5,%6" [(set_attr "type" "mma") (set_attr "prefixed" "yes") (set_attr "isa" "dm,not_dm,not_dm")]) diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c new file mode 100644 index 00000000000..66c19779585 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c @@ -0,0 +1,194 @@ +/* Test derived from mma-double-1.c, modified for dense math. */ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_dense_math_ok } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <altivec.h> + +typedef unsigned char vec_t __attribute__ ((vector_size (16))); +typedef double v4sf_t __attribute__ ((vector_size (16))); +#define SAVE_ACC(ACC, ldc, J) \ + __builtin_mma_disassemble_acc (result, ACC); \ + rowC = (v4sf_t *) &CO[0*ldc+J]; \ + rowC[0] += result[0]; \ + rowC = (v4sf_t *) &CO[1*ldc+J]; \ + rowC[0] += result[1]; \ + rowC = (v4sf_t *) &CO[2*ldc+J]; \ + rowC[0] += result[2]; \ + rowC = (v4sf_t *) &CO[3*ldc+J]; \ + rowC[0] += result[3]; + +void +DM (int m, int n, int k, double *A, double *B, double *C) +{ + __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; + v4sf_t result[4]; + v4sf_t *rowC; + for (int l = 0; l < n; l += 4) + { + double *CO; + double *AO; + AO = A; + CO = C; + C += m * 4; + for (int j = 0; j < m; j += 16) + { + double *BO = B; + __builtin_mma_xxsetaccz (&acc0); + __builtin_mma_xxsetaccz (&acc1); + __builtin_mma_xxsetaccz (&acc2); + __builtin_mma_xxsetaccz (&acc3); + __builtin_mma_xxsetaccz (&acc4); + __builtin_mma_xxsetaccz (&acc5); + __builtin_mma_xxsetaccz (&acc6); + __builtin_mma_xxsetaccz (&acc7); + unsigned long i; + + for (i = 0; i < k; i++) + { + vec_t *rowA = (vec_t *) & AO[i * 16]; + __vector_pair rowB; + vec_t *rb = (vec_t *) & BO[i * 4]; + __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); + 
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); + __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]); + __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]); + __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]); + __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]); + __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]); + __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]); + } + SAVE_ACC (&acc0, m, 0); + SAVE_ACC (&acc2, m, 4); + SAVE_ACC (&acc1, m, 2); + SAVE_ACC (&acc3, m, 6); + SAVE_ACC (&acc4, m, 8); + SAVE_ACC (&acc6, m, 12); + SAVE_ACC (&acc5, m, 10); + SAVE_ACC (&acc7, m, 14); + AO += k * 16; + BO += k * 4; + CO += 16; + } + B += k * 4; + } +} + +void +init (double *matrix, int row, int column) +{ + for (int j = 0; j < column; j++) + { + for (int i = 0; i < row; i++) + { + matrix[j * row + i] = (i * 16 + 2 + j) / 0.123; + } + } +} + +void +init0 (double *matrix, double *matrix1, int row, int column) +{ + for (int j = 0; j < column; j++) + for (int i = 0; i < row; i++) + matrix[j * row + i] = matrix1[j * row + i] = 0; +} + + +void +print (const char *name, const double *matrix, int row, int column) +{ + printf ("Matrix %s has %d rows and %d columns:\n", name, row, column); + for (int i = 0; i < row; i++) + { + for (int j = 0; j < column; j++) + { + printf ("%f ", matrix[j * row + i]); + } + printf ("\n"); + } + printf ("\n"); +} + +int +main (int argc, char *argv[]) +{ + int rowsA, colsB, common; + int i, j, k; + int ret = 0; + + for (int t = 16; t <= 128; t += 16) + { + for (int t1 = 4; t1 <= 16; t1 += 4) + { + rowsA = t; + colsB = t1; + common = 1; + /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */ + double A[rowsA * common]; + double B[common * colsB]; + double C[rowsA * colsB]; + double D[rowsA * colsB]; + + + init (A, rowsA, common); + init (B, common, colsB); + init0 (C, D, rowsA, colsB); + DM (rowsA, colsB, common, A, B, C); + + for (i = 0; i < colsB; i++) + { + for (j = 0; j < rowsA; j++) + { + D[i * rowsA + j] = 0; + for (k = 0; k < common; k++) + { + D[i 
* rowsA + j] += + A[k * rowsA + j] * B[k + common * i]; + } + } + } + for (i = 0; i < colsB; i++) + { + for (j = 0; j < rowsA; j++) + { + for (k = 0; k < common; k++) + { + if (D[i * rowsA + j] != C[i * rowsA + j]) + { + printf ("Error %d,%d,%d\n",i,j,k); + ret++; + } + } + } + } + if (ret) + { + print ("A", A, rowsA, common); + print ("B", B, common, colsB); + print ("C", C, rowsA, colsB); + print ("D", D, rowsA, colsB); + } + } + } + +#ifdef VERBOSE + if (ret) + printf ("DM double test fail: %d errors\n",ret); + else + printf ("DM double test success: 0 DM errors\n"); +#else + if (ret) + abort(); +#endif + + return ret; +} + +/* { dg-final { scan-assembler {\mdmsetdmrz\M} } } */ +/* { dg-final { scan-assembler {\mdmxvf64gerpp\M} } } */ +/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */ + diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 1b4a3fb18df..2dec3682a2f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7101,6 +7101,25 @@ proc check_effective_target_power10_ok { } { } } +# Return 1 if this is a PowerPC target supporting -mcpu=future or -mdense-math +# which enables the dense math operations. +proc check_effective_target_powerpc_dense_math_ok { } { + return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly { + __vector_quad vq; + void test (void) + { + #ifndef __PPC_DMR__ + #error "target does not have dense math support." + #else + /* Make sure we have dense math support. */ + __vector_quad dmr; + __asm__ ("dmsetaccz %A0" : "=wD" (dmr)); + vq = dmr; + #endif + } + } "-mcpu=future"] +} + # Return 1 if this is a PowerPC target supporting -mfloat128 via either # software emulation on power7/power8 systems or hardware support on power9.