new file mode 100644
@@ -0,0 +1,313 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16))); /* 16-byte vector of unsigned char; type of the vector operands passed to the builtins in this test.  */
+
+void
+foo0 (__vector_quad *dst, vec_t *vec) /* Apply xvi4ger8 then xvi4ger8pp to one accumulator and store it to dst[0].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_xvi4ger8 (&acc, vec0, vec1);
+ __builtin_mma_xvi4ger8pp (&acc, vec0, vec1);
+ dst[0] = acc;
+}
+
+void
+foo1 (__vector_quad *dst, vec_t *vec) /* Apply xvi8ger4, xvi8ger4pp and xvi8ger4spp to one accumulator; store it to dst[1].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_xvi8ger4 (&acc, vec0, vec1);
+ __builtin_mma_xvi8ger4pp (&acc, vec0, vec1);
+ __builtin_mma_xvi8ger4spp(&acc, vec0, vec1);
+ dst[1] = acc;
+}
+
+void
+foo2 (__vector_quad *dst, vec_t *vec) /* Apply xvi16ger2 then xvi16ger2pp to one accumulator; store it to dst[2].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_xvi16ger2 (&acc, vec0, vec1);
+ __builtin_mma_xvi16ger2pp (&acc, vec0, vec1);
+ dst[2] = acc;
+}
+
+void
+foo3 (__vector_quad *dst, vec_t *vec) /* Apply xvi16ger2s then xvi16ger2spp to one accumulator; store it to dst[3].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_xvi16ger2s (&acc, vec0, vec1);
+ __builtin_mma_xvi16ger2spp (&acc, vec0, vec1);
+ dst[3] = acc;
+}
+
+void
+foo4 (__vector_quad *dst, vec_t *vec) /* Apply xvf16ger2, xvf16ger2pp and xvf16ger2pn to one accumulator; store it to dst[4].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_xvf16ger2 (&acc, vec0, vec1);
+ __builtin_mma_xvf16ger2pp (&acc, vec0, vec1);
+ __builtin_mma_xvf16ger2pn (&acc, vec0, vec1);
+ dst[4] = acc;
+}
+
+void
+foo4b (__vector_quad *dst, __vector_quad *src, vec_t *vec) /* Load acc from src[0], apply xvf16ger2np then xvf16ger2nn, and store it to dst[4] (the slot foo4 also writes).  */
+{
+ __vector_quad acc;
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ acc = src[0]; /* Unlike foo4, acc is given a value from memory before the first builtin call.  */
+ __builtin_mma_xvf16ger2np (&acc, vec0, vec1);
+ __builtin_mma_xvf16ger2nn (&acc, vec0, vec1);
+ dst[4] = acc;
+}
+
+void
+foo5 (__vector_quad *dst, vec_t *vec) /* Apply xvbf16ger2, xvbf16ger2pp and xvbf16ger2pn to one accumulator; store it to dst[5].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_xvbf16ger2 (&acc, vec0, vec1);
+ __builtin_mma_xvbf16ger2pp (&acc, vec0, vec1);
+ __builtin_mma_xvbf16ger2pn (&acc, vec0, vec1);
+ dst[5] = acc;
+}
+
+void
+foo5b (__vector_quad *dst, __vector_quad *src, vec_t *vec) /* Load acc from src[0], apply xvbf16ger2np then xvbf16ger2nn, and store it to dst[5] (the slot foo5 also writes).  */
+{
+ __vector_quad acc;
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ acc = src[0]; /* Unlike foo5, acc is given a value from memory before the first builtin call.  */
+ __builtin_mma_xvbf16ger2np (&acc, vec0, vec1);
+ __builtin_mma_xvbf16ger2nn (&acc, vec0, vec1);
+ dst[5] = acc;
+}
+
+void
+foo6 (__vector_quad *dst, vec_t *vec) /* Apply xvf32ger, xvf32gerpp and xvf32gerpn to one accumulator; store it to dst[6].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_xvf32ger (&acc, vec0, vec1);
+ __builtin_mma_xvf32gerpp (&acc, vec0, vec1);
+ __builtin_mma_xvf32gerpn (&acc, vec0, vec1);
+ dst[6] = acc;
+}
+
+void
+foo6b (__vector_quad *dst, __vector_quad *src, vec_t *vec) /* Load acc from src[0], apply xvf32gernp then xvf32gernn, and store it to dst[6] (the slot foo6 also writes).  */
+{
+ __vector_quad acc;
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ acc = src[0]; /* Unlike foo6, acc is given a value from memory before the first builtin call.  */
+ __builtin_mma_xvf32gernp (&acc, vec0, vec1);
+ __builtin_mma_xvf32gernn (&acc, vec0, vec1);
+ dst[6] = acc;
+}
+
+void
+foo7 (__vector_quad *dst, vec_t *vec) /* Apply pmxvi4ger8 then pmxvi4ger8pp to one accumulator; store it to dst[7].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_pmxvi4ger8 (&acc, vec0, vec1, 15, 15, 255); /* Three trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvi4ger8pp (&acc, vec0, vec1, 15, 15, 255);
+ dst[7] = acc;
+}
+
+void
+foo8 (__vector_quad *dst, vec_t *vec) /* Apply pmxvi8ger4, pmxvi8ger4pp and pmxvi8ger4spp to one accumulator; store it to dst[8].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_pmxvi8ger4 (&acc, vec0, vec1, 15, 15, 15); /* Three trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvi8ger4pp (&acc, vec0, vec1, 15, 15, 15);
+ __builtin_mma_pmxvi8ger4spp(&acc, vec0, vec1, 15, 15, 15);
+ dst[8] = acc;
+}
+
+void
+foo9 (__vector_quad *dst, vec_t *vec) /* Apply pmxvi16ger2 then pmxvi16ger2pp to one accumulator; store it to dst[9].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_pmxvi16ger2 (&acc, vec0, vec1, 15, 15, 3); /* Three trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvi16ger2pp (&acc, vec0, vec1, 15, 15, 3);
+ dst[9] = acc;
+}
+
+void
+foo10 (__vector_quad *dst, vec_t *vec) /* Apply pmxvi16ger2s then pmxvi16ger2spp to one accumulator; store it to dst[10].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_pmxvi16ger2s (&acc, vec0, vec1, 15, 15, 3); /* Three trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvi16ger2spp (&acc, vec0, vec1, 15, 15, 3);
+ dst[10] = acc;
+}
+
+void
+foo11 (__vector_quad *dst, vec_t *vec) /* Apply pmxvf16ger2, pmxvf16ger2pp and pmxvf16ger2pn to one accumulator; store it to dst[11].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_pmxvf16ger2 (&acc, vec0, vec1, 15, 15, 3); /* Three trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvf16ger2pp (&acc, vec0, vec1, 15, 15, 3);
+ __builtin_mma_pmxvf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
+ dst[11] = acc;
+}
+
+void
+foo11b (__vector_quad *dst, __vector_quad *src, vec_t *vec) /* Load acc from src[0], apply pmxvf16ger2np then pmxvf16ger2nn, and store it to dst[11] (the slot foo11 also writes).  */
+{
+ __vector_quad acc;
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ acc = src[0]; /* Unlike foo11, acc is given a value from memory before the first builtin call.  */
+ __builtin_mma_pmxvf16ger2np (&acc, vec0, vec1, 15, 15, 3); /* Three trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
+ dst[11] = acc;
+}
+
+void
+foo12 (__vector_quad *dst, vec_t *vec) /* Apply pmxvbf16ger2, pmxvbf16ger2pp and pmxvbf16ger2pn to one accumulator; store it to dst[12].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_pmxvbf16ger2 (&acc, vec0, vec1, 15, 15, 3); /* Three trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvbf16ger2pp (&acc, vec0, vec1, 15, 15, 3);
+ __builtin_mma_pmxvbf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
+ dst[12] = acc;
+}
+
+void
+foo12b (__vector_quad *dst, __vector_quad *src, vec_t *vec) /* Load acc from src[0], apply pmxvbf16ger2np then pmxvbf16ger2nn, and store it to dst[12] (the slot foo12 also writes).  */
+{
+ __vector_quad acc;
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ acc = src[0]; /* Unlike foo12, acc is given a value from memory before the first builtin call.  */
+ __builtin_mma_pmxvbf16ger2np (&acc, vec0, vec1, 15, 15, 3); /* Three trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvbf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
+ dst[12] = acc;
+}
+
+void
+foo13 (__vector_quad *dst, vec_t *vec) /* Apply pmxvf32ger, pmxvf32gerpp and pmxvf32gerpn to one accumulator; store it to dst[13].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ __builtin_mma_pmxvf32ger (&acc, vec0, vec1, 15, 15); /* Only two trailing constants here, versus three in the other pm calls of this test; presumably mask immediates -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvf32gerpp (&acc, vec0, vec1, 15, 15);
+ __builtin_mma_pmxvf32gerpn (&acc, vec0, vec1, 15, 15);
+ dst[13] = acc;
+}
+
+void
+foo13b (__vector_quad *dst, __vector_quad *src, vec_t *vec) /* Load acc from src[0], apply pmxvf32gernp then pmxvf32gernn, and store it to dst[13] (the slot foo13 also writes).  */
+{
+ __vector_quad acc;
+ vec_t vec0 = vec[0];
+ vec_t vec1 = vec[1];
+
+ acc = src[0]; /* Unlike foo13, acc is given a value from memory before the first builtin call.  */
+ __builtin_mma_pmxvf32gernp (&acc, vec0, vec1, 15, 15); /* Two trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvf32gernn (&acc, vec0, vec1, 15, 15);
+ dst[13] = acc;
+}
+
+/* { dg-final { scan-assembler-times {\mlxv\M} 40 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 40 } } */
+/* { dg-final { scan-assembler-times {\mxxmfacc\M} 20 } } */
+/* { dg-final { scan-assembler-times {\mxxmtacc\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxvbf16ger2\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvbf16ger2nn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvbf16ger2np\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvbf16ger2pn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvbf16ger2pp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf16ger2\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf16ger2nn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf16ger2np\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf16ger2pn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf16ger2pp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf32ger\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf32gernn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf32gernp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf32gerpn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf32gerpp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvi16ger2\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvi16ger2pp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvi16ger2s\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvi16ger2spp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvi4ger8\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvi4ger8pp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvi8ger4\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvi8ger4pp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvi8ger4spp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvbf16ger2\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvbf16ger2nn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvbf16ger2np\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvbf16ger2pn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvbf16ger2pp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf16ger2\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf16ger2nn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf16ger2np\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf16ger2pn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf16ger2pp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf32ger\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf32gernn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf32gernp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf32gerpn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf32gerpp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvi16ger2\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvi16ger2pp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvi16ger2s\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvi16ger2spp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvi4ger8\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvi4ger8pp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvi8ger4\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvi8ger4pp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvi8ger4spp\M} 1 } } */
new file mode 100644
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16))); /* 16-byte vector of unsigned char; type of the second vector operand passed to the builtins in this test.  */
+
+void
+foo0 (__vector_quad *dst, vec_t *vec, __vector_pair *pvecp) /* Apply xvf64ger, xvf64gerpp and xvf64gerpn to one accumulator; store it to dst[0].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ __vector_pair vecp0 = *pvecp; /* The first vector operand of these builtins is a __vector_pair, not a vec_t.  */
+ vec_t vec1 = vec[1]; /* Only element 1 of vec is read in this function.  */
+
+ __builtin_mma_xvf64ger (&acc, vecp0, vec1);
+ __builtin_mma_xvf64gerpp (&acc, vecp0, vec1);
+ __builtin_mma_xvf64gerpn (&acc, vecp0, vec1);
+ dst[0] = acc;
+}
+
+void
+foo1 (__vector_quad *dst, __vector_quad *src, vec_t *vec, __vector_pair *pvecp) /* Load acc from src[0], apply xvf64gernp then xvf64gernn, and store it to dst[0] (the slot foo0 also writes).  */
+{
+ __vector_quad acc;
+ __vector_pair vecp0 = *pvecp; /* The first vector operand of these builtins is a __vector_pair, not a vec_t.  */
+ vec_t vec1 = vec[1]; /* Only element 1 of vec is read in this function.  */
+
+ acc = src[0]; /* Unlike foo0, acc is given a value from memory before the first builtin call.  */
+ __builtin_mma_xvf64gernp (&acc, vecp0, vec1);
+ __builtin_mma_xvf64gernn (&acc, vecp0, vec1);
+ dst[0] = acc;
+}
+
+void
+foo2 (__vector_quad *dst, vec_t *vec, __vector_pair *pvecp) /* Apply pmxvf64ger, pmxvf64gerpp and pmxvf64gerpn to one accumulator; store it to dst[1].  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtins below.  */
+ __vector_pair vecp0 = *pvecp; /* The first vector operand of these builtins is a __vector_pair, not a vec_t.  */
+ vec_t vec1 = vec[1]; /* Only element 1 of vec is read in this function.  */
+ __builtin_mma_pmxvf64ger (&acc, vecp0, vec1, 15, 3); /* Two trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvf64gerpp (&acc, vecp0, vec1, 15, 3);
+ __builtin_mma_pmxvf64gerpn (&acc, vecp0, vec1, 15, 3);
+ dst[1] = acc;
+}
+
+void
+foo3 (__vector_quad *dst, __vector_quad *src, vec_t *vec, __vector_pair *pvecp) /* Load acc from src[0], apply pmxvf64gernp then pmxvf64gernn, and store it to dst[1] (the slot foo2 also writes).  */
+{
+ __vector_quad acc;
+ __vector_pair vecp0 = *pvecp; /* The first vector operand of these builtins is a __vector_pair, not a vec_t.  */
+ vec_t vec1 = vec[1]; /* Only element 1 of vec is read in this function.  */
+
+ acc = src[0]; /* Unlike foo2, acc is given a value from memory before the first builtin call.  */
+ __builtin_mma_pmxvf64gernp (&acc, vecp0, vec1, 15, 3); /* Two trailing constants: presumably the mask immediates of the pm form -- confirm against the builtin docs.  */
+ __builtin_mma_pmxvf64gernn (&acc, vecp0, vec1, 15, 3);
+ dst[1] = acc;
+}
+
+/* { dg-final { scan-assembler-times {\mxxmfacc\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 8 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 8 } } */
+/* { dg-final { scan-assembler-times {\mxvf64ger\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf64gerpp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf64gerpn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf64gernp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvf64gernn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf64ger\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf64gerpp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf64gerpn\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf64gernp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mpmxvf64gernn\M} 1 } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
+
+void
+foo0 (void) /* Call xxmtacc then xxmfacc on a local accumulator that is produced and consumed only by inline asm operands.  */
+{
+ __vector_quad acc;
+ asm ("#..." : "=d" (acc)); /* acc is the only output operand of this asm; no memory object is read to produce it.  */
+ __builtin_mma_xxmtacc (&acc);
+ __builtin_mma_xxmfacc (&acc);
+ asm ("#..." :: "d" (acc)); /* acc is the only input operand of this asm; it is not stored to any memory object.  */
+}
+
+typedef unsigned char vec_t __attribute__((vector_size(16))); /* 16-byte vector of unsigned char; element type of the array foo1 reads and writes.  */
+
+void
+foo1 (vec_t *vec) /* Store the xvcvspbf16 result for vec[0] in vec[1] and the xvcvbf16sp result for vec[2] in vec[3].  */
+{
+ vec[1] = __builtin_vsx_xvcvspbf16 (vec[0]);
+ vec[3] = __builtin_vsx_xvcvbf16sp (vec[2]);
+}
+
+/* { dg-final { scan-assembler-times {\mxxmtacc\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxmfacc\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mlxv\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mlxvp\M} } } */
+/* { dg-final { scan-assembler-not {\mstxvp\M} } } */
+/* { dg-final { scan-assembler-times {\mxvcvspbf16\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvcvbf16sp\M} 1 } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16))); /* 16-byte vector of unsigned char; the single-vector type combined into or extracted from a __vector_pair in this test.  */
+
+void
+foo (__vector_pair *dst, vec_t *src) /* Pass src[0] and src[4] to __builtin_mma_assemble_pair and store the resulting pair to *dst.  */
+{
+ __vector_pair pair; /* No initializer; only passed by address to the builtin below.  */
+ __builtin_mma_assemble_pair (&pair, src[0], src[4]); /* The two inputs are taken from non-adjacent elements of src.  */
+ *dst = pair;
+}
+
+void
+bar (vec_t *dst, __vector_pair *src) /* Pass a two-element local array and src to __builtin_mma_disassemble_pair, then copy the two elements to dst[0] and dst[4].  */
+{
+ vec_t res[2]; /* No initializer; only passed to the builtin below before being read.  */
+ __builtin_mma_disassemble_pair (res, src);
+ dst[0] = res[0];
+ dst[4] = res[1]; /* The two results go to non-adjacent elements of dst.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlxv\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 1 } } */
+
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16))); /* 16-byte vector of unsigned char; the single-vector type combined into or extracted from a __vector_quad in this test.  */
+
+void
+foo (__vector_quad *dst, vec_t *src) /* Pass src[0], src[4], src[8] and src[12] to __builtin_mma_assemble_acc and store the resulting accumulator to *dst.  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtin below.  */
+ __builtin_mma_assemble_acc (&acc, src[0], src[4], src[8], src[12]); /* The four inputs are taken from non-adjacent elements of src.  */
+ *dst = acc;
+}
+
+void
+bar (vec_t *dst, __vector_quad *src) /* Pass a four-element local array and src to __builtin_mma_disassemble_acc, then copy the four elements to dst[0], dst[4], dst[8] and dst[12].  */
+{
+ vec_t res[4]; /* No initializer; only passed to the builtin below before being read.  */
+ __builtin_mma_disassemble_acc (res, src);
+ dst[0] = res[0];
+ dst[4] = res[1]; /* The results go to non-adjacent elements of dst.  */
+ dst[8] = res[2];
+ dst[12] = res[3];
+}
+
+/* { dg-final { scan-assembler-times {\mlxv\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=future -O2" } */
+
+void
+foo (__vector_quad *dst) /* Call __builtin_mma_xxsetaccz on a local accumulator and store it to *dst.  */
+{
+ __vector_quad acc; /* No initializer; only passed by address to the builtin below.  */
+ __builtin_mma_xxsetaccz (&acc); /* The assembler scans in this test expect no lxv, lxvp or xxmtacc to be emitted for this function.  */
+ *dst = acc;
+}
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mlxvp\M} } } */
+/* { dg-final { scan-assembler-not {\mxxmtacc\M} } } */
+/* { dg-final { scan-assembler-times {\mxxsetaccz\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxmfacc\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */