diff mbox

[libfortran] Fix PR 80687 build failure on nvptx

Message ID d3bcfd98-9cb9-3f17-eb99-3383e486e2dc@netcologne.de
State New
Headers show

Commit Message

Thomas Koenig May 9, 2017, 5:35 p.m. UTC
Hello world,

the attached patch hopefully fixes the build failure on nvptx introduced
by my recent matmul library patch.  It uses malloc/free if VLAs do not
work.

Thomas S., does this fix the problem?

Tested on x86_64 to make sure that the matmul tests still pass;
full regression test still in progress.

OK for trunk if the nvptx problem is fixed and regression-tests pass?

Regards

	Thomas

2017-05-09  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/80687
	* acinclude.m4 (LIBGFOR_CHECK_VLA):  New macro.
	* configure.ac: Use it.
	* config.h.in:  Regenerated.
	* configure:  Regenerated.
	* m4/matmul_internal.m4:  'matmul_name`:  Use
	malloc/free instead of VLA if HAVE_VLA is undefined.
         * generated/matmul_c10.c: Regenerated.
         * generated/matmul_c16.c: Regenerated.
         * generated/matmul_c4.c: Regenerated.
         * generated/matmul_c8.c: Regenerated.
         * generated/matmul_i1.c: Regenerated.
         * generated/matmul_i16.c: Regenerated.
         * generated/matmul_i2.c: Regenerated.
         * generated/matmul_i4.c: Regenerated.
         * generated/matmul_i8.c: Regenerated.
         * generated/matmul_r10.c: Regenerated.
         * generated/matmul_r16.c: Regenerated.
         * generated/matmul_r4.c: Regenerated.
         * generated/matmul_r8.c: Regenerated.

Comments

Janne Blomqvist May 9, 2017, 6:03 p.m. UTC | #1
On Tue, May 9, 2017 at 8:35 PM, Thomas Koenig <tkoenig@netcologne.de> wrote:
> Hello world,
>
> the attached patch hopefully fixes the build failure on nvptx introduced
> by my recent matmul library patch.  It uses malloc/free if VLAs do not
> work.
>
> Thomas S., does this fix the problem?
>
> Tested on x86_64 to make sure that the matmul tests still pass;
> full regression test still in progress.
>
> OK for trunk if the nvptx problem is fixed and regression-tests pass?

I'd prefer to get rid of VLAs altogether, which is why I added
-Werror=vla in https://gcc.gnu.org/ml/fortran/2014-11/msg00052.html .
In that message I also helpfully point out that avoiding VLAs can
help limited targets like nvptx that lack VLAs. :)

VLAs don't come for free. Functions using VLAs require an extra
register for the frame pointer, and AFAIK can't be inlined. Also,
calculating variable offsets might not be possible at compile time as
the offset from the stack/frame pointer might not be known statically.
diff mbox

Patch

Index: acinclude.m4
===================================================================
--- acinclude.m4	(Revision 247566)
+++ acinclude.m4	(Arbeitskopie)
@@ -452,3 +452,20 @@  AC_DEFUN([LIBGFOR_CHECK_AVX512F], [
 	[])
   CFLAGS="$ac_save_CFLAGS"
 ])
+
+dnl Check if VLAs work
+
+AC_DEFUN([LIBGFOR_CHECK_VLA], [
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="-Wno-vla"
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+	void foo(int n)
+	{
+	int a[n];
+        }]], [[]])],
+	AC_DEFINE(HAVE_VLA, 1,
+	[Define if VLAs can be compiled]),
+	[])
+  CFLAGS="$ac_save_CFLAGS"
+])
+
Index: config.h.in
===================================================================
--- config.h.in	(Revision 247566)
+++ config.h.in	(Arbeitskopie)
@@ -807,6 +807,9 @@ 
 /* Define to 1 if you have the `uselocale' function. */
 #undef HAVE_USELOCALE
 
+/* Define if VLAs can be compiled */
+#undef HAVE_VLA
+
 /* Define to 1 if you have the `vsnprintf' function. */
 #undef HAVE_VSNPRINTF
 
Index: configure
===================================================================
--- configure	(Revision 247566)
+++ configure	(Arbeitskopie)
@@ -26363,6 +26363,34 @@  rm -f core conftest.err conftest.$ac_objext confte
   CFLAGS="$ac_save_CFLAGS"
 
 
+# Check whether VLAs work
+
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="-Wno-vla"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+	void foo(int n)
+	{
+	int a[n];
+        }
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+$as_echo "#define HAVE_VLA 1" >>confdefs.h
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  CFLAGS="$ac_save_CFLAGS"
+
+
 # Determine what GCC version number to use in filesystem paths.
 
   get_gcc_base_ver="cat"
Index: configure.ac
===================================================================
--- configure.ac	(Revision 247566)
+++ configure.ac	(Arbeitskopie)
@@ -624,6 +624,9 @@  LIBGFOR_CHECK_AVX2
 # Check wether we support AVX512f extensions
 LIBGFOR_CHECK_AVX512F
 
+# Check whether VLAs work
+LIBGFOR_CHECK_VLA
+
 # Determine what GCC version number to use in filesystem paths.
 GCC_BASE_VER
 
Index: generated/matmul_c10.c
===================================================================
--- generated/matmul_c10.c	(Revision 247753)
+++ generated/matmul_c10.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_c10_avx (gfc_array_c10 * const restrict ret
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_10 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_c10_avx (gfc_array_c10 * const restrict ret
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_c10_avx2 (gfc_array_c10 * const restrict re
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_10 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_c10_avx2 (gfc_array_c10 * const restrict re
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_c10_avx512f (gfc_array_c10 * const restrict
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_10 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_c10_avx512f (gfc_array_c10 * const restrict
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_c10_vanilla (gfc_array_c10 * const restrict
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_10 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_c10_vanilla (gfc_array_c10 * const restrict
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_c10 (gfc_array_c10 * const restrict retarra
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_10 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_c10 (gfc_array_c10 * const restrict retarra
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_c16.c
===================================================================
--- generated/matmul_c16.c	(Revision 247753)
+++ generated/matmul_c16.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_c16_avx (gfc_array_c16 * const restrict ret
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_c16_avx (gfc_array_c16 * const restrict ret
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_c16_avx2 (gfc_array_c16 * const restrict re
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_c16_avx2 (gfc_array_c16 * const restrict re
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_c16_avx512f (gfc_array_c16 * const restrict
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_c16_avx512f (gfc_array_c16 * const restrict
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_c16_vanilla (gfc_array_c16 * const restrict
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_c16_vanilla (gfc_array_c16 * const restrict
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_c16 (gfc_array_c16 * const restrict retarra
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_c16 (gfc_array_c16 * const restrict retarra
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_c4.c
===================================================================
--- generated/matmul_c4.c	(Revision 247753)
+++ generated/matmul_c4.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_c4_avx (gfc_array_c4 * const restrict retar
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_c4_avx (gfc_array_c4 * const restrict retar
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_c4_avx2 (gfc_array_c4 * const restrict reta
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_c4_avx2 (gfc_array_c4 * const restrict reta
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_c4_avx512f (gfc_array_c4 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_c4_avx512f (gfc_array_c4 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_c4_vanilla (gfc_array_c4 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_c4_vanilla (gfc_array_c4 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_c4 (gfc_array_c4 * const restrict retarray,
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_c4 (gfc_array_c4 * const restrict retarray,
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_c8.c
===================================================================
--- generated/matmul_c8.c	(Revision 247753)
+++ generated/matmul_c8.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_c8_avx (gfc_array_c8 * const restrict retar
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_c8_avx (gfc_array_c8 * const restrict retar
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_c8_avx2 (gfc_array_c8 * const restrict reta
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_c8_avx2 (gfc_array_c8 * const restrict reta
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_c8_avx512f (gfc_array_c8 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_c8_avx512f (gfc_array_c8 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_c8_vanilla (gfc_array_c8 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_c8_vanilla (gfc_array_c8 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_c8 (gfc_array_c8 * const restrict retarray,
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_COMPLEX_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_c8 (gfc_array_c8 * const restrict retarray,
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_i1.c
===================================================================
--- generated/matmul_i1.c	(Revision 247753)
+++ generated/matmul_i1.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_i1_avx (gfc_array_i1 * const restrict retar
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_1 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_i1_avx (gfc_array_i1 * const restrict retar
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_i1_avx2 (gfc_array_i1 * const restrict reta
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_1 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_i1_avx2 (gfc_array_i1 * const restrict reta
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_i1_avx512f (gfc_array_i1 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_1 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_i1_avx512f (gfc_array_i1 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_i1_vanilla (gfc_array_i1 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_1 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_i1_vanilla (gfc_array_i1 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_i1 (gfc_array_i1 * const restrict retarray,
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_1 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_i1 (gfc_array_i1 * const restrict retarray,
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_i16.c
===================================================================
--- generated/matmul_i16.c	(Revision 247753)
+++ generated/matmul_i16.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_i16_avx (gfc_array_i16 * const restrict ret
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_i16_avx (gfc_array_i16 * const restrict ret
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_i16_avx2 (gfc_array_i16 * const restrict re
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_i16_avx2 (gfc_array_i16 * const restrict re
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_i16_avx512f (gfc_array_i16 * const restrict
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_i16_avx512f (gfc_array_i16 * const restrict
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_i16_vanilla (gfc_array_i16 * const restrict
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_i16_vanilla (gfc_array_i16 * const restrict
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_i16 (gfc_array_i16 * const restrict retarra
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_i16 (gfc_array_i16 * const restrict retarra
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_i2.c
===================================================================
--- generated/matmul_i2.c	(Revision 247753)
+++ generated/matmul_i2.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_i2_avx (gfc_array_i2 * const restrict retar
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_2 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_i2_avx (gfc_array_i2 * const restrict retar
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_i2_avx2 (gfc_array_i2 * const restrict reta
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_2 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_i2_avx2 (gfc_array_i2 * const restrict reta
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_i2_avx512f (gfc_array_i2 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_2 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_i2_avx512f (gfc_array_i2 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_i2_vanilla (gfc_array_i2 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_2 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_i2_vanilla (gfc_array_i2 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_i2 (gfc_array_i2 * const restrict retarray,
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_2 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_i2 (gfc_array_i2 * const restrict retarray,
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_i4.c
===================================================================
--- generated/matmul_i4.c	(Revision 247753)
+++ generated/matmul_i4.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_i4_avx (gfc_array_i4 * const restrict retar
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_i4_avx (gfc_array_i4 * const restrict retar
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_i4_avx2 (gfc_array_i4 * const restrict reta
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_i4_avx2 (gfc_array_i4 * const restrict reta
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_i4_avx512f (gfc_array_i4 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_i4_avx512f (gfc_array_i4 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_i4_vanilla (gfc_array_i4 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_i4_vanilla (gfc_array_i4 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_i4 (gfc_array_i4 * const restrict retarray,
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_i4 (gfc_array_i4 * const restrict retarray,
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_i8.c
===================================================================
--- generated/matmul_i8.c	(Revision 247753)
+++ generated/matmul_i8.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_i8_avx (gfc_array_i8 * const restrict retar
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_i8_avx (gfc_array_i8 * const restrict retar
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_i8_avx2 (gfc_array_i8 * const restrict reta
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_i8_avx2 (gfc_array_i8 * const restrict reta
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_i8_avx512f (gfc_array_i8 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_i8_avx512f (gfc_array_i8 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_i8_vanilla (gfc_array_i8 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_i8_vanilla (gfc_array_i8 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_i8 (gfc_array_i8 * const restrict retarray,
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_INTEGER_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_i8 (gfc_array_i8 * const restrict retarray,
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_r10.c
===================================================================
--- generated/matmul_r10.c	(Revision 247753)
+++ generated/matmul_r10.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_r10_avx (gfc_array_r10 * const restrict ret
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_10 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_r10_avx (gfc_array_r10 * const restrict ret
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_r10_avx2 (gfc_array_r10 * const restrict re
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_10 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_r10_avx2 (gfc_array_r10 * const restrict re
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_r10_avx512f (gfc_array_r10 * const restrict
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_10 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_r10_avx512f (gfc_array_r10 * const restrict
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_r10_vanilla (gfc_array_r10 * const restrict
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_10 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_r10_vanilla (gfc_array_r10 * const restrict
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_r10 (gfc_array_r10 * const restrict retarra
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_10 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_10 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_r10 (gfc_array_r10 * const restrict retarra
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_r16.c
===================================================================
--- generated/matmul_r16.c	(Revision 247753)
+++ generated/matmul_r16.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_r16_avx (gfc_array_r16 * const restrict ret
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_r16_avx (gfc_array_r16 * const restrict ret
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_r16_avx2 (gfc_array_r16 * const restrict re
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_r16_avx2 (gfc_array_r16 * const restrict re
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_r16_avx512f (gfc_array_r16 * const restrict
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_r16_avx512f (gfc_array_r16 * const restrict
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_r16_vanilla (gfc_array_r16 * const restrict
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_r16_vanilla (gfc_array_r16 * const restrict
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_r16 (gfc_array_r16 * const restrict retarra
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_16 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_16 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_r16 (gfc_array_r16 * const restrict retarra
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_r4.c
===================================================================
--- generated/matmul_r4.c	(Revision 247753)
+++ generated/matmul_r4.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_r4_avx (gfc_array_r4 * const restrict retar
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_r4_avx (gfc_array_r4 * const restrict retar
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_r4_avx2 (gfc_array_r4 * const restrict reta
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_r4_avx2 (gfc_array_r4 * const restrict reta
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_r4_avx512f (gfc_array_r4 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_r4_avx512f (gfc_array_r4 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_r4_vanilla (gfc_array_r4 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_r4_vanilla (gfc_array_r4 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_r4 (gfc_array_r4 * const restrict retarray,
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_4 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_4 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_r4 (gfc_array_r4 * const restrict retarray,
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: generated/matmul_r8.c
===================================================================
--- generated/matmul_r8.c	(Revision 247753)
+++ generated/matmul_r8.c	(Arbeitskopie)
@@ -316,11 +316,15 @@  matmul_r8_avx (gfc_array_r8 * const restrict retar
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -535,6 +539,9 @@  matmul_r8_avx (gfc_array_r8 * const restrict retar
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -869,11 +876,15 @@  matmul_r8_avx2 (gfc_array_r8 * const restrict reta
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1088,6 +1099,9 @@  matmul_r8_avx2 (gfc_array_r8 * const restrict reta
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1422,11 +1436,15 @@  matmul_r8_avx512f (gfc_array_r8 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -1641,6 +1659,9 @@  matmul_r8_avx512f (gfc_array_r8 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -1971,11 +1992,15 @@  matmul_r8_vanilla (gfc_array_r8 * const restrict r
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2190,6 +2215,9 @@  matmul_r8_vanilla (gfc_array_r8 * const restrict r
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
@@ -2578,11 +2606,15 @@  matmul_r8 (gfc_array_r8 * const restrict retarray,
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       GFC_REAL_8 t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      GFC_REAL_8 *t1;
+      t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -2797,6 +2829,9 @@  matmul_r8 (gfc_array_r8 * const restrict retarray,
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)
Index: m4/matmul_internal.m4
===================================================================
--- m4/matmul_internal.m4	(Revision 247753)
+++ m4/matmul_internal.m4	(Arbeitskopie)
@@ -232,11 +232,15 @@  sinclude(`matmul_asm_'rtype_code`.m4')dnl
       if (t1_dim > 65536)
 	t1_dim = 65536;
 
+#ifdef HAVE_VLA
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wvla"
       'rtype_name` t1[t1_dim]; /* was [256][256] */
 #pragma GCC diagnostic pop
-
+#else
+      'rtype_name` *t1;
+      t1 = malloc (t1_dim * sizeof('rtype_name`));
+#endif
       /* Empty c first.  */
       for (j=1; j<=n; j++)
 	for (i=1; i<=m; i++)
@@ -451,6 +455,9 @@  sinclude(`matmul_asm_'rtype_code`.m4')dnl
 		}
 	    }
 	}
+#ifndef HAVE_VLA
+      free(t1);
+#endif
       return;
     }
   else if (rxstride == 1 && aystride == 1 && bxstride == 1)