===================================================================
@@ -233,8 +233,13 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_c10 (gfc_array_c10 * const restrict retarra
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_c16_avx (gfc_array_c16 * const restrict ret
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict re
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_c16 (gfc_array_c16 * const restrict retarra
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_c4_avx (gfc_array_c4 * const restrict retar
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict reta
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_c8_avx (gfc_array_c8 * const restrict retar
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict reta
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_i1_avx (gfc_array_i1 * const restrict retar
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict reta
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_i16_avx (gfc_array_i16 * const restrict ret
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict re
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_i16 (gfc_array_i16 * const restrict retarra
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_i2_avx (gfc_array_i2 * const restrict retar
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict reta
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_i4_avx (gfc_array_i4 * const restrict retar
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict reta
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_i8_avx (gfc_array_i8 * const restrict retar
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict reta
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_r10_avx (gfc_array_r10 * const restrict ret
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict re
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_r10 (gfc_array_r10 * const restrict retarra
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_r16_avx (gfc_array_r16 * const restrict ret
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict re
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_r16 (gfc_array_r16 * const restrict retarra
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_r4_avx (gfc_array_r4 * const restrict retar
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict reta
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -317,8 +317,13 @@ matmul_r8_avx (gfc_array_r8 * const restrict retar
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -869,8 +874,13 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict reta
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1421,8 +1431,13 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -1987,8 +2002,13 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict r
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -2613,8 +2633,13 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const rest
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const rest
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const rest
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const rest
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const rest
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const rest
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const rest
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const rest
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const rest
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const rest
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
===================================================================
@@ -282,8 +282,13 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
@@ -835,8 +840,13 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restri
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;