new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will insert the clone for the function ef_add inside the function
+ * main (the non-masked version).
+ */
+
+#include <stdlib.h>
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (pentium_4), uniform (x,y)))) My_Type ef_add (My_Type x, My_Type y);
+
+My_Type vhx2[10];
+int
+main (int argc, char **argv)
+{
+  My_Type vhx[10];
+  int ii;
+  /* Initialize every element regardless of argc, so that the loops below
+     never read an indeterminate value.  */
+  for (ii = 0; ii < 10; ii++)
+    vhx[ii] = argc;
+  /* Call ef_add once per element, passing the same value for x and y.  */
+  for (ii = 0; ii < 10; ii++)
+    vhx2[ii] = ef_add(vhx[ii], vhx[ii]);
+  /* Every result must equal argc + argc; abort on the first mismatch.  */
+  for (ii = 0; ii < 10; ii++)
+    if (vhx2[ii] != (argc + argc))
+      abort ();
+  return 0;
+}
+
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will create 2 clones of the function below,
+ * for the pentium4 with sse3 processor.
+ */
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (pentium_4_sse3), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  The attribute above has no mask/nomask clause; x is uniform, y linear.  */
+{
+ return x + y;
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will create 1 clone of the function below, just the masked one,
+ * for the pentium4 with sse3 processor.
+ */
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), mask, processor (pentium_4_sse3), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  The attribute above carries the mask clause; x is uniform, y linear.  */
+{
+ return x + y;
+}
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will create 1 clone of the function below, just the non-masked one,
+ * for the pentium4 with sse3 processor.
+ */
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), nomask, processor (pentium_4_sse3), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  The attribute above carries the nomask clause; x is uniform, y linear.  */
+{
+ return x + y;
+}
new file mode 100644
@@ -0,0 +1,12 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
+/* This test will create 1 clone of the function below, just one for mask
+ * for the pentium4 processor.
+ */
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), mask, processor (pentium_4), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Attribute above: mask clause, processor pentium_4, x uniform, y linear.  */
+{
+ return x + y;
+}
new file mode 100644
@@ -0,0 +1,12 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
+/* This test will create 1 clone of the function below, just one for nomask
+ * for the pentium4 processor.
+ */
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), nomask, processor (pentium_4), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Attribute above: nomask clause, processor pentium_4, x uniform, y linear.  */
+{
+ return x + y;
+}
new file mode 100644
@@ -0,0 +1,12 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
+/* This test will create 2 clones of the function below,
+ * for the pentium4 processor.
+ */
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (pentium_4), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Attribute above: no mask/nomask clause, processor pentium_4.  */
+{
+ return x + y;
+}
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+/* This test will create 2 clones of the function below, one for mask and one
+ without the mask
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (core_i7_sse4_2), uniform (x,y))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y when x is greater than zero, and x - y otherwise.  */
+{
+ if (x > 0)
+ return x + y;
+ else
+ return (x-y);
+}
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will create 2 clones of the function below, one for mask and one
+ without the mask
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (core_i7_sse4_2), uniform (x,y))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns 0 unless x > 0; with x > 0, returns x + y if y > 0 and x - y otherwise.  */
+{
+ if (x > 0) {
+ if (y > 0) {
+ return x + y;
+ }
+ else
+ return (x-y);
+ }
+ else
+ return 0;
+}
+
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+/* This test will create 2 clones of the function below, one for mask and one
+ without the mask
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (core_i7_sse4_2), uniform (x,y))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Switches on x converted to int: 0 or 1 gives (x + 1) + y, 2 gives x - (y + 2), else 0.  */
+{
+ switch ((int)x) {
+ case 0:
+ case 1:
+ x = x + 1;
+ return x+y;
+ case 2:
+ y = y + 2;
+ return x-y;
+ default:
+ return 0;
+ }
+ return 0; /* Not reached: every arm of the switch above, including default, returns.  */
+
+}
+
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will insert the clone for the function ef_add inside the function
+ * main (the non-masked version).
+ */
+
+#include <stdlib.h>
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (core_i7_sse4_2), uniform (x,y)))) My_Type ef_add (My_Type x, My_Type y);
+
+My_Type vhx2[10];
+int
+main (int argc, char **argv)
+{
+  My_Type vhx[10];
+  int ii;
+  /* Fill the whole input array whatever argc is; no element may be left
+     indeterminate before it is passed to ef_add.  */
+  for (ii = 0; ii < 10; ii++)
+    vhx[ii] = argc;
+  /* Each element is used as both arguments of one ef_add call.  */
+  for (ii = 0; ii < 10; ii++)
+    vhx2[ii] = ef_add(vhx[ii], vhx[ii]);
+  /* Verify the results: each one must be argc + argc, else abort.  */
+  for (ii = 0; ii < 10; ii++)
+    if (vhx2[ii] != (argc + argc))
+      abort ();
+  return 0;
+}
+
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+/* This test will create 2 clones of the function below,
+ * for the core2_duo with sse3 processor.
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (core2_duo_sse3), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Attribute above: no mask/nomask clause, processor core2_duo_sse3.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,14 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
+/* This test will create 1 clone of the function below, just the masked one,
+ * for the core2_duo with sse3 processor.
+ */
+
+#define My_Type float
+__attribute__ ((vector(mask, vectorlength(4), processor (core2_duo_sse3), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Here the mask clause is written first in the attribute's clause list.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,14 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
+/* This test will create 1 clone of the function below, just the non-masked one,
+ * for the core2_duo with sse3 processor.
+ */
+
+#define My_Type float
+__attribute__ ((vector(nomask, vectorlength(4), processor (core2_duo_sse3), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Here the nomask clause is written first in the attribute's clause list.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will create 2 clones of the function below,
+ * for the core2_duo with sse 4.1 processor.
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (core_2_duo_sse_4_1), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Attribute above: no mask/nomask clause, processor core_2_duo_sse_4_1.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will create 1 clone of the function below, just one for nomask,
+ * for the core2_duo with sse 4.1 processor.
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (core_2_duo_sse_4_1), linear(y), uniform (x), nomask)))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Here the nomask clause is written last in the attribute's clause list.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will create 1 clone of the function below, just the masked one,
+ * for the core2_duo with sse 4.1 processor.
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (core_2_duo_sse_4_1), linear(y), mask, uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Here the mask clause sits between the linear and uniform clauses.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,14 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
+/* This test will create 2 clones of the function below, one for mask and one
+ without the mask
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), processor (core_i7_sse4_2), uniform (x,y))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Attribute above: no mask/nomask clause; both x and y are uniform.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,13 @@
+
+/* { dg-final { scan-assembler "simdsimd" } } */
+/* This test will create 1 clone of the function below, just one for mask
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), mask, processor (core_i7_sse4_2), uniform (x,y))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Attribute above: mask clause; both x and y are uniform.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will create 1 clone of the function below, just one for nomask
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), nomask, processor (core_i7_sse4_2), uniform (x,y))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Attribute above: nomask clause; both x and y are uniform.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will create 1 clone of the function below, just one for nomask and
+ do a linear for y variable
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), nomask, processor (core_i7_sse4_2), uniform (x), linear(y))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Attribute above: nomask clause, x uniform, y linear.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-final { scan-assembler "simdsimd" } } */
+
+/* This test will create 1 clone of the function below, just one for mask
+ */
+
+#define My_Type float
+__attribute__ ((vector(vectorlength(4), mask, processor (core_i7_sse4_2), linear(y), uniform (x))))
+My_Type ef_add (My_Type x, My_Type y)
+/* Returns x + y.  Attribute above: mask clause, y linear, x uniform.  */
+{
+ return x + y;
+}
+
new file mode 100644
@@ -0,0 +1,50 @@
+# Copyright (C) 2012
+# Free Software Foundation, Inc.
+
+# Contributed by Balaji V. Iyer <balaji.v.iyer@intel.com>
+# Intel Corporation.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+
+# Exit immediately if this isn't a x86 target.
+if { ![istarget i?86*-*-*] && ![istarget x86_64-*-*] } then {
+    return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+set tests_32bit [lsort [glob -nocomplain $srcdir/$subdir/32bit/*.\[cS\]]]
+set tests_64bit [lsort [glob -nocomplain $srcdir/$subdir/64bit/*.\[cS\]]]
+
+# Initialize the dg harness exactly once; it is paired with the single
+# dg-finish at the end of this file.
+dg-init
+
+# A 32 bit x86 target only runs the 32 bit tests.
+if { [istarget i?86*-*-*] } then {
+    dg-runtest $tests_32bit " -O3 -ftree-vectorize -fcilkplus" " "
+}
+
+# For 64 bit architectures, we can run both 32 bit and 64 bit tests.
+if { [istarget x86_64-*-*] } then {
+    # The 32 bit tests are compiled with -m32 added to the flags.
+    dg-runtest $tests_32bit "-m32 -O3 -ftree-vectorize -fcilkplus" " "
+    # The 64 bit tests get the same flags without -m32.
+    dg-runtest $tests_64bit " -O3 -ftree-vectorize -fcilkplus" " "
+}
+
+# All done.  Tear the harness down only after the last dg-runtest call, so it
+# is never finished while test lists remain to be run.
+dg-finish