new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O3 -fopenmp-simd -fdump-tree-vect-details -mprefer-vector-width=128" } */
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */
+/* { dg-final { scan-assembler-not "maskmov" } } */
+
+#include "mask-pack.c"
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O3 -fopenmp-simd -fdump-tree-vect-details -mprefer-vector-width=256" } */
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */
+/* { dg-final { scan-assembler-not "maskmov" } } */
+
+#include "mask-pack.c"
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=cascadelake -O3 -fdump-tree-vect-details -mprefer-vector-width=128" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
+typedef unsigned char uint8_t;
+
+static uint8_t x264_clip_uint8 (int x) /* Clamp X into the range 0..255.  */
+{ /* X & ~255 is nonzero exactly when X has bits set outside the low eight.  */
+ return x & (~255) ? (-x) >> 31 : x; /* Assuming 32-bit int and an arithmetic >> (as GCC provides): 0 for negative X (INT_MIN aside), -1 i.e. 255 after narrowing for X > 255.  */
+}
+
+void
+mc_weight (uint8_t* __restrict dst, uint8_t* __restrict src, /* Both pointers are restrict-qualified.  */
+ int i_width,int i_scale) /* I_WIDTH is the loop trip count; I_SCALE multiplies each source byte.  */
+{ /* The only loop in this file; the dg-final scan above expects it to be reported as vectorized once.  */
+ for(int x = 0; x < i_width; x++)
+ dst[x] = x264_clip_uint8 (src[x] * i_scale);
+}
@@ -12124,6 +12124,7 @@ supportable_narrowing_operation (enum tree_code code,
tree intermediate_type, prev_type;
machine_mode intermediate_mode, prev_mode;
int i;
+ unsigned HOST_WIDE_INT n_elts;
bool uns;
*multi_step_cvt = 0;
@@ -12133,8 +12134,9 @@ supportable_narrowing_operation (enum tree_code code,
c1 = VEC_PACK_TRUNC_EXPR;
if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
&& VECTOR_BOOLEAN_TYPE_P (vectype)
- && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
- && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
+ && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
+ && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
+ && n_elts < BITS_PER_UNIT)
optab1 = vec_pack_sbool_trunc_optab;
else
optab1 = optab_for_tree_code (c1, vectype, optab_default);
@@ -12225,8 +12227,9 @@ supportable_narrowing_operation (enum tree_code code,
= lang_hooks.types.type_for_mode (intermediate_mode, uns);
if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
&& VECTOR_BOOLEAN_TYPE_P (prev_type)
- && intermediate_mode == prev_mode
- && SCALAR_INT_MODE_P (prev_mode))
+ && SCALAR_INT_MODE_P (prev_mode)
+ && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
+ && n_elts < BITS_PER_UNIT)
interm_optab = vec_pack_sbool_trunc_optab;
else
interm_optab