diff mbox

Vectorizer RFC patch

Message ID 4D125050.80404@codesourcery.com
State New
Headers show

Commit Message

Bernd Schmidt Dec. 22, 2010, 7:24 p.m. UTC
I'm working on a new target which has some support for (mostly) V2HI
vectors. In particular, I'd like to get gcc to generate dotv2hi patterns
from

int mac(const short *a, const short *b, int sqr, int *sum)
{
 int i;
 int dotp = *sum;

 for (i = 0; i < 150; i++) {
  dotp += b[i] * a[i];
  sqr += b[i] * b[i];
 }

 *sum = dotp;
 return sqr;
}

The problem is that this involves an int value, and the vectorizer gives
up because it obviously can't generate a vector out of SImode ints when
UNITS_PER_SIMD_WORD is 4.

I'm playing with the patch below (based on a 4.5 tree), which allows the
vectorizer to treat scalar types as one-element vectors. It uses some
new inline functions in tree.h rather than direct accesses to TREE_TYPE
and TYPE_VECTOR_SUBPARTS, and makes adjustments to some functions such
as build_vector.

This gives me the code generation I want on this loop. Since I'm not all
that familiar with the vectorizer I thought I'd post the patch here for
comments. Does this look like a reasonable solution, or should I be
looking at something else?


Bernd

Comments

Richard Biener Dec. 23, 2010, 1 a.m. UTC | #1
On Wed, Dec 22, 2010 at 8:24 PM, Bernd Schmidt <bernds@codesourcery.com> wrote:
> I'm working on a new target which has some support for (mostly) V2HI
> vectors. In particular, I'd like to get gcc to generate dotv2hi patterns
> from
>
> int mac(const short *a, const short *b, int sqr, int *sum)
> {
>  int i;
>  int dotp = *sum;
>
>  for (i = 0; i < 150; i++) {
>  dotp += b[i] * a[i];
>  sqr += b[i] * b[i];
>  }
>
>  *sum = dotp;
>  return sqr;
> }
>
> The problem is that this involves an int value, and the vectorizer gives
> up because it obviously can't generate a vector out of SImode ints when
> UNITS_PER_SIMD_WORD is 4.
>
> I'm playing with the patch below (based on a 4.5 tree), which allows the
> vectorizer to treat scalar types as one-element vectors. It uses some
> new inline functions in tree.h rather than direct accesses to TREE_TYPE
> and TYPE_VECTOR_SUBPARTS, and makes adjustments to some functions such
> as build_vector.
>
> This gives me the code generation I want on this loop. Since I'm not all
> that familiar with the vectorizer I thought I'd post the patch here for
> comments. Does this look like a reasonable solution, or should I be
> looking at something else?

Hm, what problem are you trying to fix?  Can you expand on what dotv2hi is?
The C code is dotp = dotp + (int)b[i] * (int)a[i], so there must be some
widening in that pattern.

That said - I do not like the patch too much.  If you really need single element
vectors then create single element vectors.

Richard.

>
> Bernd
>
diff mbox

Patch

Index: tree.c
===================================================================
--- tree.c	(revision 308952)
+++ tree.c	(working copy)
@@ -1271,10 +1271,18 @@  cst_and_fits_in_hwi (const_tree x)
 tree
 build_vector (tree type, tree vals)
 {
-  tree v = make_node (VECTOR_CST);
+  tree v;
   int over = 0;
   tree link;
 
+  if (TREE_CODE (type) != VECTOR_TYPE)
+    {
+      tree value = TREE_VALUE (vals);
+      gcc_assert (TREE_CHAIN (vals) == NULL_TREE);
+      return fold_convert (type, value);
+    }
+  
+  v = make_node (VECTOR_CST);
   TREE_VECTOR_CST_ELTS (v) = vals;
   TREE_TYPE (v) = type;
 
Index: tree.h
===================================================================
--- tree.h	(revision 308952)
+++ tree.h	(working copy)
@@ -5462,4 +5462,20 @@  is_lang_specific (tree t)
   return TREE_CODE (t) == LANG_TYPE || TREE_CODE (t) >= NUM_TREE_CODES;
 }
 
+static inline int
+type_vector_subparts (tree type)
+{
+  if (TREE_CODE (type) != VECTOR_TYPE)
+    return 1;
+  return TYPE_VECTOR_SUBPARTS (type);
+}
+
+static inline tree
+type_vector_subtype (tree type)
+{
+  if (TREE_CODE (type) != VECTOR_TYPE)
+    return type;
+  return TREE_TYPE (type);
+}
+
 #endif  /* GCC_TREE_H  */
Index: tree-vect-loop-manip.c
===================================================================
--- tree-vect-loop-manip.c	(revision 308952)
+++ tree-vect-loop-manip.c	(working copy)
@@ -1958,7 +1958,7 @@  vect_do_peeling_for_loop_bound (loop_vec
    may not hold when there are multiple-types in the loop.
    In this case, for some data-references in the loop the VF does not represent
    the number of elements that fit in the vector.  Therefore, instead of VF we
-   use TYPE_VECTOR_SUBPARTS.  */
+   use type_vector_subparts.  */
 
 static tree
 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
@@ -1978,7 +1978,7 @@  vect_gen_niters_for_prolog_loop (loop_ve
   tree niters_type = TREE_TYPE (loop_niters);
   int step = 1;
   int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
-  int nelements = TYPE_VECTOR_SUBPARTS (vectype);
+  int nelements = type_vector_subparts (vectype);
 
   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 308952)
+++ tree-vectorizer.h	(working copy)
@@ -718,6 +718,18 @@  vect_pow2 (int x)
   return res;
 }
 
+static inline tree
+vect_build_constructor (tree type, tree vals)
+{
+  tree value;
+  if (TREE_CODE (type) == VECTOR_TYPE)
+    return build_constructor_from_list (type, vals);
+   
+  value = TREE_VALUE (vals);
+  gcc_assert (TREE_CHAIN (vals) == NULL_TREE);
+  return fold_convert (type, value);
+}
+
 /*-----------------------------------------------------------------*/
 /* Info on data references alignment.                              */
 /*-----------------------------------------------------------------*/
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 308952)
+++ tree-vect-loop.c	(working copy)
@@ -228,7 +228,7 @@  vect_determine_vectorization_factor (loo
 		  print_generic_expr (vect_dump, vectype, TDF_SLIM);
 		}
 
-	      nunits = TYPE_VECTOR_SUBPARTS (vectype);
+	      nunits = type_vector_subparts (vectype);
 	      if (vect_print_dump_info (REPORT_DETAILS))
 		fprintf (vect_dump, "nunits = %d", nunits);
 
@@ -322,7 +322,7 @@  vect_determine_vectorization_factor (loo
 	      print_generic_expr (vect_dump, vectype, TDF_SLIM);
 	    }
 
-	  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+	  nunits = type_vector_subparts (vectype);
 	  if (vect_print_dump_info (REPORT_DETAILS))
 	    fprintf (vect_dump, "nunits = %d", nunits);
 
@@ -336,7 +336,7 @@  vect_determine_vectorization_factor (loo
   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
-  if (vectorization_factor <= 1)
+  if (vectorization_factor < 1)
     {
       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
         fprintf (vect_dump, "not vectorized: unsupported data-type");
@@ -2027,7 +2027,7 @@  vect_estimate_min_profitable_iters (loop
 	  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
 	  int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
 	  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
-	  int nelements = TYPE_VECTOR_SUBPARTS (vectype);
+	  int nelements = type_vector_subparts (vectype);
 
 	  peel_iters_prologue = nelements - (byte_misalign / element_size);
 	}
@@ -2395,7 +2395,7 @@  get_initial_def_for_induction (gimple iv
 
   vectype = get_vectype_for_scalar_type (scalar_type);
   gcc_assert (vectype);
-  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  nunits = type_vector_subparts (vectype);
   ncopies = vf / nunits;
 
   gcc_assert (phi_info);
@@ -2478,7 +2478,7 @@  get_initial_def_for_induction (gimple iv
 	  t = tree_cons (NULL_TREE, new_name, t);
 	}
       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
-      vec = build_constructor_from_list (vectype, nreverse (t));
+      vec = vect_build_constructor (vectype, nreverse (t));
       vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
     }
 
@@ -2691,7 +2691,7 @@  get_initial_def_for_reduction (gimple st
   gimple def_stmt = NULL;
 
   gcc_assert (vectype);
-  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  nunits = type_vector_subparts (vectype);
 
   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
 	      || SCALAR_FLOAT_TYPE_P (scalar_type));
@@ -2779,7 +2779,7 @@  get_initial_def_for_reduction (gimple st
         if (TREE_CONSTANT (init_val))
           init_def = build_vector (vectype, t);
         else
-          init_def = build_constructor_from_list (vectype, t);
+          init_def = vect_build_constructor (vectype, t);
 
         break;
 
@@ -2799,7 +2799,7 @@  get_initial_def_for_reduction (gimple st
         if (TREE_CONSTANT (init_val))
           init_def = build_vector (vectype, t);
         else
-          init_def = build_constructor_from_list (vectype, t);
+          init_def = vect_build_constructor (vectype, t);
 
         break;
 
@@ -3221,7 +3221,7 @@  vect_create_epilog_for_reduction (tree v
 
       if (BYTES_BIG_ENDIAN)
 	bitpos = size_binop (MULT_EXPR,
-		       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
+		       bitsize_int (type_vector_subparts (vectype) - 1),
 		       TYPE_SIZE (scalar_type));
       else
 	bitpos = bitsize_zero_node;
@@ -3467,7 +3467,7 @@  vectorizable_reduction (gimple stmt, gim
   stmt_vec_info orig_stmt_info;
   tree expr = NULL_TREE;
   int i;
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  int nunits = type_vector_subparts (vectype);
   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
   int epilog_copies;
   stmt_vec_info prev_stmt_info, prev_phi_info;
@@ -4000,7 +4000,7 @@  vectorizable_induction (gimple phi, gimp
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  int nunits = type_vector_subparts (vectype);
   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
   tree vec_def;
 
@@ -4246,7 +4246,7 @@  vect_transform_loop (loop_vec_info loop_
 	      && !STMT_VINFO_LIVE_P (stmt_info))
 	    continue;
 
-	  if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
+	  if ((type_vector_subparts (STMT_VINFO_VECTYPE (stmt_info))
 	        != (unsigned HOST_WIDE_INT) vectorization_factor)
 	      && vect_print_dump_info (REPORT_DETAILS))
 	    fprintf (vect_dump, "multiple-types.");
@@ -4293,7 +4293,7 @@  vect_transform_loop (loop_vec_info loop_
 
 	  gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
 	  nunits =
-	    (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+	    (unsigned int) type_vector_subparts (STMT_VINFO_VECTYPE (stmt_info));
 	  if (!STMT_SLP_TYPE (stmt_info)
 	      && nunits != (unsigned int) vectorization_factor
               && vect_print_dump_info (REPORT_DETAILS))
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 308952)
+++ tree-vect-stmts.c	(working copy)
@@ -890,7 +890,7 @@  vect_get_vec_def_for_operand (tree op, g
   stmt_vec_info def_stmt_info = NULL;
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
-  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  unsigned int nunits = type_vector_subparts (vectype);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
   tree vec_inv;
   tree vec_cst;
@@ -952,7 +952,7 @@  vect_get_vec_def_for_operand (tree op, g
       {
 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
 	gcc_assert (vector_type);
-	nunits = TYPE_VECTOR_SUBPARTS (vector_type);
+	nunits = type_vector_subparts (vector_type);
 
 	if (scalar_def)
 	  *scalar_def = def;
@@ -966,8 +966,7 @@  vect_get_vec_def_for_operand (tree op, g
             t = tree_cons (NULL_TREE, def, t);
           }
 
-	/* FIXME: use build_constructor directly.  */
-        vec_inv = build_constructor_from_list (vector_type, t);
+        vec_inv = vect_build_constructor (vector_type, t);
         return vect_init_vector (stmt, vec_inv, vector_type, NULL);
       }
 
@@ -1287,13 +1286,13 @@  vectorizable_call (gimple stmt, gimple_s
   vectype_in = get_vectype_for_scalar_type (rhs_type);
   if (!vectype_in)
     return false;
-  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
+  nunits_in = type_vector_subparts (vectype_in);
 
   lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
   vectype_out = get_vectype_for_scalar_type (lhs_type);
   if (!vectype_out)
     return false;
-  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
+  nunits_out = type_vector_subparts (vectype_out);
 
   /* FORNOW */
   if (nunits_in == nunits_out / 2)
@@ -1583,14 +1582,14 @@  vectorizable_conversion (gimple stmt, gi
   vectype_in = get_vectype_for_scalar_type (rhs_type);
   if (!vectype_in)
     return false;
-  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
+  nunits_in = type_vector_subparts (vectype_in);
 
   scalar_dest = gimple_assign_lhs (stmt);
   lhs_type = TREE_TYPE (scalar_dest);
   vectype_out = get_vectype_for_scalar_type (lhs_type);
   if (!vectype_out)
     return false;
-  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
+  nunits_out = type_vector_subparts (vectype_out);
 
   /* FORNOW */
   if (nunits_in == nunits_out / 2)
@@ -1805,7 +1804,7 @@  vectorizable_assignment (gimple stmt, gi
   tree def;
   gimple def_stmt;
   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
-  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  unsigned int nunits = type_vector_subparts (vectype);
   int ncopies;
   int i, j;
   VEC(tree,heap) *vec_oprnds = NULL;
@@ -1863,8 +1862,8 @@  vectorizable_assignment (gimple stmt, gi
   if (CONVERT_EXPR_CODE_P (code)
       && (!vectype_in
 	  || !vectype_out      
-	  || (TYPE_VECTOR_SUBPARTS (vectype_out)
-	      != TYPE_VECTOR_SUBPARTS (vectype_in))
+	  || (type_vector_subparts (vectype_out)
+	      != type_vector_subparts (vectype_in))
 	  || (GET_MODE_SIZE (TYPE_MODE (vectype_out))
 	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
     return false;
@@ -1952,7 +1951,7 @@  vectorizable_operation (gimple stmt, gim
   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
   gimple new_stmt = NULL;
   stmt_vec_info prev_stmt_info;
-  int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
+  int nunits_in = type_vector_subparts (vectype);
   int nunits_out;
   tree vectype_out;
   int ncopies;
@@ -1996,7 +1995,7 @@  vectorizable_operation (gimple stmt, gim
   vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
   if (!vectype_out)
     return false;
-  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
+  nunits_out = type_vector_subparts (vectype_out);
   if (nunits_out != nunits_in)
     return false;
 
@@ -2078,7 +2077,7 @@  vectorizable_operation (gimple stmt, gim
 		     so make sure the scalar is the right type if we are
 		     dealing with vectors of short/char.  */
 		  if (dt[1] == vect_constant_def)
-		    op1 = fold_convert (TREE_TYPE (vectype), op1);
+		    op1 = fold_convert (type_vector_subtype (vectype), op1);
 		}
 	    }
 	}
@@ -2447,13 +2446,13 @@  vectorizable_type_demotion (gimple stmt,
   vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
   if (!vectype_in)
     return false;
-  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
+  nunits_in = type_vector_subparts (vectype_in);
 
   scalar_dest = gimple_assign_lhs (stmt);
   vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
   if (!vectype_out)
     return false;
-  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
+  nunits_out = type_vector_subparts (vectype_out);
   if (nunits_in >= nunits_out)
     return false;
 
@@ -2717,14 +2716,14 @@  vectorizable_type_promotion (gimple stmt
   vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
   if (!vectype_in)
     return false;
-  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
+  nunits_in = type_vector_subparts (vectype_in);
 
   scalar_dest = gimple_assign_lhs (stmt);
   vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
   if (!vectype_out)
     return false;
-  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
-  if (nunits_in <= nunits_out)
+  nunits_out = type_vector_subparts (vectype_out);
+  if (nunits_in < nunits_out)
     return false;
 
   /* Multiple types in SLP are handled by creating the appropriate number of
@@ -2910,7 +2909,7 @@  vectorizable_store (gimple stmt, gimple_
   enum vect_def_type dt;
   stmt_vec_info prev_stmt_info = NULL;
   tree dataref_ptr = NULL_TREE;
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  int nunits = type_vector_subparts (vectype);
   int ncopies;
   int j;
   gimple next_stmt, first_stmt = NULL;
@@ -2974,7 +2973,8 @@  vectorizable_store (gimple stmt, gimple_
 
   /* The scalar rhs type needs to be trivially convertible to the vector
      component type.  This should always be the case.  */
-  if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
+  if (!useless_type_conversion_p (type_vector_subtype (vectype),
+				  TREE_TYPE (op)))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "???  operands of different types");
@@ -3283,7 +3283,7 @@  vectorizable_load (gimple stmt, gimple_s
   enum dr_alignment_support alignment_support_scheme;
   tree dataref_ptr = NULL_TREE;
   gimple ptr_incr;
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  int nunits = type_vector_subparts (vectype);
   int ncopies;
   int i, j, group_size;
   tree msq = NULL_TREE, lsq;
@@ -3370,7 +3370,8 @@  vectorizable_load (gimple stmt, gimple_s
 
   /* The vector component type needs to be trivially convertible to the
      scalar lhs.  This should always be the case.  */
-  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
+  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest),
+				  type_vector_subtype (vectype)))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "???  operands of different types");
@@ -3558,7 +3559,7 @@  vectorizable_load (gimple stmt, gimple_s
       if (alignment_support_scheme == dr_explicit_realign_optimized)
 	{
 	  phi = SSA_NAME_DEF_STMT (msq);
-	  offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+	  offset = size_int (type_vector_subparts (vectype) - 1);
 	}
     }
   else
@@ -3603,7 +3604,7 @@  vectorizable_load (gimple stmt, gimple_s
 	    case dr_explicit_realign:
 	      {
 		tree ptr, bump;
-		tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+		tree vs_minus_1 = size_int (type_vector_subparts (vectype) - 1);
 
 		if (compute_in_loop)
 		  msq = vect_setup_realignment (first_stmt, gsi,
@@ -3696,8 +3697,7 @@  vectorizable_load (gimple stmt, gimple_s
 
 		  for (k = nunits - 1; k >= 0; --k)
 		    t = tree_cons (NULL_TREE, new_temp, t);
-		  /* FIXME: use build_constructor directly.  */
-		  vec_inv = build_constructor_from_list (vectype, t);
+		  vec_inv = vect_build_constructor (vectype, t);
 		  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
 		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
 		}
@@ -3832,7 +3832,7 @@  vectorizable_condition (gimple stmt, gim
   enum machine_mode vec_mode;
   tree def;
   enum vect_def_type dt;
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  int nunits = type_vector_subparts (vectype);
   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
   enum tree_code code;
 
@@ -3884,7 +3884,7 @@  vectorizable_condition (gimple stmt, gim
   /* We do not handle two different vector types for the condition
      and the values.  */
   if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)),
-			   TREE_TYPE (vectype)))
+			   type_vector_subtype (vectype)))
     return false;
 
   if (TREE_CODE (then_clause) == SSA_NAME)
@@ -4428,6 +4428,9 @@  get_vectype_for_scalar_type (tree scalar
   int nunits;
   tree vectype;
 
+  if (nbytes == UNITS_PER_SIMD_WORD (inner_mode))
+    return scalar_type;
+
   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
     return NULL_TREE;
 
Index: tree-vect-generic.c
===================================================================
--- tree-vect-generic.c	(revision 308952)
+++ tree-vect-generic.c	(working copy)
@@ -205,7 +205,7 @@  expand_vector_piecewise (gimple_stmt_ite
   VEC(constructor_elt,gc) *v;
   tree part_width = TYPE_SIZE (inner_type);
   tree index = bitsize_int (0);
-  int nunits = TYPE_VECTOR_SUBPARTS (type);
+  int nunits = type_vector_subparts (type);
   int delta = tree_low_cst (part_width, 1)
 	      / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
   int i;
@@ -278,7 +278,7 @@  expand_vector_addition (gimple_stmt_iter
 
   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
       && parts_per_word >= 4
-      && TYPE_VECTOR_SUBPARTS (type) >= 4)
+      && type_vector_subparts (type) >= 4)
     return expand_vector_parallel (gsi, f_parallel,
 				   type, a, b, code);
   else
@@ -483,8 +483,8 @@  expand_vector_operations_1 (gimple_stmt_
         = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op,
 				       TYPE_SATURATING (TREE_TYPE (type)));
       if (vector_compute_type != NULL_TREE
-	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
-	      < TYPE_VECTOR_SUBPARTS (compute_type)))
+	  && (type_vector_subparts (vector_compute_type)
+	      < type_vector_subparts (compute_type)))
 	compute_type = vector_compute_type;
     }
 
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c	(revision 308952)
+++ tree-vect-slp.c	(working copy)
@@ -368,7 +368,7 @@  vect_build_slp_tree (loop_vec_info loop_
           return false;
         }
 
-      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
+      ncopies = vectorization_factor / type_vector_subparts (vectype);
       if (ncopies != 1)
         {
 	  if (vect_print_dump_info (REPORT_SLP))
@@ -380,8 +380,8 @@  vect_build_slp_tree (loop_vec_info loop_
         }
 
       /* In case of multiple types we need to detect the smallest type.  */
-      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
-        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+      if (*max_nunits < type_vector_subparts (vectype))
+        *max_nunits = type_vector_subparts (vectype);
 
       if (is_gimple_call (stmt))
 	rhs_code = CALL_EXPR;
@@ -917,7 +917,7 @@  vect_analyze_slp_instance (loop_vec_info
       return false;
     }
 
-  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  nunits = type_vector_subparts (vectype);
   if (loop_vinfo)
     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   else
@@ -1466,7 +1466,7 @@  vect_get_constant_vectors (slp_tree slp_
   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
   gcc_assert (vector_type);
 
-  nunits = TYPE_VECTOR_SUBPARTS (vector_type);
+  nunits = type_vector_subparts (vector_type);
 
   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
      created vectors. It is greater than 1 if unrolling is performed.
@@ -1508,7 +1508,7 @@  vect_get_constant_vectors (slp_tree slp_
 	      if (constant_p)
 		vec_cst = build_vector (vector_type, t);
 	      else
-		vec_cst = build_constructor_from_list (vector_type, t);
+		vec_cst = vect_build_constructor (vector_type, t);
               VEC_quick_push (tree, voprnds,
                               vect_init_vector (stmt, vec_cst, vector_type, NULL));
               t = NULL_TREE;
@@ -1831,9 +1831,9 @@  vect_transform_slp_perm_load (gimple stm
     }
 
   mask_type = get_vectype_for_scalar_type (mask_element_type);
-  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
+  mask_nunits = type_vector_subparts (mask_type);
   mask = (int *) xmalloc (sizeof (int) * mask_nunits);
-  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  nunits = type_vector_subparts (vectype);
   scale = mask_nunits / nunits;
   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
 
@@ -1976,7 +1976,7 @@  vect_schedule_slp_instance (slp_tree nod
 
   /* VECTYPE is the type of the destination.  */
   vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
-  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
+  nunits = (unsigned int) type_vector_subparts (vectype);
   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
 
   /* For each SLP instance calculate number of vector stmts to be created