diff mbox series

[01/18] rs6000: Handle overloads during program parsing

Message ID 3194a60b1f7a0e9667ef58ff83e674ea7854a2b6.1630511334.git.wschmidt@linux.ibm.com
State New
Headers show
Series Replace the Power target-specific builtin machinery | expand

Commit Message

Bill Schmidt Sept. 1, 2021, 4:13 p.m. UTC
Although this patch looks quite large, the changes are fairly minimal.
Most of it is duplicating the large function that does the overload
resolution using the automatically generated data structures instead of
the old hand-generated ones.  This doesn't make the patch terribly easy to
review, unfortunately.  Just be aware that generally we aren't changing
the logic and functionality of overload handling.

2021-08-31  Bill Schmidt  <wschmidt@linux.ibm.com>

gcc/
	* config/rs6000/rs6000-c.c (rs6000-builtins.h): New include.
	(altivec_resolve_new_overloaded_builtin): New forward decl.
	(rs6000_new_builtin_type_compatible): New function.
	(altivec_resolve_overloaded_builtin): Call
	altivec_resolve_new_overloaded_builtin.
	(altivec_build_new_resolved_builtin): New function.
	(altivec_resolve_new_overloaded_builtin): Likewise.
	* config/rs6000/rs6000-call.c (rs6000_new_builtin_is_supported):
	Likewise.
	* config/rs6000/rs6000-gen-builtins.c (write_decls): Remove _p from
	name of rs6000_new_builtin_is_supported.
---
 gcc/config/rs6000/rs6000-c.c            | 1088 +++++++++++++++++++++++
 gcc/config/rs6000/rs6000-call.c         |   53 ++
 gcc/config/rs6000/rs6000-gen-builtins.c |    2 +-
 3 files changed, 1142 insertions(+), 1 deletion(-)

Comments

will schmidt Sept. 13, 2021, 5:17 p.m. UTC | #1
On Wed, 2021-09-01 at 11:13 -0500, Bill Schmidt via Gcc-patches wrote:

Hi,
  Just a couple of cosmetic nits noted below, the majority of which are also in
the original code this is based on.
Thanks
-Will


> Although this patch looks quite large, the changes are fairly minimal.
> Most of it is duplicating the large function that does the overload
> resolution using the automatically generated data structures instead of
> the old hand-generated ones.  This doesn't make the patch terribly easy to
> review, unfortunately.  Just be aware that generally we aren't changing
> the logic and functionality of overload handling.

ok


> 
> 2021-08-31  Bill Schmidt  <wschmidt@linux.ibm.com>
> 
> gcc/
> 	* config/rs6000/rs6000-c.c (rs6000-builtins.h): New include.
> 	(altivec_resolve_new_overloaded_builtin): New forward decl.
> 	(rs6000_new_builtin_type_compatible): New function.
> 	(altivec_resolve_overloaded_builtin): Call
> 	altivec_resolve_new_overloaded_builtin.
> 	(altivec_build_new_resolved_builtin): New function.
> 	(altivec_resolve_new_overloaded_builtin): Likewise.
> 	* config/rs6000/rs6000-call.c (rs6000_new_builtin_is_supported):
> 	Likewise.
> 	* config/rs6000/rs6000-gen-builtins.c (write_decls): Remove _p from
> 	name of rs6000_new_builtin_is_supported.


ok

> ---
>  gcc/config/rs6000/rs6000-c.c            | 1088 +++++++++++++++++++++++
>  gcc/config/rs6000/rs6000-call.c         |   53 ++
>  gcc/config/rs6000/rs6000-gen-builtins.c |    2 +-
>  3 files changed, 1142 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index afcb5bb6e39..aafb4e6a98f 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -35,6 +35,9 @@
>  #include "langhooks.h"
>  #include "c/c-tree.h"
> 
> +#include "rs6000-builtins.h"
> +
> +static tree altivec_resolve_new_overloaded_builtin (location_t, tree, void *);
> 
> 
>  /* Handle the machine specific pragma longcall.  Its syntax is
> @@ -811,6 +814,30 @@ is_float128_p (tree t)
>  	      && t == long_double_type_node));
>  }
> 
> +static bool
> +rs6000_new_builtin_type_compatible (tree t, tree u)
> +{
> +  if (t == error_mark_node)
> +    return false;
> +
> +  if (INTEGRAL_TYPE_P (t) && INTEGRAL_TYPE_P (u))
> +    return true;
> +
> +  if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
> +      && is_float128_p (t) && is_float128_p (u))
> +    return true;
> +
> +  if (POINTER_TYPE_P (t) && POINTER_TYPE_P (u))
> +    {
> +      t = TREE_TYPE (t);
> +      u = TREE_TYPE (u);
> +      if (TYPE_READONLY (u))
> +	t = build_qualified_type (t, TYPE_QUAL_CONST);
> +    }
> +
> +  return lang_hooks.types_compatible_p (t, u);
> +}
> +

ok

>  static inline bool
>  rs6000_builtin_type_compatible (tree t, int id)
>  {
> @@ -927,6 +954,10 @@ tree
>  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>  				    void *passed_arglist)
>  {
> +  if (new_builtins_are_live)
> +    return altivec_resolve_new_overloaded_builtin (loc, fndecl,
> +						   passed_arglist);
> +
>    vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
>    unsigned int nargs = vec_safe_length (arglist);
>    enum rs6000_builtins fcode

ok

> @@ -1930,3 +1961,1060 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>      return error_mark_node;
>    }
>  }
> +
> +/* Build a tree for a function call to an Altivec non-overloaded builtin.
> +   The overloaded builtin that matched the types and args is described
> +   by DESC.  The N arguments are given in ARGS, respectively.
> +
> +   Actually the only thing it does is calling fold_convert on ARGS, with
> +   a small exception for vec_{all,any}_{ge,le} predicates. */
> +
> +static tree
> +altivec_build_new_resolved_builtin (tree *args, int n, tree fntype,
> +				    tree ret_type,
> +				    rs6000_gen_builtins bif_id,
> +				    rs6000_gen_builtins ovld_id)
> +{
> +  tree argtypes = TYPE_ARG_TYPES (fntype);
> +  tree arg_type[MAX_OVLD_ARGS];
> +  tree fndecl = rs6000_builtin_decls_x[bif_id];
> +  tree call;
> +
> +  for (int i = 0; i < n; i++)
> +    arg_type[i] = TREE_VALUE (argtypes), argtypes = TREE_CHAIN (argtypes);
> +
> +  /* The AltiVec overloading implementation is overall gross, but this
> +     is particularly disgusting.  The vec_{all,any}_{ge,le} builtins
> +     are completely different for floating-point vs. integer vector
> +     types, because the former has vcmpgefp, but the latter should use
> +     vcmpgtXX.
> +
> +     In practice, the second and third arguments are swapped, and the
> +     condition (LT vs. EQ, which is recognizable by bit 1 of the first
> +     argument) is reversed.  Patch the arguments here before building
> +     the resolved CALL_EXPR.  */
> +  if (n == 3
> +      && ovld_id == RS6000_OVLD_VEC_CMPGE_P
> +      && bif_id != RS6000_BIF_VCMPGEFP_P
> +      && bif_id != RS6000_BIF_XVCMPGEDP_P)
> +    {
> +      std::swap (args[1], args[2]);
> +      std::swap (arg_type[1], arg_type[2]);
> +
> +      args[0] = fold_build2 (BIT_XOR_EXPR, TREE_TYPE (args[0]), args[0],
> +			     build_int_cst (NULL_TREE, 2));
> +    }
> +
> +  /* If the number of arguments to an overloaded function increases,
> +     we must expand this switch.  */
> +  gcc_assert (MAX_OVLD_ARGS <= 4);

Ok.   


> +
> +  switch (n)
> +    {
> +    case 0:
> +      call = build_call_expr (fndecl, 0);
> +      break;
> +    case 1:
> +      call = build_call_expr (fndecl, 1,
> +			      fully_fold_convert (arg_type[0], args[0]));
> +      break;
> +    case 2:
> +      call = build_call_expr (fndecl, 2,
> +			      fully_fold_convert (arg_type[0], args[0]),
> +			      fully_fold_convert (arg_type[1], args[1]));
> +      break;
> +    case 3:
> +      call = build_call_expr (fndecl, 3,
> +			      fully_fold_convert (arg_type[0], args[0]),
> +			      fully_fold_convert (arg_type[1], args[1]),
> +			      fully_fold_convert (arg_type[2], args[2]));
> +      break;
> +    case 4:
> +      call = build_call_expr (fndecl, 4,
> +			      fully_fold_convert (arg_type[0], args[0]),
> +			      fully_fold_convert (arg_type[1], args[1]),
> +			      fully_fold_convert (arg_type[2], args[2]),
> +			      fully_fold_convert (arg_type[3], args[3]));
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +  return fold_convert (ret_type, call);
> +}
> +
> +/* Implementation of the resolve_overloaded_builtin target hook, to
> +   support Altivec's overloaded builtins.  */
> +
> +static tree
> +altivec_resolve_new_overloaded_builtin (location_t loc, tree fndecl,
> +					void *passed_arglist)
> +{
> +  vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
> +  unsigned int nargs = vec_safe_length (arglist);
> +  enum rs6000_gen_builtins fcode
> +    = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
> +  tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
> +  tree types[MAX_OVLD_ARGS], args[MAX_OVLD_ARGS];
> +  unsigned int n;
> +
> +  /* Return immediately if this isn't an overload.  */
> +  if (fcode <= RS6000_OVLD_NONE)
> +    return NULL_TREE;
> +
> +  unsigned int adj_fcode = fcode - RS6000_OVLD_NONE;
> +
> +  if (TARGET_DEBUG_BUILTIN)
> +    fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n",
> +	     (int) fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
> +
> +  /* vec_lvsl and vec_lvsr are deprecated for use with LE element order.  */
> +  if (fcode == RS6000_OVLD_VEC_LVSL && !BYTES_BIG_ENDIAN)
> +    warning (OPT_Wdeprecated,
> +	     "%<vec_lvsl%> is deprecated for little endian; use "
> +	     "assignment for unaligned loads and stores");
> +  else if (fcode == RS6000_OVLD_VEC_LVSR && !BYTES_BIG_ENDIAN)
> +    warning (OPT_Wdeprecated,
> +	     "%<vec_lvsr%> is deprecated for little endian; use "
> +	     "assignment for unaligned loads and stores");
> +
> +  if (fcode == RS6000_OVLD_VEC_MUL)
> +    {
> +      /* vec_mul needs to be special cased because there are no instructions
> +	 for it for the {un}signed char, {un}signed short, and {un}signed int
> +	 types.  */
> +      if (nargs != 2)
> +	{
> +	  error ("builtin %qs only accepts 2 arguments", "vec_mul");
> +	  return error_mark_node;
> +	}
> +
> +      tree arg0 = (*arglist)[0];
> +      tree arg0_type = TREE_TYPE (arg0);
> +      tree arg1 = (*arglist)[1];
> +      tree arg1_type = TREE_TYPE (arg1);
> +
> +      /* Both arguments must be vectors and the types must be compatible.  */
> +      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
> +	goto bad;
> +      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
> +	goto bad;
> +
> +      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
> +	{
> +	  case E_QImode:
> +	  case E_HImode:
> +	  case E_SImode:
> +	  case E_DImode:
> +	  case E_TImode:
> +	    {
> +	      /* For scalar types just use a multiply expression.  */
> +	      return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0,
> +				      fold_convert (TREE_TYPE (arg0), arg1));
> +	    }
> +	  case E_SFmode:
> +	    {
> +	      /* For floats use the xvmulsp instruction directly.  */
> +	      tree call = rs6000_builtin_decls_x[RS6000_BIF_XVMULSP];
> +	      return build_call_expr (call, 2, arg0, arg1);
> +	    }
> +	  case E_DFmode:
> +	    {
> +	      /* For doubles use the xvmuldp instruction directly.  */
> +	      tree call = rs6000_builtin_decls_x[RS6000_BIF_XVMULDP];
> +	      return build_call_expr (call, 2, arg0, arg1);
> +	    }
> +	  /* Other types are errors.  */
> +	  default:
> +	    goto bad;
> +	}
> +    }
> +
> +  if (fcode == RS6000_OVLD_VEC_CMPNE)
> +    {
> +      /* vec_cmpne needs to be special cased because there are no instructions
> +	 for it (prior to power 9).  */
> +      if (nargs != 2)
> +	{
> +	  error ("builtin %qs only accepts 2 arguments", "vec_cmpne");
> +	  return error_mark_node;
> +	}
> +
> +      tree arg0 = (*arglist)[0];
> +      tree arg0_type = TREE_TYPE (arg0);
> +      tree arg1 = (*arglist)[1];
> +      tree arg1_type = TREE_TYPE (arg1);
> +
> +      /* Both arguments must be vectors and the types must be compatible.  */
> +      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
> +	goto bad;
> +      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
> +	goto bad;
> +
> +      /* Power9 instructions provide the most efficient implementation of
> +	 ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode
> +	 or SFmode or DFmode.  */
> +      if (!TARGET_P9_VECTOR
> +	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DImode)
> +	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == TImode)
> +	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode)
> +	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode))
> +	{
> +	  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
> +	    {
> +	      /* vec_cmpneq (va, vb) == vec_nor (vec_cmpeq (va, vb),
> +		 vec_cmpeq (va, vb)).  */
> +	      /* Note:  vec_nand also works but opt changes vec_nand's
> +		 to vec_nor's anyway.  */
> +	    case E_QImode:
> +	    case E_HImode:
> +	    case E_SImode:
> +	    case E_DImode:
> +	    case E_TImode:
> +	    case E_SFmode:
> +	    case E_DFmode:
> +	      {
> +		/* call = vec_cmpeq (va, vb)
> +		   result = vec_nor (call, call).  */
> +		vec<tree, va_gc> *params = make_tree_vector ();
> +		vec_safe_push (params, arg0);
> +		vec_safe_push (params, arg1);
> +		tree call = altivec_resolve_new_overloaded_builtin
> +		  (loc, rs6000_builtin_decls_x[RS6000_OVLD_VEC_CMPEQ],
> +		   params);
> +		/* Use save_expr to ensure that operands used more than once
> +		   that may have side effects (like calls) are only evaluated
> +		   once.  */
> +		call = save_expr (call);
> +		params = make_tree_vector ();
> +		vec_safe_push (params, call);
> +		vec_safe_push (params, call);
> +		return altivec_resolve_new_overloaded_builtin
> +		  (loc, rs6000_builtin_decls_x[RS6000_OVLD_VEC_NOR], params);
> +	      }
> +	      /* Other types are errors.  */
> +	    default:
> +	      goto bad;
> +	    }
> +	}
> +      /* else, fall through and process the Power9 alternative below */
> +    }
> +
> +  if (fcode == RS6000_OVLD_VEC_ADDE || fcode == RS6000_OVLD_VEC_SUBE)
> +    {
> +      /* vec_adde needs to be special cased because there is no instruction
> +	  for the {un}signed int version.  */
> +      if (nargs != 3)
> +	{
> +	  const char *name;
> +	  name = fcode == RS6000_OVLD_VEC_ADDE ? "vec_adde" : "vec_sube";
> +	  error ("builtin %qs only accepts 3 arguments", name);
> +	  return error_mark_node;
> +	}
> +
> +      tree arg0 = (*arglist)[0];
> +      tree arg0_type = TREE_TYPE (arg0);
> +      tree arg1 = (*arglist)[1];
> +      tree arg1_type = TREE_TYPE (arg1);
> +      tree arg2 = (*arglist)[2];
> +      tree arg2_type = TREE_TYPE (arg2);
> +
> +      /* All 3 arguments must be vectors of (signed or unsigned) (int or
> +	 __int128) and the types must be compatible.  */
> +      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
> +	goto bad;
> +      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
> +	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
> +	goto bad;
> +
> +      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
> +	{
> +	  /* For {un}signed ints,
> +	     vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb),
> +						   vec_and (carryv, 1)).
> +	     vec_sube (va, vb, carryv) == vec_sub (vec_sub (va, vb),
> +						   vec_and (carryv, 1)).  */

Also commented out in the original code.   Since it's dead code, maybe
worth enhancing the comment to clarify why this is disabled?  

> +	  case E_SImode:
> +	    {
> +	      tree add_sub_builtin;
> +
> +	      vec<tree, va_gc> *params = make_tree_vector ();
> +	      vec_safe_push (params, arg0);
> +	      vec_safe_push (params, arg1);
> +
> +	      if (fcode == RS6000_OVLD_VEC_ADDE)
> +		add_sub_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADD];
> +	      else
> +		add_sub_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUB];
> +
> +	      tree call
> +		= altivec_resolve_new_overloaded_builtin (loc,
> +							  add_sub_builtin,
> +							  params);
> +	      tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
> +	      tree ones_vector = build_vector_from_val (arg0_type, const1);
> +	      tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
> +					       arg2, ones_vector);
> +	      params = make_tree_vector ();
> +	      vec_safe_push (params, call);
> +	      vec_safe_push (params, and_expr);
> +	      return altivec_resolve_new_overloaded_builtin (loc,
> +							     add_sub_builtin,
> +							     params);
> +	    }
> +	  /* For {un}signed __int128s use the vaddeuqm/vsubeuqm instruction
> +	     directly.  */
> +	  case E_TImode:
> +	    break;
> +
> +	  /* Types other than {un}signed int and {un}signed __int128
> +		are errors.  */
> +	  default:
> +	    goto bad;
> +	}
> +    }
> +
> +  if (fcode == RS6000_OVLD_VEC_ADDEC || fcode == RS6000_OVLD_VEC_SUBEC)
> +    {
> +      /* vec_addec and vec_subec needs to be special cased because there is
> +	 no instruction for the {un}signed int version.  */
> +      if (nargs != 3)
> +	{
> +	  const char *name;
> +	  name = fcode == RS6000_OVLD_VEC_ADDEC ? "vec_addec" : "vec_subec";
> +	  error ("builtin %qs only accepts 3 arguments", name);
> +	  return error_mark_node;
> +	}
> +
> +      tree arg0 = (*arglist)[0];
> +      tree arg0_type = TREE_TYPE (arg0);
> +      tree arg1 = (*arglist)[1];
> +      tree arg1_type = TREE_TYPE (arg1);
> +      tree arg2 = (*arglist)[2];
> +      tree arg2_type = TREE_TYPE (arg2);
> +
> +      /* All 3 arguments must be vectors of (signed or unsigned) (int or
> +	 __int128) and the types must be compatible.  */
> +      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
> +	goto bad;
> +      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
> +	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
> +	goto bad;
> +
> +      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
> +	{
> +	  /* For {un}signed ints,
> +	      vec_addec (va, vb, carryv) ==
> +				vec_or (vec_addc (va, vb),
> +					vec_addc (vec_add (va, vb),
> +						  vec_and (carryv, 0x1))).  */

similar here.

> +	  case E_SImode:
> +	    {
> +	    /* Use save_expr to ensure that operands used more than once
> +		that may have side effects (like calls) are only evaluated
> +		once.  */
> +	    tree as_builtin;
> +	    tree as_c_builtin;
> +
> +	    arg0 = save_expr (arg0);
> +	    arg1 = save_expr (arg1);
> +	    vec<tree, va_gc> *params = make_tree_vector ();
> +	    vec_safe_push (params, arg0);
> +	    vec_safe_push (params, arg1);
> +
> +	    if (fcode == RS6000_OVLD_VEC_ADDEC)
> +	      as_c_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADDC];
> +	    else
> +	      as_c_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUBC];
> +
> +	    tree call1 = altivec_resolve_new_overloaded_builtin (loc,
> +								 as_c_builtin,
> +								 params);
> +	    params = make_tree_vector ();
> +	    vec_safe_push (params, arg0);
> +	    vec_safe_push (params, arg1);
> +
> +

extra blank line?


> +	    if (fcode == RS6000_OVLD_VEC_ADDEC)
> +	      as_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADD];
> +	    else
> +	      as_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUB];
> +
> +	    tree call2 = altivec_resolve_new_overloaded_builtin (loc,
> +								 as_builtin,
> +								 params);
> +	    tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
> +	    tree ones_vector = build_vector_from_val (arg0_type, const1);
> +	    tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
> +					     arg2, ones_vector);
> +	    params = make_tree_vector ();
> +	    vec_safe_push (params, call2);
> +	    vec_safe_push (params, and_expr);
> +	    call2 = altivec_resolve_new_overloaded_builtin (loc, as_c_builtin,
> +							    params);
> +	    params = make_tree_vector ();
> +	    vec_safe_push (params, call1);
> +	    vec_safe_push (params, call2);
> +	    tree or_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_OR];
> +	    return altivec_resolve_new_overloaded_builtin (loc, or_builtin,
> +							   params);
> +	    }
> +	  /* For {un}signed __int128s use the vaddecuq/vsubbecuq
> +	     instructions.  This occurs through normal processing.  */
> +	  case E_TImode:
> +	    break;
> +
> +	  /* Types other than {un}signed int and {un}signed __int128
> +		are errors.  */
> +	  default:
> +	    goto bad;
> +	}
> +    }

ok

> +
> +  /* For now treat vec_splats and vec_promote as the same.  */
> +  if (fcode == RS6000_OVLD_VEC_SPLATS || fcode == RS6000_OVLD_VEC_PROMOTE)
> +    {
> +      tree type, arg;
> +      int size;
> +      int i;
> +      bool unsigned_p;
> +      vec<constructor_elt, va_gc> *vec;
> +      const char *name;
> +      name = fcode == RS6000_OVLD_VEC_SPLATS ? "vec_splats" : "vec_promote";
> +
> +      if (fcode == RS6000_OVLD_VEC_SPLATS && nargs != 1)
> +	{
> +	  error ("builtin %qs only accepts 1 argument", name);
> +	  return error_mark_node;
> +	}
> +      if (fcode == RS6000_OVLD_VEC_PROMOTE && nargs != 2)
> +	{
> +	  error ("builtin %qs only accepts 2 arguments", name);
> +	  return error_mark_node;
> +	}
> +      /* Ignore promote's element argument.  */
> +      if (fcode == RS6000_OVLD_VEC_PROMOTE
> +	  && !INTEGRAL_TYPE_P (TREE_TYPE ((*arglist)[1])))
> +	goto bad;
> +
> +      arg = (*arglist)[0];
> +      type = TREE_TYPE (arg);
> +      if (!SCALAR_FLOAT_TYPE_P (type)
> +	  && !INTEGRAL_TYPE_P (type))
> +	goto bad;
> +      unsigned_p = TYPE_UNSIGNED (type);
> +      switch (TYPE_MODE (type))
> +	{
> +	  case E_TImode:
> +	    type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
> +	    size = 1;
> +	    break;
> +	  case E_DImode:
> +	    type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
> +	    size = 2;
> +	    break;
> +	  case E_SImode:
> +	    type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
> +	    size = 4;
> +	    break;
> +	  case E_HImode:
> +	    type = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
> +	    size = 8;
> +	    break;
> +	  case E_QImode:
> +	    type = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
> +	    size = 16;
> +	    break;
> +	  case E_SFmode:
> +	    type = V4SF_type_node;
> +	    size = 4;
> +	    break;
> +	  case E_DFmode:
> +	    type = V2DF_type_node;
> +	    size = 2;
> +	    break;
> +	  default:
> +	    goto bad;
> +	}
> +      arg = save_expr (fold_convert (TREE_TYPE (type), arg));
> +      vec_alloc (vec, size);
> +      for (i = 0; i < size; i++)
> +	{
> +	  constructor_elt elt = {NULL_TREE, arg};
> +	  vec->quick_push (elt);
> +	}
> +      return build_constructor (type, vec);
> +    }
> +
> +  /* For now use pointer tricks to do the extraction, unless we are on VSX
> +     extracting a double from a constant offset.  */
> +  if (fcode == RS6000_OVLD_VEC_EXTRACT)
> +    {
> +      tree arg1;
> +      tree arg1_type;
> +      tree arg2;
> +      tree arg1_inner_type;
> +      tree decl, stmt;
> +      tree innerptrtype;
> +      machine_mode mode;
> +
> +      /* No second argument. */
> +      if (nargs != 2)
> +	{
> +	  error ("builtin %qs only accepts 2 arguments", "vec_extract");
> +	  return error_mark_node;
> +	}
> +
> +      arg2 = (*arglist)[1];
> +      arg1 = (*arglist)[0];
> +      arg1_type = TREE_TYPE (arg1);
> +
> +      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
> +	goto bad;
> +      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
> +	goto bad;
> +
> +      /* See if we can optimize vec_extracts with the current VSX instruction
> +	 set.  */
> +      mode = TYPE_MODE (arg1_type);
> +      if (VECTOR_MEM_VSX_P (mode))
> +
> +	{
> +	  tree call = NULL_TREE;
> +	  int nunits = GET_MODE_NUNITS (mode);
> +
> +	  arg2 = fold_for_warn (arg2);
> +
> +	  /* If the second argument is an integer constant, generate
> +	     the built-in code if we can.  We need 64-bit and direct
> +	     move to extract the small integer vectors.  */
> +	  if (TREE_CODE (arg2) == INTEGER_CST)
> +	    {
> +	      wide_int selector = wi::to_wide (arg2);
> +	      selector = wi::umod_trunc (selector, nunits);
> +	      arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
> +	      switch (mode)
> +		{
> +		default:
> +		  break;
> +
> +		case E_V1TImode:
> +		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V1TI];
> +		  break;
> +
> +		case E_V2DFmode:
> +		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DF];
> +		  break;
> +
> +		case E_V2DImode:
> +		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DI];
> +		  break;
> +
> +		case E_V4SFmode:
> +		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SF];
> +		  break;
> +
> +		case E_V4SImode:
> +		  if (TARGET_DIRECT_MOVE_64BIT)
> +		    call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SI];
> +		  break;
> +
> +		case E_V8HImode:
> +		  if (TARGET_DIRECT_MOVE_64BIT)
> +		    call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V8HI];
> +		  break;
> +
> +		case E_V16QImode:
> +		  if (TARGET_DIRECT_MOVE_64BIT)
> +		    call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V16QI];
> +		  break;
> +		}
> +	    }
> +
> +	  /* If the second argument is variable, we can optimize it if we are
> +	     generating 64-bit code on a machine with direct move.  */
> +	  else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
> +	    {
> +	      switch (mode)
> +		{
> +		default:
> +		  break;
> +
> +		case E_V2DFmode:
> +		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DF];
> +		  break;
> +
> +		case E_V2DImode:
> +		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DI];
> +		  break;
> +
> +		case E_V4SFmode:
> +		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SF];
> +		  break;
> +
> +		case E_V4SImode:
> +		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SI];
> +		  break;
> +
> +		case E_V8HImode:
> +		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V8HI];
> +		  break;
> +
> +		case E_V16QImode:
> +		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V16QI];
> +		  break;
> +		}
> +	    }
> +
> +	  if (call)
> +	    {
> +	      tree result = build_call_expr (call, 2, arg1, arg2);
> +	      /* Coerce the result to vector element type.  May be no-op.  */
> +	      arg1_inner_type = TREE_TYPE (arg1_type);
> +	      result = fold_convert (arg1_inner_type, result);
> +	      return result;
> +	    }
> +	}
> +
> +      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
> +      arg1_inner_type = TREE_TYPE (arg1_type);
> +      arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
> +			      build_int_cst (TREE_TYPE (arg2),
> +					     TYPE_VECTOR_SUBPARTS (arg1_type)
> +					     - 1), 0);
> +      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
> +      DECL_EXTERNAL (decl) = 0;
> +      TREE_PUBLIC (decl) = 0;
> +      DECL_CONTEXT (decl) = current_function_decl;
> +      TREE_USED (decl) = 1;
> +      TREE_TYPE (decl) = arg1_type;
> +      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
> +      if (c_dialect_cxx ())
> +	{
> +	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
> +			 NULL_TREE, NULL_TREE);
> +	  SET_EXPR_LOCATION (stmt, loc);
> +	}
> +      else
> +	{
> +	  DECL_INITIAL (decl) = arg1;
> +	  stmt = build1 (DECL_EXPR, arg1_type, decl);
> +	  TREE_ADDRESSABLE (decl) = 1;
> +	  SET_EXPR_LOCATION (stmt, loc);
> +	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
> +	}
> +
> +      innerptrtype = build_pointer_type (arg1_inner_type);
> +
> +      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
> +      stmt = convert (innerptrtype, stmt);
> +      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
> +      stmt = build_indirect_ref (loc, stmt, RO_NULL);
> +
> +      /* PR83660: We mark this as having side effects so that
> +	 downstream in fold_build_cleanup_point_expr () it will get a
> +	 CLEANUP_POINT_EXPR.  If it does not we can run into an ICE
> +	 later in gimplify_cleanup_point_expr ().  Potentially this
> +	 causes missed optimization because there actually is no side
> +	 effect.  */
> +      if (c_dialect_cxx ())
> +	TREE_SIDE_EFFECTS (stmt) = 1;
> +
> +      return stmt;
> +    }

ok

> +
> +  /* For now use pointer tricks to do the insertion, unless we are on VSX
> +     inserting a double to a constant offset..  */

Too many ending periods. :-)   (also in original)

> +  if (fcode == RS6000_OVLD_VEC_INSERT)
> +    {
> +      tree arg0;
> +      tree arg1;
> +      tree arg2;
> +      tree arg1_type;
> +      tree decl, stmt;
> +      machine_mode mode;
> +
> +      /* No second or third arguments. */
> +      if (nargs != 3)
> +	{
> +	  error ("builtin %qs only accepts 3 arguments", "vec_insert");
> +	  return error_mark_node;
> +	}
> +
> +      arg0 = (*arglist)[0];
> +      arg1 = (*arglist)[1];
> +      arg1_type = TREE_TYPE (arg1);
> +      arg2 = fold_for_warn ((*arglist)[2]);
> +
> +      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
> +	goto bad;
> +      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
> +	goto bad;
> +
> +      /* If we can use the VSX xxpermdi instruction, use that for insert.  */
> +      mode = TYPE_MODE (arg1_type);
> +      if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
> +	  && TREE_CODE (arg2) == INTEGER_CST)
> +	{
> +	  wide_int selector = wi::to_wide (arg2);
> +	  selector = wi::umod_trunc (selector, 2);
> +	  tree call = NULL_TREE;
> +
> +	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
> +	  if (mode == V2DFmode)
> +	    call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V2DF];
> +	  else if (mode == V2DImode)
> +	    call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V2DI];
> +
> +	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
> +	     reversed.  */
> +	  if (call)
> +	    return build_call_expr (call, 3, arg1, arg0, arg2);
> +	}
> +      else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode)
> +	       && TREE_CODE (arg2) == INTEGER_CST)
> +	{
> +	  tree call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V1TI];
> +	  wide_int selector = wi::zero(32);
> +
> +	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
> +	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
> +	     reversed.  */
> +	  return build_call_expr (call, 3, arg1, arg0, arg2);
> +	}
> +
> +      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0 with
> +	 VIEW_CONVERT_EXPR.  i.e.:
> +	 D.3192 = v1;
> +	 _1 = n & 3;
> +	 VIEW_CONVERT_EXPR<int[4]>(D.3192)[_1] = i;
> +	 v1 = D.3192;
> +	 D.3194 = v1;  */
> +      if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
> +	arg2 = build_int_cst (TREE_TYPE (arg2), 0);
> +      else
> +	arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
> +				build_int_cst (TREE_TYPE (arg2),
> +					       TYPE_VECTOR_SUBPARTS (arg1_type)
> +					       - 1), 0);
> +      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
> +      DECL_EXTERNAL (decl) = 0;
> +      TREE_PUBLIC (decl) = 0;
> +      DECL_CONTEXT (decl) = current_function_decl;
> +      TREE_USED (decl) = 1;
> +      TREE_TYPE (decl) = arg1_type;
> +      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
> +      TREE_ADDRESSABLE (decl) = 1;
> +      if (c_dialect_cxx ())
> +	{
> +	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
> +			 NULL_TREE, NULL_TREE);
> +	  SET_EXPR_LOCATION (stmt, loc);
> +	}
> +      else
> +	{
> +	  DECL_INITIAL (decl) = arg1;
> +	  stmt = build1 (DECL_EXPR, arg1_type, decl);
> +	  SET_EXPR_LOCATION (stmt, loc);
> +	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
> +	}
> +
> +      if (TARGET_VSX)
> +	{
> +	  stmt = build_array_ref (loc, stmt, arg2);
> +	  stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
> +			      convert (TREE_TYPE (stmt), arg0));
> +	  stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
> +	}
> +      else
> +	{
> +	  tree arg1_inner_type;
> +	  tree innerptrtype;
> +	  arg1_inner_type = TREE_TYPE (arg1_type);
> +	  innerptrtype = build_pointer_type (arg1_inner_type);
> +
> +	  stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
> +	  stmt = convert (innerptrtype, stmt);
> +	  stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
> +	  stmt = build_indirect_ref (loc, stmt, RO_NULL);
> +	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
> +			 convert (TREE_TYPE (stmt), arg0));
> +	  stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
> +	}
> +      return stmt;
> +    }
> +
> +  for (n = 0;
> +       !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
> +       fnargs = TREE_CHAIN (fnargs), n++)
> +    {
> +      tree decl_type = TREE_VALUE (fnargs);
> +      tree arg = (*arglist)[n];
> +      tree type;
> +
> +      if (arg == error_mark_node)
> +	return error_mark_node;
> +
> +      if (n >= MAX_OVLD_ARGS)
> +	abort ();
> +
> +      arg = default_conversion (arg);
> +
> +      /* The C++ front-end converts float * to const void * using
> +	 NOP_EXPR<const void *> (NOP_EXPR<void *> (x)).  */
> +      type = TREE_TYPE (arg);
> +      if (POINTER_TYPE_P (type)
> +	  && TREE_CODE (arg) == NOP_EXPR
> +	  && lang_hooks.types_compatible_p (TREE_TYPE (arg),
> +					    const_ptr_type_node)
> +	  && lang_hooks.types_compatible_p (TREE_TYPE (TREE_OPERAND (arg, 0)),
> +					    ptr_type_node))
> +	{
> +	  arg = TREE_OPERAND (arg, 0);
> +	  type = TREE_TYPE (arg);
> +	}
> +
> +      /* Remove the const from the pointers to simplify the overload
> +	 matching further down.  */
> +      if (POINTER_TYPE_P (decl_type)
> +	  && POINTER_TYPE_P (type)
> +	  && TYPE_QUALS (TREE_TYPE (type)) != 0)
> +	{
> +	  if (TYPE_READONLY (TREE_TYPE (type))
> +	      && !TYPE_READONLY (TREE_TYPE (decl_type)))
> +	    warning (0, "passing argument %d of %qE discards qualifiers from "
> +		     "pointer target type", n + 1, fndecl);
> +	  type = build_pointer_type (build_qualified_type (TREE_TYPE (type),
> +							   0));
> +	  arg = fold_convert (type, arg);
> +	}
> +
> +      /* For RS6000_OVLD_VEC_LXVL, convert any const * to its non constant
> +	 equivalent to simplify the overload matching below.  */
> +      if (fcode == RS6000_OVLD_VEC_LXVL)
> +	{
> +	  if (POINTER_TYPE_P (type)
> +	      && TYPE_READONLY (TREE_TYPE (type)))
> +	    {
> +	      type = build_pointer_type (build_qualified_type (
> +						TREE_TYPE (type),0));
> +	      arg = fold_convert (type, arg);
> +	    }
> +	}
> +
> +      args[n] = arg;
> +      types[n] = type;
> +    }
> +
> +  /* If the number of arguments did not match the prototype, return NULL
> +     and the generic code will issue the appropriate error message.  */
> +  if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs)
> +    return NULL;
> +
> +  if (fcode == RS6000_OVLD_VEC_STEP)
> +    {
> +      if (TREE_CODE (types[0]) != VECTOR_TYPE)
> +	goto bad;
> +
> +      return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0]));
> +    }
> +
> +  {
> +    bool unsupported_builtin = false;
> +    enum rs6000_gen_builtins overloaded_code;
> +    bool supported = false;
> +    ovlddata *instance = rs6000_overload_info[adj_fcode].first_instance;
> +    gcc_assert (instance != NULL);
> +
> +    /* Need to special case __builtin_cmpb because the overloaded forms
> +       of this function take (unsigned int, unsigned int) or (unsigned
> +       long long int, unsigned long long int).  Since C conventions
> +       allow the respective argument types to be implicitly coerced into
> +       each other, the default handling does not provide adequate
> +       discrimination between the desired forms of the function.  */
> +    if (fcode == RS6000_OVLD_SCAL_CMPB)
> +      {
> +	machine_mode arg1_mode = TYPE_MODE (types[0]);
> +	machine_mode arg2_mode = TYPE_MODE (types[1]);
> +
> +	if (nargs != 2)
> +	  {
> +	    error ("builtin %qs only accepts 2 arguments", "__builtin_cmpb");
> +	    return error_mark_node;
> +	  }
> +
> +	/* If any supplied arguments are wider than 32 bits, resolve to
> +	   64-bit variant of built-in function.  */
> +	if ((GET_MODE_PRECISION (arg1_mode) > 32)
> +	    || (GET_MODE_PRECISION (arg2_mode) > 32))
> +	  {
> +	    /* Assure all argument and result types are compatible with
> +	       the built-in function represented by RS6000_BIF_CMPB.  */
> +	    overloaded_code = RS6000_BIF_CMPB;
> +	  }
> +	else
> +	  {
> +	    /* Assure all argument and result types are compatible with
> +	       the built-in function represented by RS6000_BIF_CMPB_32.  */
> +	    overloaded_code = RS6000_BIF_CMPB_32;
> +	  }
> +
> +	while (instance && instance->bifid != overloaded_code)
> +	  instance = instance->next;
> +
> +	gcc_assert (instance != NULL);
> +	tree fntype = rs6000_builtin_info_x[instance->bifid].fntype;
> +	tree parmtype0 = TREE_VALUE (TYPE_ARG_TYPES (fntype));
> +	tree parmtype1 = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (fntype)));
> +
> +	if (rs6000_new_builtin_type_compatible (types[0], parmtype0)
> +	    && rs6000_new_builtin_type_compatible (types[1], parmtype1))
> +	  {
> +	    if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node
> +		&& rs6000_new_builtin_is_supported (instance->bifid))
> +	      {
> +		tree ret_type = TREE_TYPE (instance->fntype);
> +		return altivec_build_new_resolved_builtin (args, n, fntype,
> +							   ret_type,
> +							   instance->bifid,
> +							   fcode);
> +	      }
> +	    else
> +	      unsupported_builtin = true;
> +	  }
> +      }
> +    else if (fcode == RS6000_OVLD_VEC_VSIE)

OK, noting that this is foo_VEC_VSIEDP in the original code. (DP
indicator dropped).


> +      {
> +	machine_mode arg1_mode = TYPE_MODE (types[0]);
> +
> +	if (nargs != 2)
> +	  {
> +	    error ("builtin %qs only accepts 2 arguments",
> +		   "scalar_insert_exp");
> +	    return error_mark_node;
> +	  }
> +
> +	/* If supplied first argument is wider than 64 bits, resolve to
> +	   128-bit variant of built-in function.  */
> +	if (GET_MODE_PRECISION (arg1_mode) > 64)
> +	  {
> +	    /* If first argument is of float variety, choose variant
> +	       that expects __ieee128 argument.  Otherwise, expect
> +	       __int128 argument.  */

Could use some "a" and/or "the" in the comment there.  Similar below.
This matches the comment in the original code, so nbd. :-)


> +	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
> +	      overloaded_code = RS6000_BIF_VSIEQPF;
> +	    else
> +	      overloaded_code = RS6000_BIF_VSIEQP;
> +	  }
> +	else
> +	  {
> +	    /* If first argument is of float variety, choose variant
> +	       that expects double argument.  Otherwise, expect
> +	       long long int argument.  */
> +	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
> +	      overloaded_code = RS6000_BIF_VSIEDPF;
> +	    else
> +	      overloaded_code = RS6000_BIF_VSIEDP;
> +	  }
> +
> +	while (instance && instance->bifid != overloaded_code)
> +	  instance = instance->next;
> +
> +	gcc_assert (instance != NULL);
> +	tree fntype = rs6000_builtin_info_x[instance->bifid].fntype;
> +	tree parmtype0 = TREE_VALUE (TYPE_ARG_TYPES (fntype));
> +	tree parmtype1 = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (fntype)));
> +
> +	if (rs6000_new_builtin_type_compatible (types[0], parmtype0)
> +	    && rs6000_new_builtin_type_compatible (types[1], parmtype1))
> +	  {
> +	    if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node
> +		&& rs6000_new_builtin_is_supported (instance->bifid))
> +	      {
> +		tree ret_type = TREE_TYPE (instance->fntype);
> +		return altivec_build_new_resolved_builtin (args, n, fntype,
> +							   ret_type,
> +							   instance->bifid,
> +							   fcode);
> +	      }
> +	    else
> +	      unsupported_builtin = true;
> +	  }
> +      }
> +    else
> +      {
> +	/* Functions with no arguments can have only one overloaded
> +	   instance.  */
> +	gcc_assert (n > 0 || !instance->next);
> +
> +	for (; instance != NULL; instance = instance->next)
> +	  {
> +	    bool mismatch = false;
> +	    tree nextparm = TYPE_ARG_TYPES (instance->fntype);
> +
> +	    for (unsigned int arg_i = 0;
> +		 arg_i < nargs && nextparm != NULL;
> +		 arg_i++)
> +	      {
> +		tree parmtype = TREE_VALUE (nextparm);
> +		if (!rs6000_new_builtin_type_compatible (types[arg_i],
> +							 parmtype))
> +		  {
> +		    mismatch = true;
> +		    break;
> +		  }
> +		nextparm = TREE_CHAIN (nextparm);
> +	      }
> +
> +	    if (mismatch)
> +	      continue;
> +
> +	    supported = rs6000_new_builtin_is_supported (instance->bifid);
> +	    if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node
> +		&& supported)
> +	      {
> +		tree fntype = rs6000_builtin_info_x[instance->bifid].fntype;
> +		tree ret_type = TREE_TYPE (instance->fntype);
> +		return altivec_build_new_resolved_builtin (args, n, fntype,
> +							   ret_type,
> +							   instance->bifid,
> +							   fcode);
> +	      }
> +	    else
> +	      {
> +		unsupported_builtin = true;
> +		break;
> +	      }
> +	  }
> +      }
> +
> +    if (unsupported_builtin)
> +      {
> +	const char *name = rs6000_overload_info[adj_fcode].ovld_name;
> +	if (!supported)
> +	  {
> +	    const char *internal_name
> +	      = rs6000_builtin_info_x[instance->bifid].bifname;
> +	    /* An error message making reference to the name of the
> +	       non-overloaded function has already been issued.  Add
> +	       clarification of the previous message.  */
> +	    rich_location richloc (line_table, input_location);
> +	    inform (&richloc, "builtin %qs requires builtin %qs",
> +		    name, internal_name);
> +	  }
> +	else
> +	  error ("%qs is not supported in this compiler configuration", name);
> +	/* If an error-representing  result tree was returned from
> +	   altivec_build_resolved_builtin above, use it.  */

Extra space after error-representing.  Also in original code.


> +	/*
> +	return (result != NULL) ? result : error_mark_node;
> +	*/
> +	return error_mark_node;
> +      }
> +  }
> + bad:
> +  {
> +    const char *name = rs6000_overload_info[adj_fcode].ovld_name;
> +    error ("invalid parameter combination for AltiVec intrinsic %qs", name);
> +    return error_mark_node;
> +  }
> +}

ok


> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index e8625d17d18..2c68aa3580c 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -12971,6 +12971,59 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>    return false;
>  }
> 
> +/* Check whether a builtin function is supported in this target
> +   configuration.  */
> +bool
> +rs6000_new_builtin_is_supported (enum rs6000_gen_builtins fncode)
> +{
> +  switch (rs6000_builtin_info_x[(size_t) fncode].enable)
> +    {
> +    default:
> +      gcc_unreachable ();
> +    case ENB_ALWAYS:
> +      return true;
> +    case ENB_P5:
> +      return TARGET_POPCNTB;
> +    case ENB_P6:
> +      return TARGET_CMPB;
> +    case ENB_ALTIVEC:
> +      return TARGET_ALTIVEC;
> +    case ENB_CELL:
> +      return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL;
> +    case ENB_VSX:
> +      return TARGET_VSX;
> +    case ENB_P7:
> +      return TARGET_POPCNTD;
> +    case ENB_P7_64:
> +      return TARGET_POPCNTD && TARGET_POWERPC64;
> +    case ENB_P8:
> +      return TARGET_DIRECT_MOVE;
> +    case ENB_P8V:
> +      return TARGET_P8_VECTOR;
> +    case ENB_P9:
> +      return TARGET_MODULO;
> +    case ENB_P9_64:
> +      return TARGET_MODULO && TARGET_POWERPC64;
> +    case ENB_P9V:
> +      return TARGET_P9_VECTOR;
> +    case ENB_IEEE128_HW:
> +      return TARGET_FLOAT128_HW;
> +    case ENB_DFP:
> +      return TARGET_DFP;
> +    case ENB_CRYPTO:
> +      return TARGET_CRYPTO;
> +    case ENB_HTM:
> +      return TARGET_HTM;
> +    case ENB_P10:
> +      return TARGET_POWER10;
> +    case ENB_P10_64:
> +      return TARGET_POWER10 && TARGET_POWERPC64;
> +    case ENB_MMA:
> +      return TARGET_MMA;
> +    }
> +  gcc_unreachable ();
> +}

ok

> +
>  /* Expand an expression EXP that calls a built-in function,
>     with result going to TARGET if that's convenient
>     (and in mode MODE if that's convenient).
> diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c
> index f3d6156400a..f65932e1cd5 100644
> --- a/gcc/config/rs6000/rs6000-gen-builtins.c
> +++ b/gcc/config/rs6000/rs6000-gen-builtins.c
> @@ -2314,7 +2314,7 @@ write_decls (void)
> 
>    fprintf (header_file, "extern void rs6000_init_generated_builtins ();\n\n");
>    fprintf (header_file,
> -	   "extern bool rs6000_new_builtin_is_supported_p "
> +	   "extern bool rs6000_new_builtin_is_supported "
>  	   "(rs6000_gen_builtins);\n");
>    fprintf (header_file,
>  	   "extern tree rs6000_builtin_decl (unsigned, "


ok.

Thanks
-Will
Segher Boessenkool Sept. 13, 2021, 11:53 p.m. UTC | #2
On Wed, Sep 01, 2021 at 11:13:37AM -0500, Bill Schmidt wrote:
> Although this patch looks quite large, the changes are fairly minimal.
> Most of it is duplicating the large function that does the overload
> resolution using the automatically generated data structures instead of
> the old hand-generated ones.  This doesn't make the patch terribly easy to
> review, unfortunately.  Just be aware that generally we aren't changing
> the logic and functionality of overload handling.

> 	(altivec_build_new_resolved_builtin): New function.
> 	(altivec_resolve_new_overloaded_builtin): Likewise.

A new function of 973 lines (plus the function comment).  Please factor
that (can be in a later patch, but please do, you know what it all means
and does currently, now is the time :-) ).

> +static bool
> +rs6000_new_builtin_type_compatible (tree t, tree u)

This needs a function comment.  Are t and u used symmetrically at all?

> +{
> +  if (t == error_mark_node)
> +    return false;

(not here)

> +  if (POINTER_TYPE_P (t) && POINTER_TYPE_P (u))
> +    {
> +      t = TREE_TYPE (t);
> +      u = TREE_TYPE (u);
> +      if (TYPE_READONLY (u))
> +	t = build_qualified_type (t, TYPE_QUAL_CONST);
> +    }

Esp. here.  And it still creates junk trees where those are not needed
afaics, and that is not a great idea for functions that are called so
often.

> +static tree
> +altivec_build_new_resolved_builtin (tree *args, int n, tree fntype,
> +				    tree ret_type,
> +				    rs6000_gen_builtins bif_id,
> +				    rs6000_gen_builtins ovld_id)
> +{
> +  tree argtypes = TYPE_ARG_TYPES (fntype);
> +  tree arg_type[MAX_OVLD_ARGS];
> +  tree fndecl = rs6000_builtin_decls_x[bif_id];
> +  tree call;

Don't declare things so far ahead please.  Declare them right before
they are assigned to, ideally.

> +  for (int i = 0; i < n; i++)
> +    arg_type[i] = TREE_VALUE (argtypes), argtypes = TREE_CHAIN (argtypes);

Please do not use comma operators where you could use separate
statements.

> +  /* The AltiVec overloading implementation is overall gross, but this

Ooh you spell "AltiVec" correctly here ;-)

You can do
  for (int j = 0; j < n; j++)
    args[j] = fully_fold_convert (arg_type[j], args[j]);
here and then the rest becomes simpler.

> +  switch (n)
> +    {
> +    case 0:
> +      call = build_call_expr (fndecl, 0);
> +      break;
> +    case 1:
> +      call = build_call_expr (fndecl, 1,
> +			      fully_fold_convert (arg_type[0], args[0]));
> +      break;
> +    case 2:
> +      call = build_call_expr (fndecl, 2,
> +			      fully_fold_convert (arg_type[0], args[0]),
> +			      fully_fold_convert (arg_type[1], args[1]));
> +      break;
> +    case 3:
> +      call = build_call_expr (fndecl, 3,
> +			      fully_fold_convert (arg_type[0], args[0]),
> +			      fully_fold_convert (arg_type[1], args[1]),
> +			      fully_fold_convert (arg_type[2], args[2]));
> +      break;
> +    case 4:
> +      call = build_call_expr (fndecl, 4,
> +			      fully_fold_convert (arg_type[0], args[0]),
> +			      fully_fold_convert (arg_type[1], args[1]),
> +			      fully_fold_convert (arg_type[2], args[2]),
> +			      fully_fold_convert (arg_type[3], args[3]));
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +  return fold_convert (ret_type, call);
> +}

> +static tree
> +altivec_resolve_new_overloaded_builtin (location_t loc, tree fndecl,
> +					void *passed_arglist)
> +{
> +  vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
> +  unsigned int nargs = vec_safe_length (arglist);
> +  enum rs6000_gen_builtins fcode
> +    = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
> +  tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
> +  tree types[MAX_OVLD_ARGS], args[MAX_OVLD_ARGS];

Two separate lines please, they are very different things, and very
important things, too.

> +  unsigned int n;

You use this var first 792 lines later.  Please don't.

Oh well, this will become much better once this is more properly
factored.  Who knows, some of it may become readable / understandable
even!  :-)

> +      arg = (*arglist)[0];
> +      type = TREE_TYPE (arg);
> +      if (!SCALAR_FLOAT_TYPE_P (type)
> +	  && !INTEGRAL_TYPE_P (type))
> +	goto bad;

And all gotos still scream "FACTOR ME".

> +	  case E_TImode:
> +	    type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
> +	    size = 1;
> +	    break;

  type = signed_or_unsigned_type_for (unsigned_p, V1TI_type_node);
etc.

> +	arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
> +				build_int_cst (TREE_TYPE (arg2),
> +					       TYPE_VECTOR_SUBPARTS (arg1_type)
> +					       - 1), 0);

This needs some temporaries.  Whenever you are clutching the right
margin chances are you should add some extra names for readability.

> +	  if (TYPE_READONLY (TREE_TYPE (type))
> +	      && !TYPE_READONLY (TREE_TYPE (decl_type)))
> +	    warning (0, "passing argument %d of %qE discards qualifiers from "
> +		     "pointer target type", n + 1, fndecl);

It actually only tests the const qualifier.  Is there no utility
function to test all (or at least cv)?

> +	  type = build_pointer_type (build_qualified_type (TREE_TYPE (type),
> +							   0));

No new line needed.

> +	if ((GET_MODE_PRECISION (arg1_mode) > 32)
> +	    || (GET_MODE_PRECISION (arg2_mode) > 32))

Useless extra parens making things harder to read.

> +bool
> +rs6000_new_builtin_is_supported (enum rs6000_gen_builtins fncode)
> +{
> +  switch (rs6000_builtin_info_x[(size_t) fncode].enable)
> +    {
> +    default:
> +      gcc_unreachable ();

default belongs last, not first.

> +    case ENB_ALWAYS:
> +      return true;
> +    case ENB_P5:
> +      return TARGET_POPCNTB;
> +    case ENB_P6:
> +      return TARGET_CMPB;
> +    case ENB_ALTIVEC:
> +      return TARGET_ALTIVEC;
> +    case ENB_CELL:
> +      return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL;
> +    case ENB_VSX:
> +      return TARGET_VSX;
> +    case ENB_P7:
> +      return TARGET_POPCNTD;
> +    case ENB_P7_64:
> +      return TARGET_POPCNTD && TARGET_POWERPC64;
> +    case ENB_P8:
> +      return TARGET_DIRECT_MOVE;
> +    case ENB_P8V:
> +      return TARGET_P8_VECTOR;
> +    case ENB_P9:
> +      return TARGET_MODULO;
> +    case ENB_P9_64:
> +      return TARGET_MODULO && TARGET_POWERPC64;
> +    case ENB_P9V:
> +      return TARGET_P9_VECTOR;
> +    case ENB_IEEE128_HW:
> +      return TARGET_FLOAT128_HW;
> +    case ENB_DFP:
> +      return TARGET_DFP;
> +    case ENB_CRYPTO:
> +      return TARGET_CRYPTO;
> +    case ENB_HTM:
> +      return TARGET_HTM;
> +    case ENB_P10:
> +      return TARGET_POWER10;
> +    case ENB_P10_64:
> +      return TARGET_POWER10 && TARGET_POWERPC64;
> +    case ENB_MMA:
> +      return TARGET_MMA;
> +    }
> +  gcc_unreachable ();
> +}

Could you put all the CPU ones together (except maybe Cell)?  They really
mean architecture versions, and they should be renamed some day perhaps
(the TARGET_ names that is).  It now is some kind of revisionist
historical order :-)

> --- a/gcc/config/rs6000/rs6000-gen-builtins.c
> +++ b/gcc/config/rs6000/rs6000-gen-builtins.c
> @@ -2314,7 +2314,7 @@ write_decls (void)
>  
>    fprintf (header_file, "extern void rs6000_init_generated_builtins ();\n\n");
>    fprintf (header_file,
> -	   "extern bool rs6000_new_builtin_is_supported_p "
> +	   "extern bool rs6000_new_builtin_is_supported "
>  	   "(rs6000_gen_builtins);\n");

This now fits on one line, too :-)


Okay for trunk with the trivial things fixed.  And everything else needs
to be fixed later still.

Thanks!


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index afcb5bb6e39..aafb4e6a98f 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -35,6 +35,9 @@ 
 #include "langhooks.h"
 #include "c/c-tree.h"
 
+#include "rs6000-builtins.h"
+
+static tree altivec_resolve_new_overloaded_builtin (location_t, tree, void *);
 
 
 /* Handle the machine specific pragma longcall.  Its syntax is
@@ -811,6 +814,30 @@  is_float128_p (tree t)
 	      && t == long_double_type_node));
 }
   
+static bool
+rs6000_new_builtin_type_compatible (tree t, tree u)
+{
+  if (t == error_mark_node)
+    return false;
+
+  if (INTEGRAL_TYPE_P (t) && INTEGRAL_TYPE_P (u))
+    return true;
+
+  if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
+      && is_float128_p (t) && is_float128_p (u))
+    return true;
+
+  if (POINTER_TYPE_P (t) && POINTER_TYPE_P (u))
+    {
+      t = TREE_TYPE (t);
+      u = TREE_TYPE (u);
+      if (TYPE_READONLY (u))
+	t = build_qualified_type (t, TYPE_QUAL_CONST);
+    }
+
+  return lang_hooks.types_compatible_p (t, u);
+}
+
 static inline bool
 rs6000_builtin_type_compatible (tree t, int id)
 {
@@ -927,6 +954,10 @@  tree
 altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 				    void *passed_arglist)
 {
+  if (new_builtins_are_live)
+    return altivec_resolve_new_overloaded_builtin (loc, fndecl,
+						   passed_arglist);
+
   vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
   unsigned int nargs = vec_safe_length (arglist);
   enum rs6000_builtins fcode
@@ -1930,3 +1961,1060 @@  altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
     return error_mark_node;
   }
 }
+
+/* Build a tree for a function call to an Altivec non-overloaded builtin.
+   The overloaded builtin that matched the types and args is described
+   by DESC.  The N arguments are given in ARGS, respectively.
+
+   Actually the only thing it does is calling fold_convert on ARGS, with
+   a small exception for vec_{all,any}_{ge,le} predicates. */
+
+static tree
+altivec_build_new_resolved_builtin (tree *args, int n, tree fntype,
+				    tree ret_type,
+				    rs6000_gen_builtins bif_id,
+				    rs6000_gen_builtins ovld_id)
+{
+  tree argtypes = TYPE_ARG_TYPES (fntype);
+  tree arg_type[MAX_OVLD_ARGS];
+  tree fndecl = rs6000_builtin_decls_x[bif_id];
+  tree call;
+
+  for (int i = 0; i < n; i++)
+    arg_type[i] = TREE_VALUE (argtypes), argtypes = TREE_CHAIN (argtypes);
+
+  /* The AltiVec overloading implementation is overall gross, but this
+     is particularly disgusting.  The vec_{all,any}_{ge,le} builtins
+     are completely different for floating-point vs. integer vector
+     types, because the former has vcmpgefp, but the latter should use
+     vcmpgtXX.
+
+     In practice, the second and third arguments are swapped, and the
+     condition (LT vs. EQ, which is recognizable by bit 1 of the first
+     argument) is reversed.  Patch the arguments here before building
+     the resolved CALL_EXPR.  */
+  if (n == 3
+      && ovld_id == RS6000_OVLD_VEC_CMPGE_P
+      && bif_id != RS6000_BIF_VCMPGEFP_P
+      && bif_id != RS6000_BIF_XVCMPGEDP_P)
+    {
+      std::swap (args[1], args[2]);
+      std::swap (arg_type[1], arg_type[2]);
+
+      args[0] = fold_build2 (BIT_XOR_EXPR, TREE_TYPE (args[0]), args[0],
+			     build_int_cst (NULL_TREE, 2));
+    }
+
+  /* If the number of arguments to an overloaded function increases,
+     we must expand this switch.  */
+  gcc_assert (MAX_OVLD_ARGS <= 4);
+
+  switch (n)
+    {
+    case 0:
+      call = build_call_expr (fndecl, 0);
+      break;
+    case 1:
+      call = build_call_expr (fndecl, 1,
+			      fully_fold_convert (arg_type[0], args[0]));
+      break;
+    case 2:
+      call = build_call_expr (fndecl, 2,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]));
+      break;
+    case 3:
+      call = build_call_expr (fndecl, 3,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]),
+			      fully_fold_convert (arg_type[2], args[2]));
+      break;
+    case 4:
+      call = build_call_expr (fndecl, 4,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]),
+			      fully_fold_convert (arg_type[2], args[2]),
+			      fully_fold_convert (arg_type[3], args[3]));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  return fold_convert (ret_type, call);
+}
+
+/* Implementation of the resolve_overloaded_builtin target hook, to
+   support Altivec's overloaded builtins.  */
+
+static tree
+altivec_resolve_new_overloaded_builtin (location_t loc, tree fndecl,
+					void *passed_arglist)
+{
+  vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
+  unsigned int nargs = vec_safe_length (arglist);
+  enum rs6000_gen_builtins fcode
+    = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+  tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+  tree types[MAX_OVLD_ARGS], args[MAX_OVLD_ARGS];
+  unsigned int n;
+
+  /* Return immediately if this isn't an overload.  */
+  if (fcode <= RS6000_OVLD_NONE)
+    return NULL_TREE;
+
+  unsigned int adj_fcode = fcode - RS6000_OVLD_NONE;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n",
+	     (int) fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
+
+  /* vec_lvsl and vec_lvsr are deprecated for use with LE element order.  */
+  if (fcode == RS6000_OVLD_VEC_LVSL && !BYTES_BIG_ENDIAN)
+    warning (OPT_Wdeprecated,
+	     "%<vec_lvsl%> is deprecated for little endian; use "
+	     "assignment for unaligned loads and stores");
+  else if (fcode == RS6000_OVLD_VEC_LVSR && !BYTES_BIG_ENDIAN)
+    warning (OPT_Wdeprecated,
+	     "%<vec_lvsr%> is deprecated for little endian; use "
+	     "assignment for unaligned loads and stores");
+
+  if (fcode == RS6000_OVLD_VEC_MUL)
+    {
+      /* vec_mul needs to be special cased because there are no instructions
+	 for it for the {un}signed char, {un}signed short, and {un}signed int
+	 types.  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_mul");
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  case E_QImode:
+	  case E_HImode:
+	  case E_SImode:
+	  case E_DImode:
+	  case E_TImode:
+	    {
+	      /* For scalar types just use a multiply expression.  */
+	      return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0,
+				      fold_convert (TREE_TYPE (arg0), arg1));
+	    }
+	  case E_SFmode:
+	    {
+	      /* For floats use the xvmulsp instruction directly.  */
+	      tree call = rs6000_builtin_decls_x[RS6000_BIF_XVMULSP];
+	      return build_call_expr (call, 2, arg0, arg1);
+	    }
+	  case E_DFmode:
+	    {
+	      /* For doubles use the xvmuldp instruction directly.  */
+	      tree call = rs6000_builtin_decls_x[RS6000_BIF_XVMULDP];
+	      return build_call_expr (call, 2, arg0, arg1);
+	    }
+	  /* Other types are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  if (fcode == RS6000_OVLD_VEC_CMPNE)
+    {
+      /* vec_cmpne needs to be special cased because there are no instructions
+	 for it (prior to power 9).  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_cmpne");
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
+      /* Power9 instructions provide the most efficient implementation of
+	 ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode
+	 or SFmode or DFmode.  */
+      if (!TARGET_P9_VECTOR
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DImode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == TImode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode))
+	{
+	  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	    {
+	      /* vec_cmpneq (va, vb) == vec_nor (vec_cmpeq (va, vb),
+		 vec_cmpeq (va, vb)).  */
+	      /* Note:  vec_nand also works but opt changes vec_nand's
+		 to vec_nor's anyway.  */
+	    case E_QImode:
+	    case E_HImode:
+	    case E_SImode:
+	    case E_DImode:
+	    case E_TImode:
+	    case E_SFmode:
+	    case E_DFmode:
+	      {
+		/* call = vec_cmpeq (va, vb)
+		   result = vec_nor (call, call).  */
+		vec<tree, va_gc> *params = make_tree_vector ();
+		vec_safe_push (params, arg0);
+		vec_safe_push (params, arg1);
+		tree call = altivec_resolve_new_overloaded_builtin
+		  (loc, rs6000_builtin_decls_x[RS6000_OVLD_VEC_CMPEQ],
+		   params);
+		/* Use save_expr to ensure that operands used more than once
+		   that may have side effects (like calls) are only evaluated
+		   once.  */
+		call = save_expr (call);
+		params = make_tree_vector ();
+		vec_safe_push (params, call);
+		vec_safe_push (params, call);
+		return altivec_resolve_new_overloaded_builtin
+		  (loc, rs6000_builtin_decls_x[RS6000_OVLD_VEC_NOR], params);
+	      }
+	      /* Other types are errors.  */
+	    default:
+	      goto bad;
+	    }
+	}
+      /* else, fall through and process the Power9 alternative below */
+    }
+
+  if (fcode == RS6000_OVLD_VEC_ADDE || fcode == RS6000_OVLD_VEC_SUBE)
+    {
+      /* vec_adde needs to be special cased because there is no instruction
+	  for the {un}signed int version.  */
+      if (nargs != 3)
+	{
+	  const char *name;
+	  name = fcode == RS6000_OVLD_VEC_ADDE ? "vec_adde" : "vec_sube";
+	  error ("builtin %qs only accepts 3 arguments", name);
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+      tree arg2 = (*arglist)[2];
+      tree arg2_type = TREE_TYPE (arg2);
+
+      /* All 3 arguments must be vectors of (signed or unsigned) (int or
+	 __int128) and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  /* For {un}signed ints,
+	     vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb),
+						   vec_and (carryv, 1)).
+	     vec_sube (va, vb, carryv) == vec_sub (vec_sub (va, vb),
+						   vec_and (carryv, 1)).  */
+	  case E_SImode:
+	    {
+	      tree add_sub_builtin;
+
+	      vec<tree, va_gc> *params = make_tree_vector ();
+	      vec_safe_push (params, arg0);
+	      vec_safe_push (params, arg1);
+
+	      if (fcode == RS6000_OVLD_VEC_ADDE)
+		add_sub_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADD];
+	      else
+		add_sub_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUB];
+
+	      tree call
+		= altivec_resolve_new_overloaded_builtin (loc,
+							  add_sub_builtin,
+							  params);
+	      tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
+	      tree ones_vector = build_vector_from_val (arg0_type, const1);
+	      tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
+					       arg2, ones_vector);
+	      params = make_tree_vector ();
+	      vec_safe_push (params, call);
+	      vec_safe_push (params, and_expr);
+	      return altivec_resolve_new_overloaded_builtin (loc,
+							     add_sub_builtin,
+							     params);
+	    }
+	  /* For {un}signed __int128s use the vaddeuqm/vsubeuqm instruction
+	     directly.  */
+	  case E_TImode:
+	    break;
+
+	  /* Types other than {un}signed int and {un}signed __int128
+		are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  if (fcode == RS6000_OVLD_VEC_ADDEC || fcode == RS6000_OVLD_VEC_SUBEC)
+    {
+      /* vec_addec and vec_subec need to be special cased because there is
+	 no instruction for the {un}signed int version.  */
+      if (nargs != 3)
+	{
+	  const char *name;
+	  name = fcode == RS6000_OVLD_VEC_ADDEC ? "vec_addec" : "vec_subec";
+	  error ("builtin %qs only accepts 3 arguments", name);
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+      tree arg2 = (*arglist)[2];
+      tree arg2_type = TREE_TYPE (arg2);
+
+      /* All 3 arguments must be vectors of (signed or unsigned) (int or
+	 __int128) and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  /* For {un}signed ints,
+	      vec_addec (va, vb, carryv) ==
+				vec_or (vec_addc (va, vb),
+					vec_addc (vec_add (va, vb),
+						  vec_and (carryv, 0x1))).  */
+	  case E_SImode:
+	    {
+	    /* Use save_expr to ensure that operands used more than once
+	       that may have side effects (like calls) are only evaluated
+	       once.  */
+	    tree as_builtin;
+	    tree as_c_builtin;
+
+	    arg0 = save_expr (arg0);
+	    arg1 = save_expr (arg1);
+	    vec<tree, va_gc> *params = make_tree_vector ();
+	    vec_safe_push (params, arg0);
+	    vec_safe_push (params, arg1);
+
+	    if (fcode == RS6000_OVLD_VEC_ADDEC)
+	      as_c_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADDC];
+	    else
+	      as_c_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUBC];
+
+	    tree call1 = altivec_resolve_new_overloaded_builtin (loc,
+								 as_c_builtin,
+								 params);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, arg0);
+	    vec_safe_push (params, arg1);
+
+
+	    if (fcode == RS6000_OVLD_VEC_ADDEC)
+	      as_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_ADD];
+	    else
+	      as_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_SUB];
+
+	    tree call2 = altivec_resolve_new_overloaded_builtin (loc,
+								 as_builtin,
+								 params);
+	    tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
+	    tree ones_vector = build_vector_from_val (arg0_type, const1);
+	    tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
+					     arg2, ones_vector);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, call2);
+	    vec_safe_push (params, and_expr);
+	    call2 = altivec_resolve_new_overloaded_builtin (loc, as_c_builtin,
+							    params);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, call1);
+	    vec_safe_push (params, call2);
+	    tree or_builtin = rs6000_builtin_decls_x[RS6000_OVLD_VEC_OR];
+	    return altivec_resolve_new_overloaded_builtin (loc, or_builtin,
+							   params);
+	    }
+	  /* For {un}signed __int128s use the vaddecuq/vsubecuq
+	     instructions.  This occurs through normal processing.  */
+	  case E_TImode:
+	    break;
+
+	  /* Types other than {un}signed int and {un}signed __int128
+	     are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  /* For now treat vec_splats and vec_promote as the same.  */
+  if (fcode == RS6000_OVLD_VEC_SPLATS || fcode == RS6000_OVLD_VEC_PROMOTE)
+    {
+      tree type, arg;
+      int size;
+      int i;
+      bool unsigned_p;
+      vec<constructor_elt, va_gc> *vec;
+      const char *name;
+      name = fcode == RS6000_OVLD_VEC_SPLATS ? "vec_splats" : "vec_promote";
+
+      if (fcode == RS6000_OVLD_VEC_SPLATS && nargs != 1)
+	{
+	  error ("builtin %qs only accepts 1 argument", name);
+	  return error_mark_node;
+	}
+      if (fcode == RS6000_OVLD_VEC_PROMOTE && nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", name);
+	  return error_mark_node;
+	}
+      /* Ignore promote's element argument.  */
+      if (fcode == RS6000_OVLD_VEC_PROMOTE
+	  && !INTEGRAL_TYPE_P (TREE_TYPE ((*arglist)[1])))
+	goto bad;
+
+      arg = (*arglist)[0];
+      type = TREE_TYPE (arg);
+      if (!SCALAR_FLOAT_TYPE_P (type)
+	  && !INTEGRAL_TYPE_P (type))
+	goto bad;
+      unsigned_p = TYPE_UNSIGNED (type);
+      switch (TYPE_MODE (type))
+	{
+	  case E_TImode:
+	    type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
+	    size = 1;
+	    break;
+	  case E_DImode:
+	    type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
+	    size = 2;
+	    break;
+	  case E_SImode:
+	    type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
+	    size = 4;
+	    break;
+	  case E_HImode:
+	    type = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
+	    size = 8;
+	    break;
+	  case E_QImode:
+	    type = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
+	    size = 16;
+	    break;
+	  case E_SFmode:
+	    type = V4SF_type_node;
+	    size = 4;
+	    break;
+	  case E_DFmode:
+	    type = V2DF_type_node;
+	    size = 2;
+	    break;
+	  default:
+	    goto bad;
+	}
+      arg = save_expr (fold_convert (TREE_TYPE (type), arg));
+      vec_alloc (vec, size);
+      for (i = 0; i < size; i++)
+	{
+	  constructor_elt elt = {NULL_TREE, arg};
+	  vec->quick_push (elt);
+	}
+      return build_constructor (type, vec);
+    }
+
+  /* For now use pointer tricks to do the extraction, unless we are on VSX
+     extracting a double from a constant offset.  */
+  if (fcode == RS6000_OVLD_VEC_EXTRACT)
+    {
+      tree arg1;
+      tree arg1_type;
+      tree arg2;
+      tree arg1_inner_type;
+      tree decl, stmt;
+      tree innerptrtype;
+      machine_mode mode;
+
+      /* Require exactly two arguments.  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_extract");
+	  return error_mark_node;
+	}
+
+      arg2 = (*arglist)[1];
+      arg1 = (*arglist)[0];
+      arg1_type = TREE_TYPE (arg1);
+
+      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+	goto bad;
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+	goto bad;
+
+      /* See if we can optimize vec_extracts with the current VSX instruction
+	 set.  */
+      mode = TYPE_MODE (arg1_type);
+      if (VECTOR_MEM_VSX_P (mode))
+
+	{
+	  tree call = NULL_TREE;
+	  int nunits = GET_MODE_NUNITS (mode);
+
+	  arg2 = fold_for_warn (arg2);
+
+	  /* If the second argument is an integer constant, generate
+	     the built-in code if we can.  We need 64-bit and direct
+	     move to extract the small integer vectors.  */
+	  if (TREE_CODE (arg2) == INTEGER_CST)
+	    {
+	      wide_int selector = wi::to_wide (arg2);
+	      selector = wi::umod_trunc (selector, nunits);
+	      arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V1TImode:
+		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V1TI];
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DF];
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DI];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V16QI];
+		  break;
+		}
+	    }
+
+	  /* If the second argument is variable, we can optimize it if we are
+	     generating 64-bit code on a machine with direct move.  */
+	  else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
+	    {
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DF];
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V2DI];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  call = rs6000_builtin_decls_x[RS6000_BIF_VEC_EXT_V16QI];
+		  break;
+		}
+	    }
+
+	  if (call)
+	    {
+	      tree result = build_call_expr (call, 2, arg1, arg2);
+	      /* Coerce the result to vector element type.  May be no-op.  */
+	      arg1_inner_type = TREE_TYPE (arg1_type);
+	      result = fold_convert (arg1_inner_type, result);
+	      return result;
+	    }
+	}
+
+      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
+      arg1_inner_type = TREE_TYPE (arg1_type);
+      arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+			      build_int_cst (TREE_TYPE (arg2),
+					     TYPE_VECTOR_SUBPARTS (arg1_type)
+					     - 1), 0);
+      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+      DECL_EXTERNAL (decl) = 0;
+      TREE_PUBLIC (decl) = 0;
+      DECL_CONTEXT (decl) = current_function_decl;
+      TREE_USED (decl) = 1;
+      TREE_TYPE (decl) = arg1_type;
+      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
+
+      innerptrtype = build_pointer_type (arg1_inner_type);
+
+      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+      stmt = convert (innerptrtype, stmt);
+      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+      stmt = build_indirect_ref (loc, stmt, RO_NULL);
+
+      /* PR83660: We mark this as having side effects so that
+	 downstream in fold_build_cleanup_point_expr () it will get a
+	 CLEANUP_POINT_EXPR.  If it does not we can run into an ICE
+	 later in gimplify_cleanup_point_expr ().  Potentially this
+	 causes missed optimization because there actually is no side
+	 effect.  */
+      if (c_dialect_cxx ())
+	TREE_SIDE_EFFECTS (stmt) = 1;
+
+      return stmt;
+    }
+
+  /* For now use pointer tricks to do the insertion, unless we are on VSX
+     inserting a double at a constant offset.  */
+  if (fcode == RS6000_OVLD_VEC_INSERT)
+    {
+      tree arg0;
+      tree arg1;
+      tree arg2;
+      tree arg1_type;
+      tree decl, stmt;
+      machine_mode mode;
+
+      /* Require exactly three arguments.  */
+      if (nargs != 3)
+	{
+	  error ("builtin %qs only accepts 3 arguments", "vec_insert");
+	  return error_mark_node;
+	}
+
+      arg0 = (*arglist)[0];
+      arg1 = (*arglist)[1];
+      arg1_type = TREE_TYPE (arg1);
+      arg2 = fold_for_warn ((*arglist)[2]);
+
+      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+	goto bad;
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+	goto bad;
+
+      /* If we can use the VSX xxpermdi instruction, use that for insert.  */
+      mode = TYPE_MODE (arg1_type);
+      if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
+	  && TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  wide_int selector = wi::to_wide (arg2);
+	  selector = wi::umod_trunc (selector, 2);
+	  tree call = NULL_TREE;
+
+	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	  if (mode == V2DFmode)
+	    call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V2DF];
+	  else if (mode == V2DImode)
+	    call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V2DI];
+
+	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+	     reversed.  */
+	  if (call)
+	    return build_call_expr (call, 3, arg1, arg0, arg2);
+	}
+      else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode)
+	       && TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  tree call = rs6000_builtin_decls_x[RS6000_BIF_VEC_SET_V1TI];
+	  wide_int selector = wi::zero(32);
+
+	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+	     reversed.  */
+	  return build_call_expr (call, 3, arg1, arg0, arg2);
+	}
+
+      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0 with
+	 VIEW_CONVERT_EXPR.  i.e.:
+	 D.3192 = v1;
+	 _1 = n & 3;
+	 VIEW_CONVERT_EXPR<int[4]>(D.3192)[_1] = i;
+	 v1 = D.3192;
+	 D.3194 = v1;  */
+      if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
+	arg2 = build_int_cst (TREE_TYPE (arg2), 0);
+      else
+	arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+				build_int_cst (TREE_TYPE (arg2),
+					       TYPE_VECTOR_SUBPARTS (arg1_type)
+					       - 1), 0);
+      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+      DECL_EXTERNAL (decl) = 0;
+      TREE_PUBLIC (decl) = 0;
+      DECL_CONTEXT (decl) = current_function_decl;
+      TREE_USED (decl) = 1;
+      TREE_TYPE (decl) = arg1_type;
+      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+      TREE_ADDRESSABLE (decl) = 1;
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
+
+      if (TARGET_VSX)
+	{
+	  stmt = build_array_ref (loc, stmt, arg2);
+	  stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
+			      convert (TREE_TYPE (stmt), arg0));
+	  stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
+	}
+      else
+	{
+	  tree arg1_inner_type;
+	  tree innerptrtype;
+	  arg1_inner_type = TREE_TYPE (arg1_type);
+	  innerptrtype = build_pointer_type (arg1_inner_type);
+
+	  stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+	  stmt = convert (innerptrtype, stmt);
+	  stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+	  stmt = build_indirect_ref (loc, stmt, RO_NULL);
+	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
+			 convert (TREE_TYPE (stmt), arg0));
+	  stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
+	}
+      return stmt;
+    }
+
+  for (n = 0;
+       !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
+       fnargs = TREE_CHAIN (fnargs), n++)
+    {
+      tree decl_type = TREE_VALUE (fnargs);
+      tree arg = (*arglist)[n];
+      tree type;
+
+      if (arg == error_mark_node)
+	return error_mark_node;
+
+      if (n >= MAX_OVLD_ARGS)
+	abort ();
+
+      arg = default_conversion (arg);
+
+      /* The C++ front-end converts float * to const void * using
+	 NOP_EXPR<const void *> (NOP_EXPR<void *> (x)).  */
+      type = TREE_TYPE (arg);
+      if (POINTER_TYPE_P (type)
+	  && TREE_CODE (arg) == NOP_EXPR
+	  && lang_hooks.types_compatible_p (TREE_TYPE (arg),
+					    const_ptr_type_node)
+	  && lang_hooks.types_compatible_p (TREE_TYPE (TREE_OPERAND (arg, 0)),
+					    ptr_type_node))
+	{
+	  arg = TREE_OPERAND (arg, 0);
+	  type = TREE_TYPE (arg);
+	}
+
+      /* Remove the const from the pointers to simplify the overload
+	 matching further down.  */
+      if (POINTER_TYPE_P (decl_type)
+	  && POINTER_TYPE_P (type)
+	  && TYPE_QUALS (TREE_TYPE (type)) != 0)
+	{
+	  if (TYPE_READONLY (TREE_TYPE (type))
+	      && !TYPE_READONLY (TREE_TYPE (decl_type)))
+	    warning (0, "passing argument %d of %qE discards qualifiers from "
+		     "pointer target type", n + 1, fndecl);
+	  type = build_pointer_type (build_qualified_type (TREE_TYPE (type),
+							   0));
+	  arg = fold_convert (type, arg);
+	}
+
+      /* For RS6000_OVLD_VEC_LXVL, convert any const * to its non constant
+	 equivalent to simplify the overload matching below.  */
+      if (fcode == RS6000_OVLD_VEC_LXVL)
+	{
+	  if (POINTER_TYPE_P (type)
+	      && TYPE_READONLY (TREE_TYPE (type)))
+	    {
+	      type = build_pointer_type (build_qualified_type (
+						TREE_TYPE (type),0));
+	      arg = fold_convert (type, arg);
+	    }
+	}
+
+      args[n] = arg;
+      types[n] = type;
+    }
+
+  /* If the number of arguments did not match the prototype, return NULL
+     and the generic code will issue the appropriate error message.  */
+  if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs)
+    return NULL;
+
+  if (fcode == RS6000_OVLD_VEC_STEP)
+    {
+      if (TREE_CODE (types[0]) != VECTOR_TYPE)
+	goto bad;
+
+      return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0]));
+    }
+
+  {
+    bool unsupported_builtin = false;
+    enum rs6000_gen_builtins overloaded_code;
+    bool supported = false;
+    ovlddata *instance = rs6000_overload_info[adj_fcode].first_instance;
+    gcc_assert (instance != NULL);
+
+    /* Need to special case __builtin_cmpb because the overloaded forms
+       of this function take (unsigned int, unsigned int) or (unsigned
+       long long int, unsigned long long int).  Since C conventions
+       allow the respective argument types to be implicitly coerced into
+       each other, the default handling does not provide adequate
+       discrimination between the desired forms of the function.  */
+    if (fcode == RS6000_OVLD_SCAL_CMPB)
+      {
+	machine_mode arg1_mode = TYPE_MODE (types[0]);
+	machine_mode arg2_mode = TYPE_MODE (types[1]);
+
+	if (nargs != 2)
+	  {
+	    error ("builtin %qs only accepts 2 arguments", "__builtin_cmpb");
+	    return error_mark_node;
+	  }
+
+	/* If any supplied arguments are wider than 32 bits, resolve to
+	   64-bit variant of built-in function.  */
+	if ((GET_MODE_PRECISION (arg1_mode) > 32)
+	    || (GET_MODE_PRECISION (arg2_mode) > 32))
+	  {
+	    /* Assure all argument and result types are compatible with
+	       the built-in function represented by RS6000_BIF_CMPB.  */
+	    overloaded_code = RS6000_BIF_CMPB;
+	  }
+	else
+	  {
+	    /* Assure all argument and result types are compatible with
+	       the built-in function represented by RS6000_BIF_CMPB_32.  */
+	    overloaded_code = RS6000_BIF_CMPB_32;
+	  }
+
+	while (instance && instance->bifid != overloaded_code)
+	  instance = instance->next;
+
+	gcc_assert (instance != NULL);
+	tree fntype = rs6000_builtin_info_x[instance->bifid].fntype;
+	tree parmtype0 = TREE_VALUE (TYPE_ARG_TYPES (fntype));
+	tree parmtype1 = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (fntype)));
+
+	if (rs6000_new_builtin_type_compatible (types[0], parmtype0)
+	    && rs6000_new_builtin_type_compatible (types[1], parmtype1))
+	  {
+	    if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node
+		&& rs6000_new_builtin_is_supported (instance->bifid))
+	      {
+		tree ret_type = TREE_TYPE (instance->fntype);
+		return altivec_build_new_resolved_builtin (args, n, fntype,
+							   ret_type,
+							   instance->bifid,
+							   fcode);
+	      }
+	    else
+	      unsupported_builtin = true;
+	  }
+      }
+    else if (fcode == RS6000_OVLD_VEC_VSIE)
+      {
+	machine_mode arg1_mode = TYPE_MODE (types[0]);
+
+	if (nargs != 2)
+	  {
+	    error ("builtin %qs only accepts 2 arguments",
+		   "scalar_insert_exp");
+	    return error_mark_node;
+	  }
+
+	/* If supplied first argument is wider than 64 bits, resolve to
+	   128-bit variant of built-in function.  */
+	if (GET_MODE_PRECISION (arg1_mode) > 64)
+	  {
+	    /* If first argument is of float variety, choose variant
+	       that expects __ieee128 argument.  Otherwise, expect
+	       __int128 argument.  */
+	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
+	      overloaded_code = RS6000_BIF_VSIEQPF;
+	    else
+	      overloaded_code = RS6000_BIF_VSIEQP;
+	  }
+	else
+	  {
+	    /* If first argument is of float variety, choose variant
+	       that expects double argument.  Otherwise, expect
+	       long long int argument.  */
+	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
+	      overloaded_code = RS6000_BIF_VSIEDPF;
+	    else
+	      overloaded_code = RS6000_BIF_VSIEDP;
+	  }
+
+	while (instance && instance->bifid != overloaded_code)
+	  instance = instance->next;
+
+	gcc_assert (instance != NULL);
+	tree fntype = rs6000_builtin_info_x[instance->bifid].fntype;
+	tree parmtype0 = TREE_VALUE (TYPE_ARG_TYPES (fntype));
+	tree parmtype1 = TREE_VALUE (TREE_CHAIN (TYPE_ARG_TYPES (fntype)));
+
+	if (rs6000_new_builtin_type_compatible (types[0], parmtype0)
+	    && rs6000_new_builtin_type_compatible (types[1], parmtype1))
+	  {
+	    if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node
+		&& rs6000_new_builtin_is_supported (instance->bifid))
+	      {
+		tree ret_type = TREE_TYPE (instance->fntype);
+		return altivec_build_new_resolved_builtin (args, n, fntype,
+							   ret_type,
+							   instance->bifid,
+							   fcode);
+	      }
+	    else
+	      unsupported_builtin = true;
+	  }
+      }
+    else
+      {
+	/* Functions with no arguments can have only one overloaded
+	   instance.  */
+	gcc_assert (n > 0 || !instance->next);
+
+	for (; instance != NULL; instance = instance->next)
+	  {
+	    bool mismatch = false;
+	    tree nextparm = TYPE_ARG_TYPES (instance->fntype);
+
+	    for (unsigned int arg_i = 0;
+		 arg_i < nargs && nextparm != NULL;
+		 arg_i++)
+	      {
+		tree parmtype = TREE_VALUE (nextparm);
+		if (!rs6000_new_builtin_type_compatible (types[arg_i],
+							 parmtype))
+		  {
+		    mismatch = true;
+		    break;
+		  }
+		nextparm = TREE_CHAIN (nextparm);
+	      }
+
+	    if (mismatch)
+	      continue;
+
+	    supported = rs6000_new_builtin_is_supported (instance->bifid);
+	    if (rs6000_builtin_decl (instance->bifid, false) != error_mark_node
+		&& supported)
+	      {
+		tree fntype = rs6000_builtin_info_x[instance->bifid].fntype;
+		tree ret_type = TREE_TYPE (instance->fntype);
+		return altivec_build_new_resolved_builtin (args, n, fntype,
+							   ret_type,
+							   instance->bifid,
+							   fcode);
+	      }
+	    else
+	      {
+		unsupported_builtin = true;
+		break;
+	      }
+	  }
+      }
+
+    if (unsupported_builtin)
+      {
+	const char *name = rs6000_overload_info[adj_fcode].ovld_name;
+	if (!supported)
+	  {
+	    const char *internal_name
+	      = rs6000_builtin_info_x[instance->bifid].bifname;
+	    /* An error message making reference to the name of the
+	       non-overloaded function has already been issued.  Add
+	       clarification of the previous message.  */
+	    rich_location richloc (line_table, input_location);
+	    inform (&richloc, "builtin %qs requires builtin %qs",
+		    name, internal_name);
+	  }
+	else
+	  error ("%qs is not supported in this compiler configuration", name);
+	/* If an error-representing result tree was returned from
+	   altivec_build_new_resolved_builtin above, use it.  */
+	/*
+	return (result != NULL) ? result : error_mark_node;
+	*/
+	return error_mark_node;
+      }
+  }
+ bad:
+  {
+    const char *name = rs6000_overload_info[adj_fcode].ovld_name;
+    error ("invalid parameter combination for AltiVec intrinsic %qs", name);
+    return error_mark_node;
+  }
+}
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index e8625d17d18..2c68aa3580c 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -12971,6 +12971,59 @@  rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   return false;
 }
 
+/* Check whether a builtin function is supported in this target
+   configuration.  */
+bool
+rs6000_new_builtin_is_supported (enum rs6000_gen_builtins fncode)
+{
+  switch (rs6000_builtin_info_x[(size_t) fncode].enable)
+    {
+    default:
+      gcc_unreachable ();
+    case ENB_ALWAYS:
+      return true;
+    case ENB_P5:
+      return TARGET_POPCNTB;
+    case ENB_P6:
+      return TARGET_CMPB;
+    case ENB_ALTIVEC:
+      return TARGET_ALTIVEC;
+    case ENB_CELL:
+      return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL;
+    case ENB_VSX:
+      return TARGET_VSX;
+    case ENB_P7:
+      return TARGET_POPCNTD;
+    case ENB_P7_64:
+      return TARGET_POPCNTD && TARGET_POWERPC64;
+    case ENB_P8:
+      return TARGET_DIRECT_MOVE;
+    case ENB_P8V:
+      return TARGET_P8_VECTOR;
+    case ENB_P9:
+      return TARGET_MODULO;
+    case ENB_P9_64:
+      return TARGET_MODULO && TARGET_POWERPC64;
+    case ENB_P9V:
+      return TARGET_P9_VECTOR;
+    case ENB_IEEE128_HW:
+      return TARGET_FLOAT128_HW;
+    case ENB_DFP:
+      return TARGET_DFP;
+    case ENB_CRYPTO:
+      return TARGET_CRYPTO;
+    case ENB_HTM:
+      return TARGET_HTM;
+    case ENB_P10:
+      return TARGET_POWER10;
+    case ENB_P10_64:
+      return TARGET_POWER10 && TARGET_POWERPC64;
+    case ENB_MMA:
+      return TARGET_MMA;
+    }
+  gcc_unreachable ();
+}
+
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient
    (and in mode MODE if that's convenient).
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c
index f3d6156400a..f65932e1cd5 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -2314,7 +2314,7 @@  write_decls (void)
 
   fprintf (header_file, "extern void rs6000_init_generated_builtins ();\n\n");
   fprintf (header_file,
-	   "extern bool rs6000_new_builtin_is_supported_p "
+	   "extern bool rs6000_new_builtin_is_supported "
 	   "(rs6000_gen_builtins);\n");
   fprintf (header_file,
 	   "extern tree rs6000_builtin_decl (unsigned, "