diff mbox series

[v2,2/2] aarch64: Improve part-variable vector initialization with SVE INDEX instruction [PR113328]

Message ID 20240912213120.17158-1-quic_pzheng@quicinc.com
State New
Headers show
Series None | expand

Commit Message

Pengxuan Zheng Sept. 12, 2024, 9:31 p.m. UTC
We can still use SVE's INDEX instruction to construct vectors even if not all
elements are constants. For example, { 0, x, 2, 3 } can be constructed by first
using "INDEX #0, #1" to generate { 0, 1, 2, 3 }, and then setting the
non-constant elements separately.

	PR target/113328

gcc/ChangeLog:

	* config/aarch64/aarch64.cc (aarch64_expand_vector_init_fallback):
	Improve part-variable vector generation with SVE's INDEX if TARGET_SVE
	is available.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
	check-function-bodies.
	* gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
	* gcc.target/aarch64/sve/vec_init_4.c: New test.
	* gcc.target/aarch64/sve/vec_init_5.c: New test.

Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com>
---
 gcc/config/aarch64/aarch64.cc                 | 81 ++++++++++++++++++-
 .../aarch64/sve/acle/general/dupq_1.c         | 18 ++++-
 .../aarch64/sve/acle/general/dupq_2.c         | 18 ++++-
 .../aarch64/sve/acle/general/dupq_3.c         | 18 ++++-
 .../aarch64/sve/acle/general/dupq_4.c         | 18 ++++-
 .../gcc.target/aarch64/sve/vec_init_4.c       | 47 +++++++++++
 .../gcc.target/aarch64/sve/vec_init_5.c       | 12 +++
 7 files changed, 199 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c

Comments

Richard Sandiford Sept. 17, 2024, 12:50 p.m. UTC | #1
Pengxuan Zheng <quic_pzheng@quicinc.com> writes:
> We can still use SVE's INDEX instruction to construct vectors even if not all
> elements are constants. For example, { 0, x, 2, 3 } can be constructed by first
> using "INDEX #0, #1" to generate { 0, 1, 2, 3 }, and then set the elements which
> are non-constants separately.
>
> 	PR target/113328
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64.cc (aarch64_expand_vector_init_fallback):
> 	Improve part-variable vector generation with SVE's INDEX if TARGET_SVE
> 	is available.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
> 	check-function-bodies.
> 	* gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
> 	* gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
> 	* gcc.target/aarch64/sve/vec_init_4.c: New test.
> 	* gcc.target/aarch64/sve/vec_init_5.c: New test.
>
> Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com>
> ---
>  gcc/config/aarch64/aarch64.cc                 | 81 ++++++++++++++++++-
>  .../aarch64/sve/acle/general/dupq_1.c         | 18 ++++-
>  .../aarch64/sve/acle/general/dupq_2.c         | 18 ++++-
>  .../aarch64/sve/acle/general/dupq_3.c         | 18 ++++-
>  .../aarch64/sve/acle/general/dupq_4.c         | 18 ++++-
>  .../gcc.target/aarch64/sve/vec_init_4.c       | 47 +++++++++++
>  .../gcc.target/aarch64/sve/vec_init_5.c       | 12 +++
>  7 files changed, 199 insertions(+), 13 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 6b3ca57d0eb..7305a5c6375 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -23942,12 +23942,91 @@ aarch64_expand_vector_init_fallback (rtx target, rtx vals)
>    if (n_var != n_elts)
>      {
>        rtx copy = copy_rtx (vals);
> +      bool is_index_seq = false;
> +
> +      /* If at least half of the elements of the vector are constants and all
> +	 these constant elements form a linear sequence of the form { B, B + S,
> +	 B + 2 * S, B + 3 * S, ... }, we can generate the vector with SVE's
> +	 INDEX instruction if SVE is available and then set the elements which
> +	 are not constant separately.  More precisely, each constant element I
> +	 has to be B + I * S where B and S must be valid immediate operand for
> +	 an SVE INDEX instruction.
> +
> +	 For example, { X, 1, 2, 3} is a vector satisfying these conditions and
> +	 we can generate a vector of all constants (i.e., { 0, 1, 2, 3 }) first
> +	 and then set the first element of the vector to X.  */
> +
> +      if (TARGET_SVE && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
> +	  && n_var <= n_elts / 2)
> +	{
> +	  int const_idx = -1;
> +	  HOST_WIDE_INT const_val = 0;
> +	  int base = 16;
> +	  int step = 16;
> +
> +	  for (int i = 0; i < n_elts; ++i)
> +	    {
> +	      rtx x = XVECEXP (vals, 0, i);
> +
> +	      if (!CONST_INT_P (x))
> +		continue;
> +
> +	      if (const_idx == -1)
> +		{
> +		  const_idx = i;
> +		  const_val = INTVAL (x);
> +		}
> +	      else
> +		{
> +		  if ((INTVAL (x) - const_val) % (i - const_idx) == 0)
> +		    {
> +		      HOST_WIDE_INT s
> +			  = (INTVAL (x) - const_val) / (i - const_idx);
> +		      if (s >= -16 && s <= 15)
> +			{
> +			  int b = const_val - s * const_idx;
> +			  if (b >= -16 && b <= 15)
> +			    {
> +			      base = b;
> +			      step = s;
> +			    }
> +			}
> +		    }
> +		  break;
> +		}
> +	    }
> +
> +	  if (base != 16
> +	      && (!CONST_INT_P (v0)
> +		  || (CONST_INT_P (v0) && INTVAL (v0) == base)))
> +	    {
> +	      if (!CONST_INT_P (v0))
> +		XVECEXP (copy, 0, 0) = GEN_INT (base);
> +
> +	      is_index_seq = true;
> +	      for (int i = 1; i < n_elts; ++i)
> +		{
> +		  rtx x = XVECEXP (copy, 0, i);
> +
> +		  if (CONST_INT_P (x))
> +		    {
> +		      if (INTVAL (x) != base + i * step)
> +			{
> +			  is_index_seq = false;
> +			  break;
> +			}
> +		    }
> +		  else
> +		    XVECEXP (copy, 0, i) = GEN_INT (base + i * step);
> +		}
> +	    }
> +	}

This seems a bit more complex than I was hoping for, although the
complexity is probably justified.

Seeing how awkward it is to do this using current interfaces, I think
I'd instead prefer to do something that I'd been vaguely hoping to do
for a while: extend vector-builder.h to accept wildcard/don't care values.
finalize () could then replace the wildcards with whatever gives the
"nicest" encoding.

That's also going to be relatively complex, but I think it'd be more
general, and might help with the existing vec_init code as well.
It would also be a step towards optimising -1 indices for
__builtin_shufflevector.  It might be a few weeks before I can post
something though.

Pushing 1/2 without 2/2 has meant that the dupq tests will fail in the
meantime, but that's ok.  In general, though, it's better not to push
individual patches from a series unless they've been tested in isolation
and are known to give clean test results.

Thanks,
Richard
Pengxuan Zheng Sept. 17, 2024, 4:40 p.m. UTC | #2
> Pengxuan Zheng <quic_pzheng@quicinc.com> writes:
> > We can still use SVE's INDEX instruction to construct vectors even if
> > not all elements are constants. For example, { 0, x, 2, 3 } can be
> > constructed by first using "INDEX #0, #1" to generate { 0, 1, 2, 3 },
> > and then set the elements which are non-constants separately.
> >
> > 	PR target/113328
> >
> > gcc/ChangeLog:
> >
> > 	* config/aarch64/aarch64.cc (aarch64_expand_vector_init_fallback):
> > 	Improve part-variable vector generation with SVE's INDEX if
> TARGET_SVE
> > 	is available.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 	* gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
> > 	check-function-bodies.
> > 	* gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
> > 	* gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
> > 	* gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
> > 	* gcc.target/aarch64/sve/vec_init_4.c: New test.
> > 	* gcc.target/aarch64/sve/vec_init_5.c: New test.
> >
> > Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com>
> > ---
> >  gcc/config/aarch64/aarch64.cc                 | 81 ++++++++++++++++++-
> >  .../aarch64/sve/acle/general/dupq_1.c         | 18 ++++-
> >  .../aarch64/sve/acle/general/dupq_2.c         | 18 ++++-
> >  .../aarch64/sve/acle/general/dupq_3.c         | 18 ++++-
> >  .../aarch64/sve/acle/general/dupq_4.c         | 18 ++++-
> >  .../gcc.target/aarch64/sve/vec_init_4.c       | 47 +++++++++++
> >  .../gcc.target/aarch64/sve/vec_init_5.c       | 12 +++
> >  7 files changed, 199 insertions(+), 13 deletions(-)  create mode
> > 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
> >
> > diff --git a/gcc/config/aarch64/aarch64.cc
> > b/gcc/config/aarch64/aarch64.cc index 6b3ca57d0eb..7305a5c6375 100644
> > --- a/gcc/config/aarch64/aarch64.cc
> > +++ b/gcc/config/aarch64/aarch64.cc
> > @@ -23942,12 +23942,91 @@ aarch64_expand_vector_init_fallback (rtx
> target, rtx vals)
> >    if (n_var != n_elts)
> >      {
> >        rtx copy = copy_rtx (vals);
> > +      bool is_index_seq = false;
> > +
> > +      /* If at least half of the elements of the vector are constants and all
> > +	 these constant elements form a linear sequence of the form { B, B +
> S,
> > +	 B + 2 * S, B + 3 * S, ... }, we can generate the vector with SVE's
> > +	 INDEX instruction if SVE is available and then set the elements which
> > +	 are not constant separately.  More precisely, each constant element I
> > +	 has to be B + I * S where B and S must be valid immediate operand
> for
> > +	 an SVE INDEX instruction.
> > +
> > +	 For example, { X, 1, 2, 3} is a vector satisfying these conditions and
> > +	 we can generate a vector of all constants (i.e., { 0, 1, 2, 3 }) first
> > +	 and then set the first element of the vector to X.  */
> > +
> > +      if (TARGET_SVE && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
> > +	  && n_var <= n_elts / 2)
> > +	{
> > +	  int const_idx = -1;
> > +	  HOST_WIDE_INT const_val = 0;
> > +	  int base = 16;
> > +	  int step = 16;
> > +
> > +	  for (int i = 0; i < n_elts; ++i)
> > +	    {
> > +	      rtx x = XVECEXP (vals, 0, i);
> > +
> > +	      if (!CONST_INT_P (x))
> > +		continue;
> > +
> > +	      if (const_idx == -1)
> > +		{
> > +		  const_idx = i;
> > +		  const_val = INTVAL (x);
> > +		}
> > +	      else
> > +		{
> > +		  if ((INTVAL (x) - const_val) % (i - const_idx) == 0)
> > +		    {
> > +		      HOST_WIDE_INT s
> > +			  = (INTVAL (x) - const_val) / (i - const_idx);
> > +		      if (s >= -16 && s <= 15)
> > +			{
> > +			  int b = const_val - s * const_idx;
> > +			  if (b >= -16 && b <= 15)
> > +			    {
> > +			      base = b;
> > +			      step = s;
> > +			    }
> > +			}
> > +		    }
> > +		  break;
> > +		}
> > +	    }
> > +
> > +	  if (base != 16
> > +	      && (!CONST_INT_P (v0)
> > +		  || (CONST_INT_P (v0) && INTVAL (v0) == base)))
> > +	    {
> > +	      if (!CONST_INT_P (v0))
> > +		XVECEXP (copy, 0, 0) = GEN_INT (base);
> > +
> > +	      is_index_seq = true;
> > +	      for (int i = 1; i < n_elts; ++i)
> > +		{
> > +		  rtx x = XVECEXP (copy, 0, i);
> > +
> > +		  if (CONST_INT_P (x))
> > +		    {
> > +		      if (INTVAL (x) != base + i * step)
> > +			{
> > +			  is_index_seq = false;
> > +			  break;
> > +			}
> > +		    }
> > +		  else
> > +		    XVECEXP (copy, 0, i) = GEN_INT (base + i * step);
> > +		}
> > +	    }
> > +	}
> 
> This seems a bit more complex than I was hoping for, although the complexity
> is probably justified.
> 
> Seeing how awkard it is to do this using current interfaces, I think I'd instead
> prefer to do something that I'd been vaguely hoping to do for a while: extend
> vector-builder.h to accept wildcard/don't care values.
> finalize () could then replace the wildcards with whatever gives the "nicest"
> encoding.
> 
> That's also going to be relatively complex, but I think it'd be more general, and
> might help with the existing vec_init code as well.
> It would also be a step towards optimising -1 indices for
> __builtin_shufflevector.  It might be a few weeks before I can post something
> though.

No problem, Richard.

I am also curious to see what this alternative implementation looks like. Please kindly keep me posted when your patch is ready. Thank you!

> 
> Pushing 1/2 without 2/2 has meant that the dupq tests will fail in the
> meantime, but that's ok.  In general, though, it's better not to push individual
> patches from a series unless they've been tested in isolation and are known
> to give clean test results.

In fact, the dupq tests were not affected. Patch 1/2 already adjusted the "scan-assembler" checks of the dupq tests based on the output of 1/2 alone. Patch 2/2 just replaces the "scan-assembler" checks with "check-function-bodies." So, the dupq tests still pass without 2/2.

Thanks,
Pengxuan
> 
> Thanks,
> Richard
Pengxuan Zheng Sept. 18, 2024, 7:47 p.m. UTC | #3
> > Pengxuan Zheng <quic_pzheng@quicinc.com> writes:
> > > We can still use SVE's INDEX instruction to construct vectors even
> > > if not all elements are constants. For example, { 0, x, 2, 3 } can
> > > be constructed by first using "INDEX #0, #1" to generate { 0, 1, 2,
> > > 3 }, and then set the elements which are non-constants separately.
> > >
> > > 	PR target/113328
> > >
> > > gcc/ChangeLog:
> > >
> > > 	* config/aarch64/aarch64.cc (aarch64_expand_vector_init_fallback):
> > > 	Improve part-variable vector generation with SVE's INDEX if
> > TARGET_SVE
> > > 	is available.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > 	* gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
> > > 	check-function-bodies.
> > > 	* gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
> > > 	* gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
> > > 	* gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
> > > 	* gcc.target/aarch64/sve/vec_init_4.c: New test.
> > > 	* gcc.target/aarch64/sve/vec_init_5.c: New test.
> > >
> > > Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com>
> > > ---
> > >  gcc/config/aarch64/aarch64.cc                 | 81 ++++++++++++++++++-
> > >  .../aarch64/sve/acle/general/dupq_1.c         | 18 ++++-
> > >  .../aarch64/sve/acle/general/dupq_2.c         | 18 ++++-
> > >  .../aarch64/sve/acle/general/dupq_3.c         | 18 ++++-
> > >  .../aarch64/sve/acle/general/dupq_4.c         | 18 ++++-
> > >  .../gcc.target/aarch64/sve/vec_init_4.c       | 47 +++++++++++
> > >  .../gcc.target/aarch64/sve/vec_init_5.c       | 12 +++
> > >  7 files changed, 199 insertions(+), 13 deletions(-)  create mode
> > > 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
> > >  create mode 100644
> > > gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
> > >
> > > diff --git a/gcc/config/aarch64/aarch64.cc
> > > b/gcc/config/aarch64/aarch64.cc index 6b3ca57d0eb..7305a5c6375
> > > 100644
> > > --- a/gcc/config/aarch64/aarch64.cc
> > > +++ b/gcc/config/aarch64/aarch64.cc
> > > @@ -23942,12 +23942,91 @@ aarch64_expand_vector_init_fallback (rtx
> > target, rtx vals)
> > >    if (n_var != n_elts)
> > >      {
> > >        rtx copy = copy_rtx (vals);
> > > +      bool is_index_seq = false;
> > > +
> > > +      /* If at least half of the elements of the vector are constants and all
> > > +	 these constant elements form a linear sequence of the form { B, B
> > > ++
> > S,
> > > +	 B + 2 * S, B + 3 * S, ... }, we can generate the vector with SVE's
> > > +	 INDEX instruction if SVE is available and then set the elements which
> > > +	 are not constant separately.  More precisely, each constant element I
> > > +	 has to be B + I * S where B and S must be valid immediate operand
> > for
> > > +	 an SVE INDEX instruction.
> > > +
> > > +	 For example, { X, 1, 2, 3} is a vector satisfying these conditions and
> > > +	 we can generate a vector of all constants (i.e., { 0, 1, 2, 3 }) first
> > > +	 and then set the first element of the vector to X.  */
> > > +
> > > +      if (TARGET_SVE && GET_MODE_CLASS (mode) ==
> MODE_VECTOR_INT
> > > +	  && n_var <= n_elts / 2)
> > > +	{
> > > +	  int const_idx = -1;
> > > +	  HOST_WIDE_INT const_val = 0;
> > > +	  int base = 16;
> > > +	  int step = 16;
> > > +
> > > +	  for (int i = 0; i < n_elts; ++i)
> > > +	    {
> > > +	      rtx x = XVECEXP (vals, 0, i);
> > > +
> > > +	      if (!CONST_INT_P (x))
> > > +		continue;
> > > +
> > > +	      if (const_idx == -1)
> > > +		{
> > > +		  const_idx = i;
> > > +		  const_val = INTVAL (x);
> > > +		}
> > > +	      else
> > > +		{
> > > +		  if ((INTVAL (x) - const_val) % (i - const_idx) == 0)
> > > +		    {
> > > +		      HOST_WIDE_INT s
> > > +			  = (INTVAL (x) - const_val) / (i - const_idx);
> > > +		      if (s >= -16 && s <= 15)
> > > +			{
> > > +			  int b = const_val - s * const_idx;
> > > +			  if (b >= -16 && b <= 15)
> > > +			    {
> > > +			      base = b;
> > > +			      step = s;
> > > +			    }
> > > +			}
> > > +		    }
> > > +		  break;
> > > +		}
> > > +	    }
> > > +
> > > +	  if (base != 16
> > > +	      && (!CONST_INT_P (v0)
> > > +		  || (CONST_INT_P (v0) && INTVAL (v0) == base)))
> > > +	    {
> > > +	      if (!CONST_INT_P (v0))
> > > +		XVECEXP (copy, 0, 0) = GEN_INT (base);
> > > +
> > > +	      is_index_seq = true;
> > > +	      for (int i = 1; i < n_elts; ++i)
> > > +		{
> > > +		  rtx x = XVECEXP (copy, 0, i);
> > > +
> > > +		  if (CONST_INT_P (x))
> > > +		    {
> > > +		      if (INTVAL (x) != base + i * step)
> > > +			{
> > > +			  is_index_seq = false;
> > > +			  break;
> > > +			}
> > > +		    }
> > > +		  else
> > > +		    XVECEXP (copy, 0, i) = GEN_INT (base + i * step);
> > > +		}
> > > +	    }
> > > +	}
> >
> > This seems a bit more complex than I was hoping for, although the
> > complexity is probably justified.
> >
> > Seeing how awkard it is to do this using current interfaces, I think
> > I'd instead prefer to do something that I'd been vaguely hoping to do
> > for a while: extend vector-builder.h to accept wildcard/don't care values.
> > finalize () could then replace the wildcards with whatever gives the "nicest"
> > encoding.
> >
> > That's also going to be relatively complex, but I think it'd be more
> > general, and might help with the existing vec_init code as well.
> > It would also be a step towards optimising -1 indices for
> > __builtin_shufflevector.  It might be a few weeks before I can post
> > something though.
> 
> No problem, Richard.
> 
> I am also curious to see what this alternative implementation looks like.
> Please kindly keep me posted when your patch is ready. Thank you!
> 
> >
> > Pushing 1/2 without 2/2 has meant that the dupq tests will fail in the
> > meantime, but that's ok.  In general, though, it's better not to push
> > individual patches from a series unless they've been tested in
> > isolation and are known to give clean test results.
> 
> In fact, the dupq tests were not affected. Patch 1/2 already adjusted the
> "scan-assembler" checks of the dupq tests based on the output of 1/2 alone.
> Patch 2/2 just replaces the "scan-assembler" checks with "check-function-
> bodies." So, the dupq tests still pass without 2/2.

I just realized that I was confused about what 1/2 does. You are right. The dupq tests will fail for now.

Again, sorry for the confusion. 😊

Thanks,
Pengxuan
> 
> Thanks,
> Pengxuan
> >
> > Thanks,
> > Richard
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 6b3ca57d0eb..7305a5c6375 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23942,12 +23942,91 @@  aarch64_expand_vector_init_fallback (rtx target, rtx vals)
   if (n_var != n_elts)
     {
       rtx copy = copy_rtx (vals);
+      bool is_index_seq = false;
+
+      /* If at least half of the elements of the vector are constants and all
+	 these constant elements form a linear sequence of the form { B, B + S,
+	 B + 2 * S, B + 3 * S, ... }, we can generate the vector with SVE's
+	 INDEX instruction if SVE is available and then set the elements which
+	 are not constant separately.  More precisely, each constant element I
+	 has to be B + I * S where B and S must be valid immediate operand for
+	 an SVE INDEX instruction.
+
+	 For example, { X, 1, 2, 3} is a vector satisfying these conditions and
+	 we can generate a vector of all constants (i.e., { 0, 1, 2, 3 }) first
+	 and then set the first element of the vector to X.  */
+
+      if (TARGET_SVE && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+	  && n_var <= n_elts / 2)
+	{
+	  int const_idx = -1;
+	  HOST_WIDE_INT const_val = 0;
+	  int base = 16;
+	  int step = 16;
+
+	  for (int i = 0; i < n_elts; ++i)
+	    {
+	      rtx x = XVECEXP (vals, 0, i);
+
+	      if (!CONST_INT_P (x))
+		continue;
+
+	      if (const_idx == -1)
+		{
+		  const_idx = i;
+		  const_val = INTVAL (x);
+		}
+	      else
+		{
+		  if ((INTVAL (x) - const_val) % (i - const_idx) == 0)
+		    {
+		      HOST_WIDE_INT s
+			  = (INTVAL (x) - const_val) / (i - const_idx);
+		      if (s >= -16 && s <= 15)
+			{
+			  int b = const_val - s * const_idx;
+			  if (b >= -16 && b <= 15)
+			    {
+			      base = b;
+			      step = s;
+			    }
+			}
+		    }
+		  break;
+		}
+	    }
+
+	  if (base != 16
+	      && (!CONST_INT_P (v0)
+		  || (CONST_INT_P (v0) && INTVAL (v0) == base)))
+	    {
+	      if (!CONST_INT_P (v0))
+		XVECEXP (copy, 0, 0) = GEN_INT (base);
+
+	      is_index_seq = true;
+	      for (int i = 1; i < n_elts; ++i)
+		{
+		  rtx x = XVECEXP (copy, 0, i);
+
+		  if (CONST_INT_P (x))
+		    {
+		      if (INTVAL (x) != base + i * step)
+			{
+			  is_index_seq = false;
+			  break;
+			}
+		    }
+		  else
+		    XVECEXP (copy, 0, i) = GEN_INT (base + i * step);
+		}
+	    }
+	}
 
       /* Load constant part of vector.  We really don't care what goes into the
 	 parts we will overwrite, but we're more likely to be able to load the
 	 constant efficiently if it has fewer, larger, repeating parts
 	 (see aarch64_simd_valid_immediate).  */
-      for (int i = 0; i < n_elts; i++)
+      for (int i = 0; !is_index_seq && i < n_elts; i++)
 	{
 	  rtx x = XVECEXP (vals, 0, i);
 	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
index 0940bedd0dd..80eb1efdc66 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
@@ -1,15 +1,27 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
 /* { dg-require-effective-target aarch64_little_endian } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 #include <arm_sve.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** dupq:
+**	index	z0\.s, #0, #1
+**	ins	v0\.s\[0\], w0
+**	dup	z0\.q, z0\.q\[0\]
+**	ret
+*/
 svint32_t
 dupq (int x)
 {
   return svdupq_s32 (x, 1, 2, 3);
 }
 
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
+#ifdef __cplusplus
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
index 218a6601337..afcad0a691e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
@@ -1,15 +1,27 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 /* To avoid needing big-endian header files.  */
 #pragma GCC aarch64 "arm_sve.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** dupq:
+**	index	z0\.s, #3, #-1
+**	ins	v0\.s\[0\], w0
+**	dup	z0\.q, z0\.q\[0\]
+**	ret
+*/
 svint32_t
 dupq (int x)
 {
   return svdupq_s32 (x, 1, 2, 3);
 }
 
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
+#ifdef __cplusplus
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
index 245d43b75b5..f912f4b905c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
@@ -1,15 +1,27 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O2 -mlittle-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 /* To avoid needing big-endian header files.  */
 #pragma GCC aarch64 "arm_sve.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** dupq:
+**	index	z0\.s, #0, #1
+**	ins	v0\.s\[2\], w0
+**	dup	z0\.q, z0\.q\[0\]
+**	ret
+*/
 svint32_t
 dupq (int x)
 {
   return svdupq_s32 (0, 1, x, 3);
 }
 
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
+#ifdef __cplusplus
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
index cbee6f27b62..0cfdb23101b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
@@ -1,15 +1,27 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 /* To avoid needing big-endian header files.  */
 #pragma GCC aarch64 "arm_sve.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** dupq:
+**	index	z0\.s, #3, #-1
+**	ins	v0\.s\[2\], w0
+**	dup	z0\.q, z0\.q\[0\]
+**	ret
+*/
 svint32_t
 dupq (int x)
 {
   return svdupq_s32 (0, 1, x, 3);
 }
 
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
+#ifdef __cplusplus
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
new file mode 100644
index 00000000000..898168dc8ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
@@ -0,0 +1,47 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef long v2di __attribute__((vector_size(16)));
+
+/*
+** f:
+**	index	z0\.s, #0, #1
+**	ins	v0\.s\[1\], w0
+**	ret
+*/
+v4si
+f (int x)
+{
+  return (v4si){ 0, x, 2, 3 };
+}
+
+/*
+** f1:
+**	index	z0\.s, #3, #-4
+**	ins	v0\.s\[1\], w0
+**	ins	v0\.s\[2\], w1
+**	ret
+*/
+v4si
+f1 (int x, int y)
+{
+  return (v4si){ 3, x, y, -9 };
+}
+
+/*
+** f2:
+**	index	z0\.h, #4, #2
+**	ins	v0\.h\[0\], w0
+**	ins	v0\.h\[3\], w1
+**	ins	v0\.h\[7\], w2
+**	ret
+*/
+v8hi
+f2 (short x, short y, short z)
+{
+  return (v8hi){ x, 6, 8, y, 12, 14, 16, z };
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
new file mode 100644
index 00000000000..e4a71736f5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+v4si
+f (int x, int y)
+{
+  return (v4si){ 1, x, y, 3 };
+}
+
+/* { dg-final { scan-assembler-not {index} } } */