diff mbox series

[RFC,4/5] vect: Ensure we add vector skip guard even when versioning for aliasing

Message ID Zx+l76/BJRXu5X0I@arm.com
State New
Headers show
Series vect: Force peeling for alignment to handle more early break loops | expand

Commit Message

Alex Coplan Oct. 28, 2024, 2:55 p.m. UTC
This fixes a latent wrong code issue whereby vect_do_peeling determined
the wrong condition for inserting the vector skip guard.  Specifically
in the case where the loop niters are unknown at compile time we used to
check:

  !LOOP_REQUIRES_VERSIONING (loop_vinfo)

but LOOP_REQUIRES_VERSIONING is true for loops which we have versioned
for aliasing, and that has nothing to do with prolog peeling.  I think
this condition should instead be checking specifically if we aren't
versioning for alignment.

As it stands, when we version for alignment, we don't peel, so the
vector skip guard is indeed redundant in that case.

With the testcase added (reduced from the Fortran frontend) we would
version for aliasing, omit the vector skip guard, and then at runtime we
would peel sufficient iterations for alignment that there wasn't a full
vector iteration left when we entered the vector body, thus overflowing
the output buffer.

gcc/ChangeLog:

	* tree-vect-loop-manip.cc (vect_do_peeling): Adjust skip_vector
	condition to only omit the edge if we're versioning for
	alignment.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/vect-early-break_130.c: New test.
---
 .../gcc.dg/vect/vect-early-break_130.c        | 91 +++++++++++++++++++
 gcc/tree-vect-loop-manip.cc                   |  2 +-
 2 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_130.c

Comments

Richard Biener Oct. 29, 2024, 12:43 p.m. UTC | #1
On Mon, 28 Oct 2024, Alex Coplan wrote:

> This fixes a latent wrong code issue whereby vect_do_peeling determined
> the wrong condition for inserting the vector skip guard.  Specifically
> in the case where the loop niters are unknown at compile time we used to
> check:
> 
>   !LOOP_REQUIRES_VERSIONING (loop_vinfo)
> 
> but LOOP_REQUIRES_VERSIONING is true for loops which we have versioned
> for aliasing, and that has nothing to do with prolog peeling.  I think
> this condition should instead be checking specifically if we aren't
> versioning for alignment.
> 
> As it stands, when we version for alignment, we don't peel, so the
> vector skip guard is indeed redundant in that case.
> 
> With the testcase added (reduced from the Fortran frontend) we would
> version for aliasing, omit the vector skip guard, and then at runtime we
> would peel sufficient iterations for alignment that there wasn't a full
> vector iteration left when we entered the vector body, thus overflowing
> the output buffer.

OK.

Thanks,
Richard.

> gcc/ChangeLog:
> 
> 	* tree-vect-loop-manip.cc (vect_do_peeling): Adjust skip_vector
> 	condition to only omit the edge if we're versioning for
> 	alignment.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/vect/vect-early-break_130.c: New test.
> ---
>  .../gcc.dg/vect/vect-early-break_130.c        | 91 +++++++++++++++++++
>  gcc/tree-vect-loop-manip.cc                   |  2 +-
>  2 files changed, 92 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_130.c
> 
>
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_130.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_130.c
new file mode 100644
index 00000000000..ce43fcd5681
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_130.c
@@ -0,0 +1,91 @@ 
+/* { dg-require-effective-target mmap } */
+/* { dg-add-options vect_early_break } */
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+
+/* This was reduced from gcc/fortran/scanner.cc:gfc_widechar_to_char.
+   The problem was that we omitted adding the vector skip guard when
+   versioning for aliasing.  When invoked on a string that is 28 bytes
+   long, that caused us to enter the vector body after having peeled 15
+   iterations, leaving only 13 iterations to be performed as vector, but
+   the vector body performs 16 (thus overflowing the res buffer by three
+   bytes).  */
+__attribute__((noipa))
+void f (const uint32_t *s, char *res, int length)
+{
+  unsigned long i;
+
+  for (i = 0; i < length; i++)
+    {
+      if (s[i] > 255)
+        __builtin_abort ();
+      res[i] = (char)s[i];
+    }
+}
+
+int main(void)
+{
+  long pgsz = sysconf (_SC_PAGESIZE);
+  if (pgsz == -1) {
+    fprintf (stderr, "sysconf failed: %m\n");
+    return 0;
+  }
+
+  void *p = mmap (NULL,
+      pgsz * 2,
+      PROT_READ | PROT_WRITE,
+      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (p == MAP_FAILED) {
+    fprintf (stderr, "mmap failed: %m\n");
+    return 0;
+  }
+
+  if (mprotect (p + pgsz, pgsz, PROT_NONE)) {
+    fprintf (stderr, "mprotect failed: %m\n");
+    return 0;
+  }
+
+  uint32_t in[128];
+  memset (in, 0, sizeof(in));
+
+  uintptr_t x = (uintptr_t)in;
+
+  /* We want to make our input pointer maximally misaligned (so we have
+     to peel the greatest possible number of iterations for alignment).
+     We need two bits of alignment for our uint32_t pointer to be
+     aligned.  Assuming we process 16 chars per vector iteration, we
+     will need to load 16 uint32_ts, thus we need a further 4 bits of
+     alignment.  */
+  const uintptr_t align_bits = 2 + 4;
+  const uintptr_t align_p2 = (1 << align_bits);
+  const uintptr_t align_p2m1 = align_p2 - 1;
+
+  if (x & align_p2m1 <= 4)
+    x &= -align_p2; /* Round down.  */
+  else
+    x = (x + align_p2m1) & -align_p2; /* Round up.  */
+
+  /* Add one uint32_t to get maximally misaligned.  */
+  uint32_t *inp = (uint32_t *)x + 1;
+
+  const char *str = "dec-comparison-complex_1.f90";
+  long n;
+#pragma GCC novector
+  for (n = 0; str[n]; n++)
+    inp[n] = str[n];
+
+  if (n > pgsz)
+    __builtin_abort ();
+
+  char *buf = p + pgsz - n;
+  f (inp, buf, n);
+
+#pragma GCC novector
+  for (int i = 0; i < n; i++)
+    if (buf[i] != str[i])
+      __builtin_abort ();
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index cf3a90219b7..55761b61185 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3278,7 +3278,7 @@  vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
   bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
 		      ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
 				  bound_prolog + bound_epilog)
-		      : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
+		      : (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
 			 || vect_epilogues));
 
   /* Epilog loop must be executed if the number of iterations for epilog