Message ID | d5cad739-77d0-4712-ac59-ec5b853bd964@gmail.com |
---|---|
State | New |
Headers | show |
Series | RISC-V: Recognize stepped series in expand_vec_perm_const. | expand |
It's more reasonable to fix this in vec_perm_const than in the middle-end.
LGTM.
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-12-09 21:18
To: gcc-patches; palmer; Kito Cheng; jeffreyalaw; juzhe.zhong@rivai.ai
CC: rdapp.gcc
Subject: [PATCH] RISC-V: Recognize stepped series in expand_vec_perm_const.
Hi,
we currently try to recognize various forms of stepped (const_vector)
sequence variants in expand_const_vector. Because of complications with
canonicalization and encoding it is easier to identify such patterns
in expand_vec_perm_const_1 already where perm.series_p () is available.
This patch introduces shuffle_series as a new permutation pattern and
tries to recognize series like [base0 base1 base1 + step ...]. If such
a series is found, it is expanded by expand_vec_series and a
gather is emitted.
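On top of that, the patch fixes the step recognition in expand_const_vector,
which is where such stepped series ended up before: the step is now computed
as base2 - base1 instead of being taken directly from the third element.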
This fixes several execution failures when running code compiled for a
scalable vector size of 128 on a target with vlen = 256 or higher.
The problem was only noticed there because the encoding for a reversed
[2 2]-element vector ("3 2 1 0") is { [1 2], [0 2], [1 4] }.
Some testcases that failed were:
vect-alias-check-18.c
vect-alias-check-1.F90
pr64365.c
On a 128-bit target, only the first two elements are used. The
third element causing the complications only comes into effect at
vlen = 256.
With this patch the testsuite results are similar with vlen = 128 and
vlen = 256 (when built with -march=rv64gcv_zvl128b).
Regards
Robin
gcc/ChangeLog:
* config/riscv/riscv-v.cc (expand_const_vector): Fix step
calculation.
(modulo_sel_indices): Also perform modulo for variable-length
constants.
(shuffle_series): Recognize series permutations.
(expand_vec_perm_const_1): Add shuffle_series.
---
gcc/config/riscv/riscv-v.cc | 66 +++++++++++++++++++++++++++++++++++--
1 file changed, 64 insertions(+), 2 deletions(-)
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 9b99d0aca84..fd6ef0660a2 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1378,12 +1378,15 @@ expand_const_vector (rtx target, rtx src)
{ base0, base1, base1 + step, base1 + step * 2, ... } */
rtx base0 = builder.elt (0);
rtx base1 = builder.elt (1);
- rtx step = builder.elt (2);
+ rtx base2 = builder.elt (2);
+
+ scalar_mode elem_mode = GET_MODE_INNER (mode);
+ rtx step = simplify_binary_operation (MINUS, elem_mode, base2, base1);
+
/* Step 1 - { base1, base1 + step, base1 + step * 2, ... } */
rtx tmp = gen_reg_rtx (mode);
expand_vec_series (tmp, base1, step);
/* Step 2 - { base0, base1, base1 + step, base1 + step * 2, ... } */
- scalar_mode elem_mode = GET_MODE_INNER (mode);
if (!rtx_equal_p (base0, const0_rtx))
base0 = force_reg (elem_mode, base0);
@@ -3395,6 +3398,63 @@ shuffle_extract_and_slide1up_patterns (struct expand_vec_perm_d *d)
return true;
}
+static bool
+shuffle_series (struct expand_vec_perm_d *d)
+{
+ if (!d->one_vector_p || d->perm.encoding ().npatterns () != 1)
+ return false;
+
+ poly_int64 el1 = d->perm[0];
+ poly_int64 el2 = d->perm[1];
+ poly_int64 el3 = d->perm[2];
+
+ poly_int64 step1 = el2 - el1;
+ poly_int64 step2 = el3 - el2;
+
+ bool need_insert = false;
+ bool have_series = false;
+
+ /* Check for a full series. */
+ if (known_ne (step1, 0) && d->perm.series_p (0, 1, el1, step1))
+ have_series = true;
+
+ /* Check for a series starting at the second element. */
+ else if (known_ne (step2, 0) && d->perm.series_p (1, 1, el2, step2))
+ {
+ have_series = true;
+ need_insert = true;
+ }
+
+ if (!have_series)
+ return false;
+
+ /* Get a vector int-mode to be used for the permute selector. */
+ machine_mode sel_mode = related_int_vector_mode (d->vmode).require ();
+ insn_code icode = optab_handler (vec_shl_insert_optab, sel_mode);
+
+ /* We need to be able to insert an element and shift the vector. */
+ if (need_insert && icode == CODE_FOR_nothing)
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ /* Create the series. */
+ machine_mode eltmode = Pmode;
+ rtx series = gen_reg_rtx (sel_mode);
+ expand_vec_series (series, gen_int_mode (need_insert ? el2 : el1, eltmode),
+ gen_int_mode (need_insert ? step2 : step1, eltmode));
+
+ /* Insert the remaining element if necessary. */
+ if (need_insert)
+ emit_insn (GEN_FCN (icode) (series, series, gen_int_mode (el1, eltmode)));
+
+ emit_vlmax_gather_insn (d->target, d->op0, series);
+
+ return true;
+}
+
/* Recognize the pattern that can be shuffled by generic approach. */
static bool
@@ -3475,6 +3535,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
if (shuffle_extract_and_slide1up_patterns (d))
return true;
+ if (shuffle_series (d))
+ return true;
if (shuffle_generic_patterns (d))
return true;
return false;
+ if (shuffle_series (d))
+ return true;
Could you rename it to shuffle_series_patterns?
Just to keep the naming consistent.
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-12-09 21:18
To: gcc-patches; palmer; Kito Cheng; jeffreyalaw; juzhe.zhong@rivai.ai
CC: rdapp.gcc
Subject: [PATCH] RISC-V: Recognize stepped series in expand_vec_perm_const.
Hi,
we currently try to recognize various forms of stepped (const_vector)
sequence variants in expand_const_vector. Because of complications with
canonicalization and encoding it is easier to identify such patterns
in expand_vec_perm_const_1 already where perm.series_p () is available.
This patch introduces shuffle_series as a new permutation pattern and
tries to recognize series like [base0 base1 base1 + step ...]. If such
a series is found, it is expanded by expand_vec_series and a
gather is emitted.
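On top of that, the patch fixes the step recognition in expand_const_vector,
which is where such stepped series ended up before: the step is now computed
as base2 - base1 instead of being taken directly from the third element.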
This fixes several execution failures when running code compiled for a
scalable vector size of 128 on a target with vlen = 256 or higher.
The problem was only noticed there because the encoding for a reversed
[2 2]-element vector ("3 2 1 0") is { [1 2], [0 2], [1 4] }.
Some testcases that failed were:
vect-alias-check-18.c
vect-alias-check-1.F90
pr64365.c
On a 128-bit target, only the first two elements are used. The
third element causing the complications only comes into effect at
vlen = 256.
With this patch the testsuite results are similar with vlen = 128 and
vlen = 256 (when built with -march=rv64gcv_zvl128b).
Regards
Robin
gcc/ChangeLog:
* config/riscv/riscv-v.cc (expand_const_vector): Fix step
calculation.
(modulo_sel_indices): Also perform modulo for variable-length
constants.
(shuffle_series): Recognize series permutations.
(expand_vec_perm_const_1): Add shuffle_series.
---
gcc/config/riscv/riscv-v.cc | 66 +++++++++++++++++++++++++++++++++++--
1 file changed, 64 insertions(+), 2 deletions(-)
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 9b99d0aca84..fd6ef0660a2 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1378,12 +1378,15 @@ expand_const_vector (rtx target, rtx src)
{ base0, base1, base1 + step, base1 + step * 2, ... } */
rtx base0 = builder.elt (0);
rtx base1 = builder.elt (1);
- rtx step = builder.elt (2);
+ rtx base2 = builder.elt (2);
+
+ scalar_mode elem_mode = GET_MODE_INNER (mode);
+ rtx step = simplify_binary_operation (MINUS, elem_mode, base2, base1);
+
/* Step 1 - { base1, base1 + step, base1 + step * 2, ... } */
rtx tmp = gen_reg_rtx (mode);
expand_vec_series (tmp, base1, step);
/* Step 2 - { base0, base1, base1 + step, base1 + step * 2, ... } */
- scalar_mode elem_mode = GET_MODE_INNER (mode);
if (!rtx_equal_p (base0, const0_rtx))
base0 = force_reg (elem_mode, base0);
@@ -3395,6 +3398,63 @@ shuffle_extract_and_slide1up_patterns (struct expand_vec_perm_d *d)
return true;
}
+static bool
+shuffle_series (struct expand_vec_perm_d *d)
+{
+ if (!d->one_vector_p || d->perm.encoding ().npatterns () != 1)
+ return false;
+
+ poly_int64 el1 = d->perm[0];
+ poly_int64 el2 = d->perm[1];
+ poly_int64 el3 = d->perm[2];
+
+ poly_int64 step1 = el2 - el1;
+ poly_int64 step2 = el3 - el2;
+
+ bool need_insert = false;
+ bool have_series = false;
+
+ /* Check for a full series. */
+ if (known_ne (step1, 0) && d->perm.series_p (0, 1, el1, step1))
+ have_series = true;
+
+ /* Check for a series starting at the second element. */
+ else if (known_ne (step2, 0) && d->perm.series_p (1, 1, el2, step2))
+ {
+ have_series = true;
+ need_insert = true;
+ }
+
+ if (!have_series)
+ return false;
+
+ /* Get a vector int-mode to be used for the permute selector. */
+ machine_mode sel_mode = related_int_vector_mode (d->vmode).require ();
+ insn_code icode = optab_handler (vec_shl_insert_optab, sel_mode);
+
+ /* We need to be able to insert an element and shift the vector. */
+ if (need_insert && icode == CODE_FOR_nothing)
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ /* Create the series. */
+ machine_mode eltmode = Pmode;
+ rtx series = gen_reg_rtx (sel_mode);
+ expand_vec_series (series, gen_int_mode (need_insert ? el2 : el1, eltmode),
+ gen_int_mode (need_insert ? step2 : step1, eltmode));
+
+ /* Insert the remaining element if necessary. */
+ if (need_insert)
+ emit_insn (GEN_FCN (icode) (series, series, gen_int_mode (el1, eltmode)));
+
+ emit_vlmax_gather_insn (d->target, d->op0, series);
+
+ return true;
+}
+
/* Recognize the pattern that can be shuffled by generic approach. */
static bool
@@ -3475,6 +3535,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
if (shuffle_extract_and_slide1up_patterns (d))
return true;
+ if (shuffle_series (d))
+ return true;
if (shuffle_generic_patterns (d))
return true;
return false;
On 12/11/23 03:09, juzhe.zhong@rivai.ai wrote:
> + if (shuffle_series (d))
> + return true;
>
> Could you rename it into shuffle_series_patterns ?
>
> Just to make naming consistent.

Done, going to push with that change in a while.

Regards
Robin
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 9b99d0aca84..fd6ef0660a2 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1378,12 +1378,15 @@ expand_const_vector (rtx target, rtx src) { base0, base1, base1 + step, base1 + step * 2, ... } */ rtx base0 = builder.elt (0); rtx base1 = builder.elt (1); - rtx step = builder.elt (2); + rtx base2 = builder.elt (2); + + scalar_mode elem_mode = GET_MODE_INNER (mode); + rtx step = simplify_binary_operation (MINUS, elem_mode, base2, base1); + /* Step 1 - { base1, base1 + step, base1 + step * 2, ... } */ rtx tmp = gen_reg_rtx (mode); expand_vec_series (tmp, base1, step); /* Step 2 - { base0, base1, base1 + step, base1 + step * 2, ... } */ - scalar_mode elem_mode = GET_MODE_INNER (mode); if (!rtx_equal_p (base0, const0_rtx)) base0 = force_reg (elem_mode, base0); @@ -3395,6 +3398,63 @@ shuffle_extract_and_slide1up_patterns (struct expand_vec_perm_d *d) return true; } +static bool +shuffle_series (struct expand_vec_perm_d *d) +{ + if (!d->one_vector_p || d->perm.encoding ().npatterns () != 1) + return false; + + poly_int64 el1 = d->perm[0]; + poly_int64 el2 = d->perm[1]; + poly_int64 el3 = d->perm[2]; + + poly_int64 step1 = el2 - el1; + poly_int64 step2 = el3 - el2; + + bool need_insert = false; + bool have_series = false; + + /* Check for a full series. */ + if (known_ne (step1, 0) && d->perm.series_p (0, 1, el1, step1)) + have_series = true; + + /* Check for a series starting at the second element. */ + else if (known_ne (step2, 0) && d->perm.series_p (1, 1, el2, step2)) + { + have_series = true; + need_insert = true; + } + + if (!have_series) + return false; + + /* Get a vector int-mode to be used for the permute selector. */ + machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); + insn_code icode = optab_handler (vec_shl_insert_optab, sel_mode); + + /* We need to be able to insert an element and shift the vector. 
*/ + if (need_insert && icode == CODE_FOR_nothing) + return false; + + /* Success! */ + if (d->testing_p) + return true; + + /* Create the series. */ + machine_mode eltmode = Pmode; + rtx series = gen_reg_rtx (sel_mode); + expand_vec_series (series, gen_int_mode (need_insert ? el2 : el1, eltmode), + gen_int_mode (need_insert ? step2 : step1, eltmode)); + + /* Insert the remaining element if necessary. */ + if (need_insert) + emit_insn (GEN_FCN (icode) (series, series, gen_int_mode (el1, eltmode))); + + emit_vlmax_gather_insn (d->target, d->op0, series); + + return true; +} + /* Recognize the pattern that can be shuffled by generic approach. */ static bool @@ -3475,6 +3535,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return true; if (shuffle_extract_and_slide1up_patterns (d)) return true; + if (shuffle_series (d)) + return true; if (shuffle_generic_patterns (d)) return true; return false;