Message ID | CAOvf_xwSEVJfhvvKETqSOkkt3oHCZN9Ek15q=0OUOmqyv0JooA@mail.gmail.com |
---|---|
State | New |
Headers | show |
On Tue, Nov 11, 2014 at 3:21 PM, Evgeny Stupachenko <evstupac@gmail.com> wrote: > Hi, > > The patch extends the shift permutation technique to power-of-2 cases > (previously even/odd transformations were used unconditionally). > Basically the patch just adds a loop for the load group of length 2, as it > is done in the "vect_permute_load_chain" function. > > For Silvermont it reduces the insn sequence for a load group of length 4 > from 31 to 20 insns. > Performance for the test in the patch improved by ~20%. > > Bootstrap passed. > Make check in progress. > > Is it ok? Ok. Thanks, Richard. > 2014-11-11 Evgeny Stupachenko <evstupac@gmail.com> > > gcc/testsuite > * gcc.target/i386/pr52252-atom-1.c: New. > > gcc/ > * tree-vect-data-refs.c (vect_shift_permute_load_chain): Extend shift > permutations on power of 2 cases. > > diff --git a/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c > b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c > new file mode 100644 > index 0000000..1fbd258 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target ssse3 } */ > +/* { dg-options "-O2 -ftree-vectorize -mssse3 -mtune=slm" } */ > +#define byte unsigned char > + > +void > +pair_mul_sum(byte *in, byte *out, int size) > +{ > + int j; > + for(j = 0; j < size; j++) > + { > + byte a = in[0]; > + byte b = in[1]; > + byte c = in[2]; > + byte d = in[3]; > + out[0] = (byte)(a * b) + (byte)(b * c) + (byte)(c * d) + (byte)(d * a); > + in += 4; > + out += 1; > + } > +} > + > +/* { dg-final { scan-assembler "palignr" } } */ > diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c > index 0bc0356..d2e0e93 100644 > --- a/gcc/tree-vect-data-refs.c > +++ b/gcc/tree-vect-data-refs.c > @@ -5379,8 +5379,9 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, > memcpy (result_chain->address (), dr_chain.address (), > length * sizeof (tree)); > > - if (length == 2 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4) > + if 
(exact_log2 (length) != -1 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4) > { > + unsigned int j, log_length = exact_log2 (length); > for (i = 0; i < nelt / 2; ++i) > sel[i] = i * 2; > for (i = 0; i < nelt / 2; ++i) > @@ -5441,37 +5442,44 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, > select_mask = vect_gen_perm_mask (vectype, sel); > gcc_assert (select_mask != NULL); > > - first_vect = dr_chain[0]; > - second_vect = dr_chain[1]; > - > - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2"); > - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, > - first_vect, first_vect, > - perm2_mask1); > - vect_finish_stmt_generation (stmt, perm_stmt, gsi); > - vect[0] = data_ref; > + for (i = 0; i < log_length; i++) > + { > + for (j = 0; j < length; j += 2) > + { > + first_vect = dr_chain[j]; > + second_vect = dr_chain[j + 1]; > > - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2"); > - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, > - second_vect, second_vect, > - perm2_mask2); > - vect_finish_stmt_generation (stmt, perm_stmt, gsi); > - vect[1] = data_ref; > + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2"); > + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, > + first_vect, first_vect, > + perm2_mask1); > + vect_finish_stmt_generation (stmt, perm_stmt, gsi); > + vect[0] = data_ref; > > - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift"); > - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, > - vect[0], vect[1], > - shift1_mask); > - vect_finish_stmt_generation (stmt, perm_stmt, gsi); > - (*result_chain)[1] = data_ref; > + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2"); > + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, > + second_vect, > second_vect, > + perm2_mask2); > + vect_finish_stmt_generation (stmt, perm_stmt, gsi); > + vect[1] = data_ref; > > - data_ref = make_temp_ssa_name (vectype, NULL, "vect_select"); 
> - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, > - vect[0], vect[1], > - select_mask); > - vect_finish_stmt_generation (stmt, perm_stmt, gsi); > - (*result_chain)[0] = data_ref; > + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift"); > + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, > + vect[0], vect[1], > + shift1_mask); > + vect_finish_stmt_generation (stmt, perm_stmt, gsi); > + (*result_chain)[j/2 + length/2] = data_ref; > > + data_ref = make_temp_ssa_name (vectype, NULL, "vect_select"); > + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, > + vect[0], vect[1], > + select_mask); > + vect_finish_stmt_generation (stmt, perm_stmt, gsi); > + (*result_chain)[j/2] = data_ref; > + } > + memcpy (dr_chain.address (), result_chain->address (), > + length * sizeof (tree)); > + } > return true; > } > if (length == 3 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 2)
diff --git a/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c new file mode 100644 index 0000000..1fbd258 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr52252-atom-1.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target ssse3 } */ +/* { dg-options "-O2 -ftree-vectorize -mssse3 -mtune=slm" } */ +#define byte unsigned char + +void +pair_mul_sum(byte *in, byte *out, int size) +{ + int j; + for(j = 0; j < size; j++) + { + byte a = in[0]; + byte b = in[1]; + byte c = in[2]; + byte d = in[3]; + out[0] = (byte)(a * b) + (byte)(b * c) + (byte)(c * d) + (byte)(d * a); + in += 4; + out += 1; + } +} + +/* { dg-final { scan-assembler "palignr" } } */ diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 0bc0356..d2e0e93 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -5379,8 +5379,9 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, memcpy (result_chain->address (), dr_chain.address (), length * sizeof (tree)); - if (length == 2 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4) + if (exact_log2 (length) != -1 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) > 4) { + unsigned int j, log_length = exact_log2 (length); for (i = 0; i < nelt / 2; ++i) sel[i] = i * 2; for (i = 0; i < nelt / 2; ++i) @@ -5441,37 +5442,44 @@ vect_shift_permute_load_chain (vec<tree> dr_chain, select_mask = vect_gen_perm_mask (vectype, sel); gcc_assert (select_mask != NULL); - first_vect = dr_chain[0]; - second_vect = dr_chain[1]; - - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2"); - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, - first_vect, first_vect, - perm2_mask1); - vect_finish_stmt_generation (stmt, perm_stmt, gsi); - vect[0] = data_ref; + for (i = 0; i < log_length; i++) + { + for (j = 0; j < length; j += 2) + { + first_vect = dr_chain[j]; + second_vect = dr_chain[j + 1]; - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2"); - perm_stmt = 
gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, - second_vect, second_vect, - perm2_mask2); - vect_finish_stmt_generation (stmt, perm_stmt, gsi); - vect[1] = data_ref; + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2"); + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, + first_vect, first_vect, + perm2_mask1); + vect_finish_stmt_generation (stmt, perm_stmt, gsi); + vect[0] = data_ref; - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift"); - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, - vect[0], vect[1], - shift1_mask); - vect_finish_stmt_generation (stmt, perm_stmt, gsi); - (*result_chain)[1] = data_ref; + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2"); + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, + second_vect, second_vect, + perm2_mask2); + vect_finish_stmt_generation (stmt, perm_stmt, gsi); + vect[1] = data_ref; - data_ref = make_temp_ssa_name (vectype, NULL, "vect_select"); - perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, - vect[0], vect[1], - select_mask); - vect_finish_stmt_generation (stmt, perm_stmt, gsi); - (*result_chain)[0] = data_ref; + data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift"); + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, + vect[0], vect[1], + shift1_mask); + vect_finish_stmt_generation (stmt, perm_stmt, gsi); + (*result_chain)[j/2 + length/2] = data_ref; + data_ref = make_temp_ssa_name (vectype, NULL, "vect_select"); + perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref, + vect[0], vect[1], + select_mask); + vect_finish_stmt_generation (stmt, perm_stmt, gsi); + (*result_chain)[j/2] = data_ref; + } + memcpy (dr_chain.address (), result_chain->address (), + length * sizeof (tree)); + } return true; }