Message ID | 005501d7e460$9acba670$d062f350$@nextmovesoftware.com |
---|---|
State | New |
Headers | show |
Series | x86_64: Improved V1TImode rotations by non-constant amounts. | expand |
On Sun, Nov 28, 2021 at 3:02 PM Roger Sayle <roger@nextmovesoftware.com> wrote: > > > This patch builds on the recent improvements to TImode rotations (and > Jakub's fixes to shldq/shrdq patterns). Now that expanding a TImode > rotation can never fail, it is safe to allow general_operand constraints > on the QImode shift amounts in rotlv1ti3 and rotrv1ti3 patterns. > I've also made an additional tweak to ix86_expand_v1ti_to_ti to use > vec_extract via V2DImode, which avoids using memory and takes advantage > of vpextrq on recent hardware. > > For the following test case: > > typedef unsigned __int128 uv1ti __attribute__ ((__vector_size__ (16))); > uv1ti rotr(uv1ti x, unsigned int i) { return (x >> i) | (x << (128-i)); } > > GCC with -O2 -mavx2 would previously generate: > > rotr: vmovdqa %xmm0, -24(%rsp) > movq -16(%rsp), %rdx > movl %edi, %ecx > xorl %esi, %esi > movq -24(%rsp), %rax > shrdq %rdx, %rax > shrq %cl, %rdx > testb $64, %dil > cmovne %rdx, %rax > cmovne %rsi, %rdx > negl %ecx > xorl %edi, %edi > andl $127, %ecx > vmovq %rax, %xmm2 > movq -24(%rsp), %rax > vpinsrq $1, %rdx, %xmm2, %xmm1 > movq -16(%rsp), %rdx > shldq %rax, %rdx > salq %cl, %rax > testb $64, %cl > cmovne %rax, %rdx > cmovne %rdi, %rax > vmovq %rax, %xmm3 > vpinsrq $1, %rdx, %xmm3, %xmm0 > vpor %xmm1, %xmm0, %xmm0 > ret > > With this patch, we now generate: > > rotr: movl %edi, %ecx > vpextrq $1, %xmm0, %rax > vmovq %xmm0, %rdx > shrdq %rax, %rdx > vmovq %xmm0, %rsi > shrdq %rsi, %rax > andl $64, %ecx > movq %rdx, %rsi > cmovne %rax, %rsi > cmove %rax, %rdx > vmovq %rsi, %xmm0 > vpinsrq $1, %rdx, %xmm0, %xmm0 > ret > > > This patch has been tested on x86_64-pc-linux-gnu with make bootstrap > and make -k check with no new failures. Ok for mainline? > > > 2021-11-28 Roger Sayle <roger@nextmovesoftware.com> > > gcc/ChangeLog > * config/i386/i386-expand.c (ix86_expand_v1ti_to_ti): Perform the > conversion via V2DImode using vec_extractv2didi on TARGET_SSE2. 
> * config/i386/sse.md (rotlv1ti3, rotrv1ti3): Change constraint > on QImode shift amounts from const_int_operand to general_operand. > > gcc/testsuite/ChangeLog > * gcc.target/i386/sse2-v1ti-rotate.c: New test case. OK. Thanks, Uros. > > > Thanks in advance, > Roger > -- >
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 088e6af..1e9734b 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -6162,7 +6162,17 @@ static rtx ix86_expand_v1ti_to_ti (rtx x) { rtx result = gen_reg_rtx (TImode); - emit_move_insn (result, gen_lowpart (TImode, x)); + if (TARGET_SSE2) + { + rtx temp = gen_reg_rtx (V2DImode); + emit_move_insn (temp, gen_lowpart (V2DImode, x)); + rtx lo = gen_lowpart (DImode, result); + emit_insn (gen_vec_extractv2didi (lo, temp, const0_rtx)); + rtx hi = gen_highpart (DImode, result); + emit_insn (gen_vec_extractv2didi (hi, temp, const1_rtx)); + } + else + emit_move_insn (result, gen_lowpart (TImode, x)); return result; } diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 2764a25..459eec9 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15169,7 +15169,7 @@ [(set (match_operand:V1TI 0 "register_operand") (rotate:V1TI (match_operand:V1TI 1 "register_operand") - (match_operand:QI 2 "const_int_operand")))] + (match_operand:QI 2 "general_operand")))] "TARGET_SSE2 && TARGET_64BIT" { ix86_expand_v1ti_rotate (ROTATE, operands); @@ -15180,7 +15180,7 @@ [(set (match_operand:V1TI 0 "register_operand") (rotatert:V1TI (match_operand:V1TI 1 "register_operand") - (match_operand:QI 2 "const_int_operand")))] + (match_operand:QI 2 "general_operand")))] "TARGET_SSE2 && TARGET_64BIT" { ix86_expand_v1ti_rotate (ROTATERT, operands); diff --git a/gcc/testsuite/gcc.target/i386/sse2-v1ti-rotate.c b/gcc/testsuite/gcc.target/i386/sse2-v1ti-rotate.c new file mode 100644 index 0000000..b4b2814 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-v1ti-rotate.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -msse2" } */ +/* { dg-require-effective-target sse2 } */ + +typedef unsigned __int128 uv1ti __attribute__ ((__vector_size__ (16))); + +uv1ti rotr(uv1ti x, unsigned int i) { return (x >> i) | (x << (128-i)); } +uv1ti rotl(uv1ti 
x, unsigned int i) { return (x << i) | (x >> (128-i)); } + +/* { dg-final { scan-assembler-not "shrq" } } */ +/* { dg-final { scan-assembler-not "salq" } } */