diff mbox series

[to-be-committed,RISC-V] Improve bset generation when bit position is limited

Message ID b259a0c6-82cf-49a5-a21b-67c3aff53d5a@gmail.com
State New
Headers show
Series [to-be-committed,RISC-V] Improve bset generation when bit position is limited | expand

Commit Message

Jeff Law June 18, 2024, 5:53 a.m. UTC
So more work in the ongoing effort to make better use of the Zbs 
extension.  This time we're trying to exploit knowledge of the shift 
count/bit position to allow us to use a bset instruction.

Consider this expression in SImode


  (1 << (pos & 0xf))

The shift count is at most 15, so none of the resulting values will have 
bit 31 set.  Thus, if there's an explicit zero or sign extension to DI we 
can drop that explicit extension and generate a simple bset with x0 as 
the input value.

Or another example (which I think came from SPEC at some point and IIRC 
was the primary motivation for this patch):



(1 << (7-(pos) % 8))



Before this change they'd generate something like this respectively:

         li      a5,1
         andi    a0,a0,15
         sllw    a0,a5,a0


         li      a5,7
         andn    a0,a5,a0
         li      a5,1
         sllw    a0,a5,a0



After this change they generate:



         andi    a0,a0,15        # 9     [c=4 l=4]  *anddi3/1
         bset    a0,x0,a0        # 17    [c=8 l=4]  *bsetdi_2


         li      a5,7            # 27    [c=4 l=4]  *movdi_64bit/1
         andn    a0,a5,a0        # 28    [c=4 l=4]  and_notdi3
         bset    a0,x0,a0        # 19    [c=8 l=4]  *bsetdi_2



We achieve this with simple define_splits which target the bsetdi_2 
pattern I recently added.  Much better than the original implementation 
I did a few months back :-)  I've got a bclr/binv variant from a few 
months back as well, but it needs to be updated to the simpler 
implementation found here.

Just ran this through my tester.  Will wait for the precommit CI to 
render its verdict before moving forward.


Jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 094bc2acf1c..dc7a7e7fba7 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -609,6 +609,36 @@  (define_insn "*bsetdi_2"
   "bset\t%0,x0,%1"
   [(set_attr "type" "bitmanip")])
 
+;; These two splitters exploit a shift count masked by a constant whose low
+;; five bits are not all ones, so the count's low five bits are never 31.
+;; The SImode sign bit is thus never set, sign and zero extension agree,
+;; and we can emit the AND (or AND-NOT) followed by the bsetdi_2 pattern.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+	(any_extend:DI
+	 (ashift:SI (const_int 1)
+		    (subreg:QI (and:DI (not:DI (match_operand:DI 1 "register_operand"))
+			       (match_operand 2 "const_int_operand")) 0))))
+   (clobber (match_operand:DI 3 "register_operand"))]
+  "TARGET_64BIT
+   && TARGET_ZBS
+   && (TARGET_ZBB || TARGET_ZBKB)
+   && (INTVAL (operands[2]) & 0x1f) != 0x1f"
+   [(set (match_dup 0) (and:DI (not:DI (match_dup 1)) (match_dup 2)))
+    (set (match_dup 0) (zero_extend:DI (ashift:SI (const_int 1) (subreg:QI (match_dup 0) 0))))])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (any_extend:DI
+	 (ashift:SI (const_int 1)
+		    (subreg:QI (and:DI (match_operand:DI 1 "register_operand")
+				       (match_operand 2 "const_int_operand")) 0))))]
+  "TARGET_64BIT
+   && TARGET_ZBS
+   && (INTVAL (operands[2]) & 0x1f) != 0x1f"
+   [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 2)))
+    (set (match_dup 0) (zero_extend:DI (ashift:SI (const_int 1) (subreg:QI (match_dup 0) 0))))])
+
 (define_insn "*bset<mode>_1_mask"
   [(set (match_operand:X 0 "register_operand" "=r")
 	(ashift:X (const_int 1)
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-ext-2.c b/gcc/testsuite/gcc.target/riscv/zbs-ext-2.c
new file mode 100644
index 00000000000..301bc9d89c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbs-ext-2.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" } } */
+
+/* Shift count is 7 - pos % 8, i.e. in [0, 7]; expect li + andn + bset.  */
+typedef unsigned int uint32_t;
+uint32_t foo(uint32_t pos)
+{
+    return (1 << (7-(pos) % 8));
+}
+
+/* Shift count is pos & 0xf, i.e. in [0, 15]; expect andi + bset.  */
+uint32_t foo2(uint32_t pos)
+{
+    return (1 << (pos & 0xf));
+}
+
+/* { dg-final { scan-assembler-not "sllw?\t" } } */
+/* { dg-final { scan-assembler-times "bset\t" 2 } } */
+/* { dg-final { scan-assembler-times "andi\t" 1 } } */
+/* { dg-final { scan-assembler-times "andn\t" 1 } } */
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "ret" 2 } } */
+