diff mbox series

[committed] i386: Implement .SAT_ADD for unsigned scalar integers [PR112600]

Message ID CAFULd4YP3wkJqkfxhuJ+tQsmFe_--Up08v=H5HVveQqb7Ds=-w@mail.gmail.com
State New
Headers show
Series [committed] i386: Implement .SAT_ADD for unsigned scalar integers [PR112600] | expand

Commit Message

Uros Bizjak June 8, 2024, 10:23 a.m. UTC
The following testcase:

unsigned
add_sat(unsigned x, unsigned y)
{
    unsigned z;
    return __builtin_add_overflow(x, y, &z) ? -1u : z;
}

currently compiles (-O2) to:

add_sat:
        addl    %esi, %edi
        jc      .L3
        movl    %edi, %eax
        ret
.L3:
        orl     $-1, %eax
        ret

We can expand through the usadd{m}3 optab to use the carry flag from the
addition and generate branchless code using the SBB instruction, implementing:

    unsigned res = x + y;
    res |= -(res < x);

add_sat:
        addl    %esi, %edi
        sbbl    %eax, %eax
        orl     %edi, %eax
        ret

    PR target/112600

gcc/ChangeLog:

    * config/i386/i386.md (usadd<mode>3): New expander.
    (x86_mov<mode>cc_0_m1_neg): Use SWI mode iterator.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/pr112600-a.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.

Comments

Gerald Pfeifer June 8, 2024, 12:09 p.m. UTC | #1
On Sat, 8 Jun 2024, Uros Bizjak wrote:
> gcc/ChangeLog:
> 
>     * config/i386/i386.md (usadd<mode>3): New expander.
>     (x86_mov<mode>cc_0_m1_neg): Use SWI mode iterator.

When you write "committed", did you actually push? 

If so, us being on Git now it might be good to adjust terminology.

Gerald
Uros Bizjak June 8, 2024, 8:50 p.m. UTC | #2
On Sat, Jun 8, 2024 at 2:09 PM Gerald Pfeifer <gerald@pfeifer.com> wrote:
>
> On Sat, 8 Jun 2024, Uros Bizjak wrote:
> > gcc/ChangeLog:
> >
> >     * config/i386/i386.md (usadd<mode>3): New expander.
> >     (x86_mov<mode>cc_0_m1_neg): Use SWI mode iterator.
>
> When you write "committed", did you actually push?

Yes, IIRC, the request was to mark a pushed change with the word "committed".

> If so, us being on Git now it might be good to adjust terminology.

No problem, I can say "pushed" if that is more descriptive.

Thanks,
Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ffcf63e1cba..bc2ef819df6 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9870,6 +9870,26 @@  (define_insn_and_split "*sub<mode>3_ne_0"
     operands[1] = force_reg (<MODE>mode, operands[1]);
 })
 
+(define_expand "usadd<mode>3"
+  [(set (match_operand:SWI 0 "register_operand")
+	(us_plus:SWI (match_operand:SWI 1 "register_operand")
+		     (match_operand:SWI 2 "<general_operand>")))]
+  ""
+{ /* Branchless unsigned saturating add: (op1 + op2) | -(carry).  */
+  rtx res = gen_reg_rtx (<MODE>mode); /* op1 + op2, wrapped */
+  rtx msk = gen_reg_rtx (<MODE>mode); /* -1 if the add carried, else 0 */
+  rtx dst;
+
+  emit_insn (gen_add<mode>3_cc_overflow_1 (res, operands[1], operands[2]));
+  emit_insn (gen_x86_mov<mode>cc_0_m1_neg (msk));
+  dst = expand_simple_binop (<MODE>mode, IOR, res, msk, /* res | msk */
+			     operands[0], 1, OPTAB_DIRECT);
+
+  if (!rtx_equal_p (dst, operands[0])) /* result landed elsewhere */
+    emit_move_insn (operands[0], dst);
+  DONE;
+})
+
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "<insn>xf3"
@@ -24945,8 +24965,8 @@  (define_insn "*x86_mov<mode>cc_0_m1_neg"
 
 (define_expand "x86_mov<mode>cc_0_m1_neg"
   [(parallel
-    [(set (match_operand:SWI48 0 "register_operand")
-	  (neg:SWI48 (ltu:SWI48 (reg:CCC FLAGS_REG) (const_int 0))))
+    [(set (match_operand:SWI 0 "register_operand")
+	  (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))
      (clobber (reg:CC FLAGS_REG))])])
 
 (define_split
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-a.c b/gcc/testsuite/gcc.target/i386/pr112600-a.c
new file mode 100644
index 00000000000..fa122bc7a3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-a.c
@@ -0,0 +1,32 @@ 
+/* PR target/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "sbb" 4 } } */
+
+unsigned char
+add_sat_char (unsigned char x, unsigned char y)
+{
+  unsigned char z; /* x + y wrapped to unsigned char */
+  return __builtin_add_overflow(x, y, &z) ? -1u : z; /* all-ones on overflow */
+}
+
+unsigned short
+add_sat_short (unsigned short x, unsigned short y)
+{
+  unsigned short z; /* x + y wrapped to unsigned short */
+  return __builtin_add_overflow(x, y, &z) ? -1u : z; /* all-ones on overflow */
+}
+
+unsigned int
+add_sat_int (unsigned int x, unsigned int y)
+{
+  unsigned int z; /* x + y wrapped to unsigned int */
+  return __builtin_add_overflow(x, y, &z) ? -1u : z; /* all-ones on overflow */
+}
+
+unsigned long
+add_sat_long (unsigned long x, unsigned long y)
+{
+  unsigned long z; /* x + y wrapped to unsigned long */
+  return __builtin_add_overflow(x, y, &z) ? -1ul : z; /* all-ones on overflow */
+}