diff mbox series

[v4] RISC-V: use fclass insns to implement isfinite, isnormal and isinf builtins

Message ID 20240713032211.151318-1-vineetg@rivosinc.com
State New
Headers show
Series [v4] RISC-V: use fclass insns to implement isfinite, isnormal and isinf builtins | expand

Commit Message

Vineet Gupta July 13, 2024, 3:22 a.m. UTC
Changes since v3:
  - Remove '*' from define_insn for fclass
  - Remove the dummy expander for fclass.
  - De-duplicate the expanders code by using a helper which takes fclass
    val.

Changes since v2:
  - fclass define_insn tightened to check op0 mode "X" with additional
    expander w/o mode for callers.
  - builtins expander explicit mode check and FAIL if mode not appropriate.
  - subreg promoted handling to elide potential extension of ret val.
  - Added isinf builtin with bimodal return value like the others.

Changes since v1:
  - Removed UNSPEC_{INFINITE,ISNORMAL}
  - Don't hardcode SI in patterns, try to keep X to avoid potential
    sign extension pitfalls. Implementation wise requires skipping
    :MODE specifier in match_operand which is flagged as missing mode
    warning.
---

Currently these builtins use float compare instructions which require
FP flags to be saved/restored around them.
Our perf team complained this could be costly in uarch.
RV Base ISA already has FCLASS.{d,s,h} instruction to compare/identify FP
values w/o disturbing FP exception flags.

Coincidentally, upstream very recently got support for the corresponding
optabs. So this just requires wiring up in the backend.

Tested for rv64, one additional failure g++.dg/opt/pr107569.C needs
upstream ranger fix for the new optab.

gcc/ChangeLog:
	* config/riscv/riscv-protos.h (riscv_emit_fp_classify): New
	function declaration.
	* config/riscv/riscv.cc (riscv_emit_fp_classify): New helper for
	the expanders.
	* config/riscv/riscv.md: Add UNSPEC_FCLASS.
	define_insn for fclass insn.
	define_expand for isfinite, isnormal, isinf.

gcc/testsuite/ChangeLog:
	* gcc.target/riscv/fclass.c: New tests.

Signed-off-by: Vineet Gupta <vineetg@rivosinc.com>
---
 gcc/config/riscv/riscv-protos.h         |  1 +
 gcc/config/riscv/riscv.cc               | 51 ++++++++++++++++++++
 gcc/config/riscv/riscv.md               | 63 +++++++++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/fclass.c | 38 +++++++++++++++
 4 files changed, 153 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/fclass.c
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a8b76173fa0f..b49cd5cd5a91 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -170,6 +170,7 @@  extern enum memmodel riscv_union_memmodels (enum memmodel, enum memmodel);
 extern bool riscv_reg_frame_related (rtx);
 extern void riscv_split_sum_of_two_s12 (HOST_WIDE_INT, HOST_WIDE_INT *,
 					HOST_WIDE_INT *);
+extern bool riscv_emit_fp_classify (rtx, rtx, HOST_WIDE_INT);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 9bba5da016e9..3d6bd29f42e9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4865,6 +4865,57 @@  riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1,
     riscv_emit_binary (code, target, op0, op1);
 }
 
+/* Helper for FP classify builtin expanders.  Emit fclass on OP1 (an SF, DF
+   or HF value) and set int OP0 to 1 if the result has any bit of the
+   bitmask FCLASS_VAL set, 0 otherwise.  Return false, emitting nothing,
+   if the mode of OP0 is not supported, true otherwise.  */
+
+bool
+riscv_emit_fp_classify (rtx op0, rtx op1, HOST_WIDE_INT fclass_val)
+{
+  /* Allow SI for rv32/rv64 and DI for rv64.  */
+  if (GET_MODE (op0) != SImode && GET_MODE (op0) != word_mode)
+    return false;
+
+  rtx t = gen_reg_rtx (word_mode);
+  rtx t_op0 = gen_reg_rtx (word_mode);
+
+  if (GET_MODE (op1) == SFmode && TARGET_64BIT)
+    emit_insn (gen_fclass_sfdi (t, op1));
+  else if (GET_MODE (op1) == SFmode)
+    emit_insn (gen_fclass_sfsi (t, op1));
+  else if (GET_MODE (op1) == DFmode && TARGET_64BIT)
+    emit_insn (gen_fclass_dfdi (t, op1));
+  else if (GET_MODE (op1) == DFmode)
+    emit_insn (gen_fclass_dfsi (t, op1));
+  else if (GET_MODE (op1) == HFmode && TARGET_64BIT)
+    emit_insn (gen_fclass_hfdi (t, op1));
+  else if (GET_MODE (op1) == HFmode)
+    emit_insn (gen_fclass_hfsi (t, op1));
+  else
+    gcc_unreachable ();
+
+  riscv_emit_binary (AND, t, t, GEN_INT (fclass_val));
+  rtx cmp = gen_rtx_NE (word_mode, t, const0_rtx);
+
+  if (TARGET_64BIT)
+    emit_insn (gen_cstoredi4 (t_op0, cmp, t, const0_rtx));
+  else
+    emit_insn (gen_cstoresi4 (t_op0, cmp, t, const0_rtx));
+
+  /* T_OP0 is word sized.  Only narrow it when OP0 is not (SImode OP0 on
+     rv64), so that the modes of the final move always agree.  */
+  if (GET_MODE (op0) != word_mode)
+    {
+      t_op0 = gen_lowpart (SImode, t_op0);
+      SUBREG_PROMOTED_VAR_P (t_op0) = 1;
+      SUBREG_PROMOTED_SET (t_op0, SRP_SIGNED);
+    }
+
+  emit_move_insn (op0, t_op0);
+  return true;
+}
+
 /* Jump to LABEL if (CODE OP0 OP1) holds.  */
 
 void
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index ff37125e3f28..d67e6908a47c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -68,6 +68,7 @@ 
   UNSPEC_FMAX
   UNSPEC_FMINM
   UNSPEC_FMAXM
+  UNSPEC_FCLASS
 
   ;; Stack tie
   UNSPEC_TIE
@@ -3436,6 +3437,68 @@ 
    (set_attr "mode" "<UNITMODE>")
    (set (attr "length") (const_int 16))])
 
+;; fclass classifies FP operand 1; bit numbers of its output bitmap:
+;;   0 negative infinity
+;;   1 negative normal number.
+;;   2 negative subnormal number.
+;;   3 -0
+;;   4 +0
+;;   5 positive subnormal number.
+;;   6 positive normal number.
+;;   7 positive infinity
+;;   8 signaling NaN.
+;;   9 quiet NaN
+
+(define_insn "fclass_<ANYF:mode><X:mode>"
+  [(set (match_operand:X	     0 "register_operand" "=r")
+	(unspec [(match_operand:ANYF 1 "register_operand" " f")]
+		   UNSPEC_FCLASS))]
+  "TARGET_HARD_FLOAT"
+  "fclass.<fmt>\t%0,%1"
+  [(set_attr "type" "fcmp")
+   (set_attr "mode" "<UNITMODE>")])
+
+;; Implements optab for isfinite.  Mask 0x7e selects fclass bits 1-6, i.e.
+;; normal, subnormal and zero of either sign.
+(define_expand "isfinite<ANYF:mode>2"
+  [(set (match_operand      0 "register_operand" "=r")
+	(match_operand:ANYF 1 "register_operand" " f"))]
+  "TARGET_HARD_FLOAT"
+{
+  if (riscv_emit_fp_classify (operands[0], operands[1], 0x7e))
+    DONE;
+  else
+    FAIL;
+})
+
+;; Implements optab for isnormal.  Mask 0x42 selects fclass bits 1 and 6,
+;; i.e. negative and positive normal numbers.
+(define_expand "isnormal<ANYF:mode>2"
+  [(set (match_operand      0 "register_operand" "=r")
+	(match_operand:ANYF 1 "register_operand" " f"))]
+  "TARGET_HARD_FLOAT"
+{
+  if (riscv_emit_fp_classify (operands[0], operands[1], 0x42))
+    DONE;
+  else
+    FAIL;
+})
+
+;; Implements optab for isinf.  Mask 0x81 selects fclass bits 0 and 7,
+;; i.e. negative and positive infinity.
+;; Note: glibc man page states tri-modal (+ve inf ? 1 : -ve inf ? -1 : 0)
+;;       However gcc testsuite tg-test.h expect 1 for -ve.
+(define_expand "isinf<ANYF:mode>2"
+  [(set (match_operand      0 "register_operand" "=r")
+	(match_operand:ANYF 1 "register_operand" " f"))]
+  "TARGET_HARD_FLOAT"
+{
+  if (riscv_emit_fp_classify (operands[0], operands[1], 0x81))
+    DONE;
+  else
+    FAIL;
+})
+
 (define_insn "*seq_zero_<X:mode><GPR:mode>"
   [(set (match_operand:GPR       0 "register_operand" "=r")
 	(eq:GPR (match_operand:X 1 "register_operand" " r")
diff --git a/gcc/testsuite/gcc.target/riscv/fclass.c b/gcc/testsuite/gcc.target/riscv/fclass.c
new file mode 100644
index 000000000000..ea0f173ecf4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/fclass.c
@@ -0,0 +1,38 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target hard_float } */
+/* { dg-options "-march=rv64gc -mabi=lp64d  -ftrapping-math" { target { rv64 } } } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -ftrapping-math" { target { rv32 } } } */
+/* Each builtin below must expand to fclass, without frflags/fsflags.  */
+int d_isfinite(double a)
+{
+  return __builtin_isfinite(a);
+}
+
+int d_isnormal(double a)
+{
+  return __builtin_isnormal(a);
+}
+
+int d_isinf(double a)
+{
+  return __builtin_isinf(a);
+}
+
+int f_isfinite(float a)
+{
+  return __builtin_isfinite(a);
+}
+
+int f_isnormal(float a)
+{
+  return __builtin_isnormal(a);
+}
+
+int f_isinf(float a)
+{
+  return __builtin_isinf(a);
+}
+
+/* { dg-final { scan-assembler-not   {\mfrflags}  } } */
+/* { dg-final { scan-assembler-not   {\mfsflags}  } } */
+/* { dg-final { scan-assembler-times {\tfclass} 6 } } */