diff mbox series

[RFC] tree-optimization/114659 - VN and FP to int punning

Message ID 20240721103302.5AE8F13ABD@imap1.dmz-prg2.suse.org
State New
Headers show
Series [RFC] tree-optimization/114659 - VN and FP to int punning | expand

Commit Message

Richard Biener July 21, 2024, 10:32 a.m. UTC
The following addresses another case where x87 FP loads mangle the
bit representation and thus are not suitable for a representative
in other types.  VN was value-numbering a later integer load of 'x'
as the same as a former float load of 'x'.

The following disables this when the result is not known constant.

This now regresses gcc.dg/tree-ssa/ssa-fre-7.c but for x87 float
the optimization might elide a FP load/store "noop" move that isn't
noop on x87 and thus the desired transform is invalid.

Nevertheless it's bad to pessimize all targets for this.  I was
wondering if it's possible to key this on reg_raw_mode[] but
that needs a hard register number (and suspiciously the array
has no DFmode or SFmode on x86_64 but only XFmode).  So would
this need a new target hook?  Should this use some other
mechanism to query for the correctness of performing the load
in another mode and then punning to the destination mode?

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

	PR tree-optimization/114659
	* tree-ssa-sccvn.cc (visit_reference_op_load): Do not
	pun from a scalar floating point mode load to a different
	type unless we can do so by constant folding.

	* gcc.target/i386/pr114659.c: New testcase.
---
 gcc/testsuite/gcc.target/i386/pr114659.c | 62 ++++++++++++++++++++++++
 gcc/tree-ssa-sccvn.cc                    |  7 +++
 2 files changed, 69 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr114659.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.target/i386/pr114659.c b/gcc/testsuite/gcc.target/i386/pr114659.c
new file mode 100644
index 00000000000..e1e24d55687
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr114659.c
@@ -0,0 +1,62 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+int
+my_totalorderf (float const *x, float const *y)
+{
+  int xs = __builtin_signbit (*x);
+  int ys = __builtin_signbit (*y);
+  if (!xs != !ys)
+    return xs;
+
+  int xn = __builtin_isnan (*x);
+  int yn = __builtin_isnan (*y);
+  if (!xn != !yn)
+    return !xn == !xs;
+  if (!xn)
+    return *x <= *y;
+
+  unsigned int extended_sign = -!!xs;
+  union { unsigned int i; float f; } xu = {0}, yu = {0};
+  __builtin_memcpy (&xu.f, x, sizeof (float));
+  __builtin_memcpy (&yu.f, y, sizeof (float));
+  return (xu.i ^ extended_sign) <= (yu.i ^ extended_sign);
+}
+
+static float
+positive_NaNf ()
+{
+  float volatile nan = 0.0f / 0.0f;
+  return (__builtin_signbit (nan) ? - nan : nan);
+}
+
+typedef union { float value; unsigned int word[1]; } memory_float;
+
+static memory_float
+construct_memory_SNaNf (float quiet_value)
+{
+  memory_float m;
+  m.value = quiet_value;
+  m.word[0] ^= (unsigned int) 1 << 22;
+  m.word[0] |= (unsigned int) 1;
+  return m;
+}
+
+memory_float x[7] =
+  {
+    { 0 },
+    { 1e-5 },
+    { 1 },
+    { 1e37 },
+    { 1.0f / 0.0f },
+  };
+
+int
+main ()
+{
+  x[5] = construct_memory_SNaNf (positive_NaNf ());
+  x[6] = (memory_float) { positive_NaNf () };
+  if (! my_totalorderf (&x[5].value, &x[6].value))
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 0139f1b4e30..62f3de11b56 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -5825,6 +5825,13 @@  visit_reference_op_load (tree lhs, tree op, gimple *stmt)
 	result = NULL_TREE;
       else if (CONSTANT_CLASS_P (result))
 	result = const_unop (VIEW_CONVERT_EXPR, TREE_TYPE (op), result);
+      /* Do not treat a float-mode load as preserving the bit
+	 representation.  See PR114659; for x87 FP modes there
+	 is no load instruction that does not at least turn sNaNs
+	 into qNaNs.  But allow the case of a constant FP value we can
+	 fold above.  */
+      else if (SCALAR_FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (result))))
+	result = NULL_TREE;
       else
 	{
 	  /* We will be setting the value number of lhs to the value number