diff mbox series

[v2] libstdc++: Preserve signbit of nan when converting float to double [PR113578]

Message ID 20240802103325.474858-1-jwakely@redhat.com
State New
Headers show
Series [v2] libstdc++: Preserve signbit of nan when converting float to double [PR113578] | expand

Commit Message

Jonathan Wakely Aug. 2, 2024, 10:30 a.m. UTC
Rivos CI found a problem in the v1 patch (see the PR comments). This
should solve it, by using 'constexpr' not _GLIBCXX17_CONSTEXPR. That
requires diagnostic pragmas to suppress -Wc++17-extensions and isn't
possible at all in C++98 mode, so just use the slightly less efficient
__builtin_signbit(__f) ? _To(-1.0) : _To(+1.0)
code for C++98.

I also noticed that the definition of __sign needed to be outside the
#if group.

Tested x86_64-linux.

-- >8 --

LWG 117 specifies that inserting a float into an ostream should cast it
to double, because there's no std::num_put::put member that takes a
float. However, on RISC-V converting a NaN float to double loses the
sign, which means that negative NaN floats are printed as positive.

This has been reported as LWG 4101 and there is good support for fixing
the standard to preserve the sign bit when printing negative NaN values.

This change uses copysign((double)f, (double)std::bit_cast<int>(f)) to
get a double that preserves the sign. The bit_cast gives us an integer
with the same signbit, and casting that to the target type preserves
the signbit. We don't care about the value, as copysign only uses the
signbit.

The inserters for extended floating-point types need the same treatment,
so add a new _S_cast_flt helper to do the signbit-preserving conversion
generically.

So far only RISC-V has been confirmed to need this treatment, but we
might need to extend it to other targets later.

libstdc++-v3/ChangeLog:

	PR libstdc++/113578
	* include/std/ostream (_S_cast_flt): New static member function
	to restore signbit after casting to double or long double.
	(operator<<(float), operator<<(_Float16), operator<<(_Float32))
	(operator<<(_Float64), operator(_Float128))
	(operator<<(__bfloat16_t)): Use _S_cast_flt.
	testsuite/27_io/basic_ostream/inserters_arithmetic/lwg4101.cc:
	New test.

Co-authored-by: Andrew Waterman <andrew@sifive.com>
---
 libstdc++-v3/include/std/ostream              | 43 ++++++++++++++++---
 .../inserters_arithmetic/lwg4101.cc           | 14 ++++++
 2 files changed, 51 insertions(+), 6 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/lwg4101.cc
diff mbox series

Patch

diff --git a/libstdc++-v3/include/std/ostream b/libstdc++-v3/include/std/ostream
index 12be6c4fd17..09572f8389c 100644
--- a/libstdc++-v3/include/std/ostream
+++ b/libstdc++-v3/include/std/ostream
@@ -233,7 +233,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
 	// _GLIBCXX_RESOLVE_LIB_DEFECTS
 	// 117. basic_ostream uses nonexistent num_put member functions.
-	return _M_insert(static_cast<double>(__f));
+	return _M_insert(_S_cast_flt<double>(__f));
       }
 
       __ostream_type&
@@ -246,7 +246,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __ostream_type&
       operator<<(_Float16 __f)
       {
-	return _M_insert(static_cast<double>(__f));
+	return _M_insert(_S_cast_flt<double>(__f));
       }
 #endif
 
@@ -255,7 +255,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __ostream_type&
       operator<<(_Float32 __f)
       {
-	return _M_insert(static_cast<double>(__f));
+	return _M_insert(_S_cast_flt<double>(__f));
       }
 #endif
 
@@ -264,7 +264,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __ostream_type&
       operator<<(_Float64 __f)
       {
-	return _M_insert(static_cast<double>(__f));
+	return _M_insert(_S_cast_flt<double>(__f));
       }
 #endif
 
@@ -273,7 +273,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __ostream_type&
       operator<<(_Float128 __f)
       {
-	return _M_insert(static_cast<long double>(__f));
+	return _M_insert(_S_cast_flt<long double>(__f));
       }
 #endif
 
@@ -282,7 +282,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __ostream_type&
       operator<<(__gnu_cxx::__bfloat16_t __f)
       {
-	return _M_insert(static_cast<double>(__f));
+	return _M_insert(_S_cast_flt<double>(__f));
       }
 #endif
 
@@ -473,7 +473,38 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_write(const char_type* __s, streamsize __n)
       { std::__ostream_insert(*this, __s, __n); }
 #endif
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wc++17-extensions" // for if-constexpr
+      template<typename _To, typename _From>
+	static _To
+	_S_cast_flt(_From __f)
+	{
+	  _To __d = static_cast<_To>(__f);
+	  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+	  // 4101: LWG 117 loses the sign for negative NaN on some arches.
+#if defined __riscv
+	  _To __sign;
+#if __cpp_constexpr && __has_builtin(__builtin_bit_cast)
+	  if constexpr (sizeof(__f) == sizeof(short))
+	    __sign = static_cast<_To>(__builtin_bit_cast(short, __f));
+	  else if constexpr (sizeof(__f) == sizeof(int))
+	    __sign = static_cast<_To>(__builtin_bit_cast(int, __f));
+	  else if constexpr (sizeof(__f) == sizeof(long long))
+	    __sign = static_cast<_To>(__builtin_bit_cast(long long, __f));
+	  else
+#endif
+	  __sign = __builtin_signbit(__f) ? _To(-1.0) : _To(+1.0);
+
+	  if _GLIBCXX17_CONSTEXPR (__is_same(_To, double))
+	    __d = __builtin_copysign(__d, __sign);
+	  else if _GLIBCXX17_CONSTEXPR (__is_same(_To, long double))
+	    __d = __builtin_copysignl(__d, __sign);
+#endif
+	  return __d;
+	}
     };
+#pragma GCC diagnostic pop
 
   /**
    *  @brief  Performs setup work for output streams.
diff --git a/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/lwg4101.cc b/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/lwg4101.cc
new file mode 100644
index 00000000000..1e1b8e08535
--- /dev/null
+++ b/libstdc++-v3/testsuite/27_io/basic_ostream/inserters_arithmetic/lwg4101.cc
@@ -0,0 +1,14 @@ 
+// { dg-do run }
+// LWG 4101. LWG 117 loses the sign for negative NaN on some architectures
+
+#include <sstream>
+#include <limits>
+#include <testsuite_hooks.h>
+
+int main()
+{
+  float nan = std::numeric_limits<float>::quiet_NaN();
+  std::ostringstream os;
+  os << -nan;
+  VERIFY( os.str()[0] == '-' );
+}