diff mbox series

, PR libgcc/83112, Add ifunc support for _mulkc3 and _divkc3

Message ID 20171127223738.GA16878@ibm-tiger.the-meissners.org
State New
Headers show
Series , PR libgcc/83112, Add ifunc support for _mulkc3 and _divkc3 | expand

Commit Message

Michael Meissner Nov. 27, 2017, 10:37 p.m. UTC
Unlike the other _Float128 emulation support in the PowerPC libgcc, the support
for _Complex _Float128 multiply and divide doesn't resolve into a single
instruction on the power9 system.

But these two functions do benefit if they are compiled for ISA 3.0 _Float128
hardware instructions, by eliminating calling __{add,sub,mul,div}kf2 through
PLT functions to get to the hardware instruction, and instead using the native
instruction.

I have done bootstrap builds on a little endian power8 system with/without the
patches and there were no regressions in the testsuite.

I have also built the compiler on a little endian power9 prototype system, and
I ran a test that did 100,000,000 passes of complex multiplies and adds and then
100,000,000 passes of complex divides and subtracts.  The test with these fixes
was roughly 45% faster than the test with the unpatched compiler.  I also ran
the test on a power8 system, where it ran using the software emulation.

Can I check this patch into the trunk, assuming that the previously posted
patch for PR libgcc/813112 has also been applied?

2017-11-27  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR libgcc/83103
	* config/rs6000/quad-float128.h (TF): Don't define if long double
	is IEEE 128-bit floating point.
	(TCtype): Define as either TCmode or KCmode, depending on whether
	long double is IEEE 128-bit floating point.
	(__mulkc3_sw): Add declarations for software/hardware versions of
	complex multiply/divide.
	(__divkc3_sw): Likewise.
	(__mulkc3_hw): Likewise.
	(__divkc3_hw): Likewise.
	* config/rs6000/_mulkc3.c (_mulkc3): If we are building ifunc
	handlers to switch between using software emulation and hardware
	float128 instructions, build the complex multiply/divide functions
	for both software and hardware support.
	* config/rs6000/_divkc3.c (_divkc3): Likewise.
	* config/rs6000/float128-ifunc.c (__mulkc3_resolve): Likewise.
	(__divkc3_resolve): Likewise.
	(__mulkc3): Likewise.
	(__divkc3): Likewise.
	* config/rs6000/t-float128-hw (fp128_hardfp_src): Likewise.
	(fp128_hw_src): Likewise.
	(fp128_hw_static_obj): Likewise.
	(fp128_hw_shared_obj): Likewise.
	(_mulkc3-hw.c): Likewise.
	(_divkc3-hw.c): Likewise.
	* config/rs6000/t-float128 (clean-float128): Add deleting
	_mulkc3-hw.c and _divkc3-hw.c.
diff mbox series

Patch

Index: libgcc/config/rs6000/quad-float128.h
===================================================================
--- libgcc/config/rs6000/quad-float128.h	(revision 255177)
+++ libgcc/config/rs6000/quad-float128.h	(working copy)
@@ -30,13 +30,20 @@ 
 /* quad.h defines the TFtype type by:
    typedef float TFtype __attribute__ ((mode (TF)));
 
-   This define forces it to use KFmode (aka, ieee 128-bit floating point).  */
+   This define forces it to use KFmode (aka, ieee 128-bit floating point).
+   However, when the compiler's default is changed so that long double is IEEE
+   128-bit floating point, we need to go back to using TFmode and TCmode.  */
+#ifndef __LONG_DOUBLE_IEEE128__
 #define TF KF
 
 /* We also need TCtype to represent complex ieee 128-bit float for
    __mulkc3 and __divkc3.  */
 typedef __complex float TCtype __attribute__ ((mode (KC)));
 
+#else
+typedef __complex float TCtype __attribute__ ((mode (TC)));
+#endif
+
 /* Force the use of the VSX instruction set.  */
 #if defined(_ARCH_PPC) && (!defined(__VSX__) || !defined(__FLOAT128__))
 #pragma GCC target ("vsx,float128")
@@ -88,6 +95,8 @@  extern TFtype __floatunsikf_sw (USItype_
 extern TFtype __floatundikf_sw (UDItype_ppc);
 extern IBM128_TYPE __extendkftf2_sw (TFtype);
 extern TFtype __trunctfkf2_sw (IBM128_TYPE);
+extern TCtype __mulkc3_sw (TFtype, TFtype, TFtype, TFtype);
+extern TCtype __divkc3_sw (TFtype, TFtype, TFtype, TFtype);
 
 #ifdef _ARCH_PPC64
 /* We do not provide ifunc resolvers for __fixkfti, __fixunskfti, __floattikf,
@@ -128,6 +137,8 @@  extern TFtype __floatunsikf_hw (USItype_
 extern TFtype __floatundikf_hw (UDItype_ppc);
 extern IBM128_TYPE __extendkftf2_hw (TFtype);
 extern TFtype __trunctfkf2_hw (IBM128_TYPE);
+extern TCtype __mulkc3_hw (TFtype, TFtype, TFtype, TFtype);
+extern TCtype __divkc3_hw (TFtype, TFtype, TFtype, TFtype);
 
 /* Ifunc function declarations, to automatically switch between software
    emulation and hardware support.  */
Index: libgcc/config/rs6000/_mulkc3.c
===================================================================
--- libgcc/config/rs6000/_mulkc3.c	(revision 255177)
+++ libgcc/config/rs6000/_mulkc3.c	(working copy)
@@ -31,6 +31,10 @@  typedef __complex float KCtype __attribu
 #define isnan __builtin_isnan
 #define isinf __builtin_isinf
 
+#if defined(FLOAT128_HW_INSNS) && !defined(__mulkc3)
+#define __mulkc3 __mulkc3_sw
+#endif
+
 KCtype
 __mulkc3 (KFtype a, KFtype b, KFtype c, KFtype d)
 {
Index: libgcc/config/rs6000/_divkc3.c
===================================================================
--- libgcc/config/rs6000/_divkc3.c	(revision 255177)
+++ libgcc/config/rs6000/_divkc3.c	(working copy)
@@ -33,6 +33,10 @@  typedef __complex float KCtype __attribu
 #define isinf __builtin_isinf
 #define isfinite __builtin_isfinite
 
+#if defined(FLOAT128_HW_INSNS) && !defined(__divkc3)
+#define __divkc3 __divkc3_sw
+#endif
+
 KCtype
 __divkc3 (KFtype a, KFtype b, KFtype c, KFtype d)
 {
Index: libgcc/config/rs6000/float128-ifunc.c
===================================================================
--- libgcc/config/rs6000/float128-ifunc.c	(revision 255177)
+++ libgcc/config/rs6000/float128-ifunc.c	(working copy)
@@ -71,6 +71,8 @@  typedef TFtype (f128_func_usi_t)(USItype
 typedef TFtype (f128_func_udi_t)(UDItype_ppc);
 typedef IBM128_TYPE (ibm_func_f128_t)(TFtype);
 typedef TFtype (f128_func_ibm_t)(IBM128_TYPE);
+typedef TCtype (cf128_func_f128_f128_f128_f128_t) (TFtype, TFtype, TFtype,
+						   TFtype);
 
 static f128_func_f128_f128_t *__addkf3_resolve (void);
 static f128_func_f128_f128_t *__subkf3_resolve (void);
@@ -98,6 +100,8 @@  static f128_func_usi_t *__floatunsikf_re
 static f128_func_udi_t *__floatundikf_resolve (void);
 static ibm_func_f128_t *__extendkftf2_resolve (void);
 static f128_func_ibm_t *__trunctfkf2_resolve (void);
+static cf128_func_f128_f128_f128_f128_t *__mulkc3_resolve (void);
+static cf128_func_f128_f128_f128_f128_t *__divkc3_resolve (void);
 
 static f128_func_f128_f128_t *
 __addkf3_resolve (void)
@@ -210,7 +214,19 @@  __extendkftf2_resolve (void)
 static f128_func_ibm_t *
 __trunctfkf2_resolve (void)
 {
-  return (void *) SW_OR_HW (__trunctfkf2_sw, __trunctfkf2_hw);
+  return SW_OR_HW (__trunctfkf2_sw, __trunctfkf2_hw);
+}
+
+static cf128_func_f128_f128_f128_f128_t *
+__mulkc3_resolve (void)
+{
+  return SW_OR_HW (__mulkc3_sw, __mulkc3_hw);
+}
+
+static cf128_func_f128_f128_f128_f128_t *
+__divkc3_resolve (void)
+{
+  return SW_OR_HW (__divkc3_sw, __divkc3_hw);
 }
 
 static cmp_func_f128_f128_t *
@@ -338,3 +354,9 @@  IBM128_TYPE __extendkftf2 (TFtype)
 
 TFtype __trunctfkf2 (IBM128_TYPE)
   __attribute__ ((__ifunc__ ("__trunctfkf2_resolve")));
+
+TCtype __mulkc3 (TFtype, TFtype, TFtype, TFtype)
+  __attribute__ ((__ifunc__ ("__mulkc3_resolve")));
+
+TCtype __divkc3 (TFtype, TFtype, TFtype, TFtype)
+  __attribute__ ((__ifunc__ ("__divkc3_resolve")));
Index: libgcc/config/rs6000/t-float128-hw
===================================================================
--- libgcc/config/rs6000/t-float128-hw	(revision 255177)
+++ libgcc/config/rs6000/t-float128-hw	(working copy)
@@ -5,10 +5,12 @@ 
 FLOAT128_HW_INSNS	= -DFLOAT128_HW_INSNS
 
 # New functions for hardware support
-fp128_hw_funcs		= float128-hw
-fp128_hw_src		= $(srcdir)/config/rs6000/float128-hw.c
-fp128_hw_static_obj	= float128-hw$(objext)
-fp128_hw_shared_obj	= float128-hw_s$(objext)
+fp128_hardfp_src	= _mulkc3-hw.c _divkc3-hw.c
+fp128_hw_funcs		= float128-hw _mulkc3-hw _divkc3-hw
+fp128_hw_src		= $(srcdir)/config/rs6000/float128-hw.c _mulkc3-hw.c \
+			  _divkc3-hw.c
+fp128_hw_static_obj	= $(addsuffix $(objext),$(fp128_hw_funcs))
+fp128_hw_shared_obj	= $(addsuffix _s$(objext),$(fp128_hw_funcs))
 fp128_hw_obj		= $(fp128_hw_static_obj) $(fp128_hw_shared_obj)
 
 fp128_ifunc_funcs	= float128-ifunc
@@ -33,3 +35,13 @@  $(fp128_hw_obj)		 : $(srcdir)/config/rs6
 
 $(fp128_ifunc_obj)	 : INTERNAL_CFLAGS += $(FP128_CFLAGS_SW)
 $(fp128_ifunc_obj)	 : $(srcdir)/config/rs6000/t-float128-hw
+
+# Build the hardware variant: _mulkc3.c with __mulkc3 renamed to __mulkc3_hw.
+_mulkc3-hw.c: $(srcdir)/config/rs6000/_mulkc3.c
+	rm -f $@
+	(echo "#define __mulkc3 __mulkc3_hw"; cat $<) > $@
+
+# Build the hardware variant: _divkc3.c with __divkc3 renamed to __divkc3_hw.
+_divkc3-hw.c: $(srcdir)/config/rs6000/_divkc3.c
+	rm -f $@
+	(echo "#define __divkc3 __divkc3_hw"; cat $<) > $@
Index: libgcc/config/rs6000/t-float128
===================================================================
--- libgcc/config/rs6000/t-float128	(revision 255177)
+++ libgcc/config/rs6000/t-float128	(working copy)
@@ -86,7 +86,7 @@  test:
 	for x in $(fp128_obj); do echo "    $$x"; done;
 
 clean-float128:
-	rm -rf $(fp128_softfp_src)
+	rm -rf $(fp128_softfp_src) $(fp128_hardfp_src)
 	@$(MULTICLEAN) multi-clean DO=clean-float128
 
 # For now, only put it in the static library