diff mbox

[libitm] Work around missing AVX support

Message ID 4EBB1BBA.7000209@gmail.com
State New
Headers show

Commit Message

Richard Henderson Nov. 10, 2011, 12:32 a.m. UTC
Not pretty at all.  But given the corresponding irritation in writing assembler
wrapper functions, it seems like it's about a wash.

Tested with and without HAVE_AS_AVX on x86_64-linux.


r~
commit 856dd9f4777fbafce3038e889e9a9bf48222215d
Author: Richard Henderson <rth@redhat.com>
Date:   Wed Nov 9 16:28:45 2011 -0800

    libitm: Work around assembler missing AVX insns.

Comments

Jakub Jelinek Nov. 10, 2011, 8:16 a.m. UTC | #1
On Wed, Nov 09, 2011 at 04:32:58PM -0800, Richard Henderson wrote:
> Not pretty at all.  But given the corresponding irritation in writing assembler
> wrapper functions, it seems like it's about a wash.
> 
> Tested with and without HAVE_AS_AVX on x86_64-linux.

Shouldn't -mavx also not be passed in that case?  Then you wouldn't need
to undef __AVX__, and we wouldn't risk gcc deciding to optimize memcpy
or something similar using AVX instructions...

	Jakub
diff mbox

Patch

diff --git a/libitm/ChangeLog b/libitm/ChangeLog
index b1629b1..8aeb589 100644
--- a/libitm/ChangeLog
+++ b/libitm/ChangeLog
@@ -1,5 +1,12 @@ 
 2011-11-09  Richard Henderson  <rth@redhat.com>
 
+	* acinclude.m4 (LIBITM_CHECK_AS_AVX): New.
+	* configure.ac: Use it.
+	* config.h.in, configure: Rebuild.
+	* config/x86/x86_avx.cc: Handle !HAVE_AS_AVX.
+
+2011-11-09  Richard Henderson  <rth@redhat.com>
+
 	* barrier.tpl, memcpy.cc, memset.cc, method-wbetl.cc: Remove file.
 	* config/alpha/unaligned.h: Remove file.
 	* config/generic/unaligned.h: Remove file.
diff --git a/libitm/acinclude.m4 b/libitm/acinclude.m4
index 8fcde4b..58fbf42 100644
--- a/libitm/acinclude.m4
+++ b/libitm/acinclude.m4
@@ -95,6 +95,20 @@  AC_DEFUN([LIBITM_CHECK_SIZE_T_MANGLING], [
     [Define to the letter to which size_t is mangled.])
 ])
 
+dnl Check if as supports AVX instructions.  The CPU pattern is double-quoted so m4 keeps its brackets.
+AC_DEFUN([LIBITM_CHECK_AS_AVX], [
+case "${target_cpu}" in
+i[[3456]]86 | x86_64)
+  AC_CACHE_CHECK([if the assembler supports AVX], libitm_cv_as_avx, [
+    AC_TRY_COMPILE([], [asm("vzeroupper");],
+		   [libitm_cv_as_avx=yes], [libitm_cv_as_avx=no])
+  ])
+  if test x$libitm_cv_as_avx = xyes; then
+    AC_DEFINE(HAVE_AS_AVX, 1, [Define to 1 if the assembler supports AVX.])
+  fi
+  ;;
+esac])
+
 sinclude(../libtool.m4)
 dnl The lines below arrange for aclocal not to bring an installed
 dnl libtool.m4 into aclocal.m4, while still arranging for automake to
diff --git a/libitm/config/x86/x86_avx.cc b/libitm/config/x86/x86_avx.cc
index 30420aa..cd20fe2 100644
--- a/libitm/config/x86/x86_avx.cc
+++ b/libitm/config/x86/x86_avx.cc
@@ -22,9 +22,66 @@ 
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include "config.h"
+
+// ??? This is pretty gross, but we're going to frob types of the functions.
+// Is this better or worse than just admitting we need to do this in pure
+// assembly?
+
+#ifndef HAVE_AS_AVX
+#undef __AVX__	/* NOTE(review): presumably makes the headers below skip their AVX-only parts -- confirm */
+#endif /* !HAVE_AS_AVX */
+
 #include "libitm_i.h"
 #include "dispatch.h"
 
+extern "C" {
+
+#ifndef HAVE_AS_AVX
+typedef float _ITM_TYPE_M256 __attribute__((vector_size(32), may_alias)); /* 32-byte vector type, declared locally for the no-AVX-assembler build */
+#endif /* !HAVE_AS_AVX */
+
+// ??? Re-define the memcpy implementations so that we can frob the
+// interface to deal with possibly missing AVX instruction set support.
+
+#ifdef HAVE_AS_AVX	/* assembler knows AVX: ordinary by-value interface */
+#define RETURN(X)	return X
+#define STORE(X,Y)	X = Y
+#define OUTPUT(T)	_ITM_TYPE_##T
+#define INPUT(T,X)	, _ITM_TYPE_##T X
+#else	/* assembler lacks AVX: move the value through %ymm0 with raw bytes */
+/* Emit vmovaps (%rax),%ymm0.  X is only read here, so it is an input.  */
+#define RETURN(X) \
+  asm volatile(".byte 0xc5,0xfc,0x28,0x00" : : "a"(&X), "m"(X))
+/* Emit vmovaps %ymm0,(%rax); vzeroupper.  X is written, hence "=m".  */
+#define STORE(X,Y) \
+  asm volatile(".byte 0xc5,0xfc,0x29,0x00,0xc5,0xf8,0x77" : "=m"(X) : "a"(&X))
+#define OUTPUT(T)	void	/* the result is left in %ymm0 by RETURN */
+#define INPUT(T,X)	/* no C parameter; STORE takes the value from %ymm0 */
+#endif
+
+#undef ITM_READ_MEMCPY	/* override: define the _ITM_<LSMOD><T> load entry point via OUTPUT/RETURN */
+#define ITM_READ_MEMCPY(T, LSMOD, TARGET, M2)				\
+OUTPUT(T) ITM_REGPARM _ITM_##LSMOD##T (const _ITM_TYPE_##T *ptr)	\
+{									\
+  _ITM_TYPE_##T v;		/* destination handed to memtransfer */	\
+  TARGET memtransfer##M2(&v, ptr, sizeof(_ITM_TYPE_##T), false,		\
+			 GTM::abi_dispatch::NONTXNAL,			\
+			 GTM::abi_dispatch::LSMOD);			\
+  RETURN(v);	/* keep last: without HAVE_AS_AVX this loads v into %ymm0 */ \
+}
+
+#undef ITM_WRITE_MEMCPY	/* override: define the _ITM_<LSMOD><T> store entry point via INPUT/STORE */
+#define ITM_WRITE_MEMCPY(T, LSMOD, TARGET, M2)				\
+void ITM_REGPARM _ITM_##LSMOD##T (_ITM_TYPE_##T *ptr INPUT(T,in))	\
+{									\
+  _ITM_TYPE_##T v;		/* source handed to memtransfer */	\
+  STORE(v, in);	/* keep first: without HAVE_AS_AVX this saves %ymm0 into v */ \
+  TARGET memtransfer##M2(ptr, &v, sizeof(_ITM_TYPE_##T), false,		\
+			 GTM::abi_dispatch::LSMOD,			\
+			 GTM::abi_dispatch::NONTXNAL);			\
+}
+
 // ??? Use memcpy for now, until we have figured out how to best instantiate
 // these loads/stores.
 CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(M256, GTM::abi_disp()->, )
@@ -34,3 +91,5 @@  _ITM_LM256 (const _ITM_TYPE_M256 *ptr)
 {
   GTM::GTM_LB (ptr, sizeof (*ptr));
 }
+
+}
diff --git a/libitm/configure.ac b/libitm/configure.ac
index c40ecb5..7de5cbe 100644
--- a/libitm/configure.ac
+++ b/libitm/configure.ac
@@ -237,6 +237,7 @@  CFLAGS="$save_CFLAGS $XCFLAGS"
 # had a chance to set XCFLAGS.
 LIBITM_CHECK_SYNC_BUILTINS
 LIBITM_CHECK_64BIT_SYNC_BUILTINS
+LIBITM_CHECK_AS_AVX
 
 # Cleanup and exit.
 CFLAGS="$save_CFLAGS"