From patchwork Thu Dec  1 01:13:02 2011
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Richard Henderson <rth@redhat.com>
X-Patchwork-Id: 128622
Return-Path: 
 <gcc-patches-return-308817-incoming=patchwork.ozlabs.org@gcc.gnu.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Received: from sourceware.org (server1.sourceware.org [209.132.180.131])
	by ozlabs.org (Postfix) with SMTP id 48D5EB6F69
	for <incoming@patchwork.ozlabs.org>;
	Thu,  1 Dec 2011 12:13:34 +1100 (EST)
Received: (qmail 28016 invoked by alias); 1 Dec 2011 01:13:32 -0000
Received: (qmail 28002 invoked by uid 22791); 1 Dec 2011 01:13:25 -0000
X-SWARE-Spam-Status: No, hits=-6.4 required=5.0	tests=AWL, BAYES_05,
	RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, SPF_HELO_PASS, TW_TX
X-Spam-Check-By: sourceware.org
Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by
	sourceware.org (qpsmtpd/0.43rc1) with ESMTP;
	Thu, 01 Dec 2011 01:13:03 +0000
Received: from int-mx02.intmail.prod.int.phx2.redhat.com
	(int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12])	by
	mx1.redhat.com (8.14.4/8.14.4) with ESMTP id
	pB11D3Yk023381	(version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA
	bits=256 verify=OK)	for <gcc-patches@gcc.gnu.org>;
	Wed, 30 Nov 2011 20:13:03 -0500
Received: from anchor.twiddle.net (vpn-229-14.phx2.redhat.com
	[10.3.229.14])	by int-mx02.intmail.prod.int.phx2.redhat.com
	(8.13.8/8.13.8) with ESMTP id pB11D2E2021894;
	Wed, 30 Nov 2011 20:13:02 -0500
Message-ID: <4ED6D49E.5070001@redhat.com>
Date: Wed, 30 Nov 2011 17:13:02 -0800
From: Richard Henderson <rth@redhat.com>
User-Agent: Mozilla/5.0 (X11; Linux x86_64;
	rv:8.0) Gecko/20111115 Thunderbird/8.0
MIME-Version: 1.0
To: triegel@redhat.com, GCC Patches <gcc-patches@gcc.gnu.org>
Subject: [RFC][libitm] Convert to c++11 atomics
X-IsSubscribed: yes
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Unsubscribe: 
 <mailto:gcc-patches-unsubscribe-incoming=patchwork.ozlabs.org@gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Delivered-To: mailing list gcc-patches@gcc.gnu.org

The library is written in C++, so in theory we can use the real atomic<> templates, etc.  Except that we have the same horrid problem finding the C++ headers as did for <type_traits>, so again we have a local copy of <atomic>.  Blah.  But given that it is a copy, the rest of the code is using the real interfaces.

This passes the testsuite on power7 (gcc110), which *is* picky about memory barriers, but I seem to recall that the larger external tests that Velox used were much better at picking out problems.  And I've misplaced those...

Torvald, if you'd be so kind as to cast another set of eyes across this, I'd be grateful.


r~
commit 29a0e940865e62c99308c308387f1fb6b1060566
Author: Richard Henderson <rth@twiddle.net>
Date:   Tue Nov 29 16:14:34 2011 -0800

    libitm: Conversion to c++11 atomics.
diff --git a/libitm/beginend.cc b/libitm/beginend.cc
index d8ef651..bcc8516 100644
--- a/libitm/beginend.cc
+++ b/libitm/beginend.cc
@@ -37,12 +37,18 @@ gtm_thread *GTM::gtm_thread::list_of_threads = 0;
 unsigned GTM::gtm_thread::number_of_threads = 0;
 
 gtm_stmlock GTM::gtm_stmlock_array[LOCK_ARRAY_SIZE];
-gtm_version GTM::gtm_clock;
+atomic<gtm_version> GTM::gtm_clock;
 
 /* ??? Move elsewhere when we figure out library initialization.  */
 uint64_t GTM::gtm_spin_count_var = 1000;
 
+#ifdef HAVE_64BIT_SYNC_BUILTINS
+static atomic<_ITM_transactionId_t> global_tid;
+#else
 static _ITM_transactionId_t global_tid;
+static pthread_mutex_t global_tid_lock = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
 
 // Provides a on-thread-exit callback used to release per-thread data.
 static pthread_key_t thr_release_key;
@@ -114,7 +120,7 @@ GTM::gtm_thread::gtm_thread ()
   // This object's memory has been set to zero by operator new, so no need
   // to initialize any of the other primitive-type members that do not have
   // constructors.
-  shared_state = ~(typeof shared_state)0;
+  shared_state.store(-1, memory_order_relaxed);
 
   // Register this transaction with the list of all threads' transactions.
   serial_lock.write_lock ();
@@ -132,13 +138,8 @@ GTM::gtm_thread::gtm_thread ()
     GTM_fatal("Setting thread release TLS key failed.");
 }
 
-
-
-#ifndef HAVE_64BIT_SYNC_BUILTINS
-static pthread_mutex_t global_tid_lock = PTHREAD_MUTEX_INITIALIZER;
-#endif
-
-static inline uint32_t choose_code_path(uint32_t prop, abi_dispatch *disp)
+static inline uint32_t
+choose_code_path(uint32_t prop, abi_dispatch *disp)
 {
   if ((prop & pr_uninstrumentedCode) && disp->can_run_uninstrumented_code())
     return a_runUninstrumentedCode;
@@ -258,7 +259,7 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
   else
     {
 #ifdef HAVE_64BIT_SYNC_BUILTINS
-      tx->id = __sync_add_and_fetch (&global_tid, tid_block_size);
+      tx->id = global_tid.fetch_add(tid_block_size, memory_order_relaxed);
       tx->local_tid = tx->id + 1;
 #else
       pthread_mutex_lock (&global_tid_lock);
@@ -480,7 +481,7 @@ GTM::gtm_thread::trycommit ()
 	      it = it->next_thread)
 	    {
 	      if (it == this) continue;
-	      while (it->shared_state < priv_time)
+	      while (it->shared_state.load(memory_order_relaxed) < priv_time)
 		cpu_relax();
 	    }
 	}
diff --git a/libitm/config/alpha/target.h b/libitm/config/alpha/target.h
index 121546f..12b1d89 100644
--- a/libitm/config/alpha/target.h
+++ b/libitm/config/alpha/target.h
@@ -45,16 +45,4 @@ cpu_relax (void)
   __asm volatile ("" : : : "memory");
 }
 
-static inline void
-atomic_read_barrier (void)
-{
-  __sync_synchronize ();
-}
-
-static inline void
-atomic_write_barrier (void)
-{
-  __asm volatile ("wmb" : : : "memory");
-}
-
 } // namespace GTM
diff --git a/libitm/config/linux/rwlock.cc b/libitm/config/linux/rwlock.cc
index c1e935e..3471049 100644
--- a/libitm/config/linux/rwlock.cc
+++ b/libitm/config/linux/rwlock.cc
@@ -36,10 +36,9 @@ gtm_rwlock::read_lock (gtm_thread *tx)
   for (;;)
     {
       // Fast path: first announce our intent to read, then check for
-      // conflicting intents to write. The barrier makes sure that this
-      // happens in exactly this order.
+      // conflicting intents to write.  Note that direct assignment to
+      // an atomic object is memory_order_seq_cst.
       tx->shared_state = 0;
-      __sync_synchronize();
       if (likely(writers == 0))
 	return;
 
@@ -51,8 +50,7 @@ gtm_rwlock::read_lock (gtm_thread *tx)
       // We need the barrier here for the same reason that we need it in
       // read_unlock().
       // TODO Potentially too many wake-ups. See comments in read_unlock().
-      tx->shared_state = ~(typeof tx->shared_state)0;
-      __sync_synchronize();
+      tx->shared_state = -1;
       if (writer_readers > 0)
 	{
 	  writer_readers = 0;
@@ -71,7 +69,7 @@ gtm_rwlock::read_lock (gtm_thread *tx)
 	  // are no writers anymore after the barrier because this pending
 	  // store could then lead to lost wake-ups at other readers.
 	  readers = 1;
-	  __sync_synchronize();
+	  atomic_thread_fence(memory_order_acq_rel);
 	  if (writers)
 	    futex_wait(&readers, 1);
 	}
diff --git a/libitm/config/posix/rwlock.cc b/libitm/config/posix/rwlock.cc
index f379383..e1e3dcf 100644
--- a/libitm/config/posix/rwlock.cc
+++ b/libitm/config/posix/rwlock.cc
@@ -53,10 +53,9 @@ void
 gtm_rwlock::read_lock (gtm_thread *tx)
 {
   // Fast path: first announce our intent to read, then check for conflicting
-  // intents to write. The barrier makes sure that this happens in exactly
-  // this order.
+  // intents to write.  Note that direct assignment to an atomic object
+  // is memory_order_seq_cst.
   tx->shared_state = 0;
-  __sync_synchronize();
   unsigned int sum = this->summary;
   if (likely(!(sum & (a_writer | w_writer))))
     return;
@@ -69,7 +68,7 @@ gtm_rwlock::read_lock (gtm_thread *tx)
   // to happen before we leave the slow path and before we wait for any
   // writer).
   // ??? Add a barrier to enforce early visibility of this?
-  tx->shared_state = ~(typeof tx->shared_state)0;
+  tx->shared_state.store(-1, memory_order_relaxed);
 
   pthread_mutex_lock (&this->mutex);
 
@@ -101,7 +100,7 @@ gtm_rwlock::read_lock (gtm_thread *tx)
     }
 
   // Otherwise we can acquire the lock for read.
-  tx->shared_state = 0;
+  tx->shared_state.store(0, memory_order_relaxed);
 
   pthread_mutex_unlock(&this->mutex);
 }
@@ -153,11 +152,11 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
   // sure that we first set our write intent and check for active readers
   // after that, in strictly this order (similar to the barrier in the fast
   // path of read_lock()).
-  __sync_synchronize();
+  atomic_thread_fence(memory_order_acq_rel);
 
   // If this is an upgrade, we are not a reader anymore.
   if (tx != 0)
-    tx->shared_state = ~(typeof tx->shared_state)0;
+    tx->shared_state.store(-1, memory_order_relaxed);
 
   // Count the number of active readers to be able to decrease the number of
   // wake-ups and wait calls that are necessary.
@@ -194,7 +193,7 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
 	  it = it->next_thread)
 	{
 	  // Don't count ourself if this is an upgrade.
-	  if (it->shared_state != ~(typeof it->shared_state)0)
+	  if (it->shared_state.load(memory_order_relaxed) != -1)
 	    readers++;
 	}
 
@@ -236,8 +235,7 @@ gtm_rwlock::write_upgrade (gtm_thread *tx)
 void
 gtm_rwlock::read_unlock (gtm_thread *tx)
 {
-  tx->shared_state = ~(typeof tx->shared_state)0;
-  __sync_synchronize();
+  tx->shared_state = -1;
   unsigned int sum = this->summary;
   if (likely(!(sum & (a_writer | w_writer))))
     return;
diff --git a/libitm/config/powerpc/target.h b/libitm/config/powerpc/target.h
index 2d036cd..f76ae34 100644
--- a/libitm/config/powerpc/target.h
+++ b/libitm/config/powerpc/target.h
@@ -50,16 +50,4 @@ cpu_relax (void)
   __asm volatile ("" : : : "memory");
 }
 
-static inline void
-atomic_read_barrier (void)
-{
-  __sync_synchronize ();
-}
-
-static inline void
-atomic_write_barrier (void)
-{
-  __sync_synchronize ();
-}
-
 } // namespace GTM
diff --git a/libitm/config/x86/target.h b/libitm/config/x86/target.h
index 197faeb..a59608f 100644
--- a/libitm/config/x86/target.h
+++ b/libitm/config/x86/target.h
@@ -66,20 +66,6 @@ cpu_relax (void)
   __asm volatile ("rep; nop" : : : "memory");
 }
 
-static inline void
-atomic_read_barrier (void)
-{
-  /* x86 is a strong memory ordering machine.  */
-  __asm volatile ("" : : : "memory");
-}
-
-static inline void
-atomic_write_barrier (void)
-{
-  /* x86 is a strong memory ordering machine.  */
-  __asm volatile ("" : : : "memory");
-}
-
 } // namespace GTM
 
 // We'll be using some of the cpu builtins, and their associated types.
diff --git a/libitm/libitm_i.h b/libitm/libitm_i.h
index ff6884e..d57872e 100644
--- a/libitm/libitm_i.h
+++ b/libitm/libitm_i.h
@@ -37,6 +37,7 @@
 #include <string.h>
 #include <unwind.h>
 #include "local_type_traits"
+#include "local_atomic"
 
 #include "common.h"
 
@@ -206,7 +207,7 @@ struct gtm_thread
 
   // If this transaction is inactive, shared_state is ~0. Otherwise, this is
   // an active or serial transaction.
-  gtm_word shared_state;
+  atomic<gtm_word> shared_state;
 
   // The lock that provides access to serial mode.  Non-serialized
   // transactions acquire read locks; a serialized transaction aquires
diff --git a/libitm/local_atomic b/libitm/local_atomic
new file mode 100644
index 0000000..e664463
--- /dev/null
+++ b/libitm/local_atomic
@@ -0,0 +1,1903 @@
+// -*- C++ -*- header.
+
+// Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
+
+// ????????????????????????????????????????????????????????????????????
+//
+// This is a copy of the libstdc++ header, with the trivial modification
+// of ignoring the c++config.h include.  If and when the top-level build is
+// fixed so that target libraries can be built using the newly built, we can
+// delete this file.
+//
+// ????????????????????????????????????????????????????????????????????
+
+/** @file include/atomic
+ *  This is a Standard C++ Library header.
+ */
+
+// Based on "C++ Atomic Types and Operations" by Hans Boehm and Lawrence Crowl.
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2427.html
+
+#ifndef _GLIBCXX_ATOMIC
+#define _GLIBCXX_ATOMIC 1
+
+// #pragma GCC system_header
+
+// #ifndef __GXX_EXPERIMENTAL_CXX0X__
+// # include <bits/c++0x_warning.h>
+// #endif
+
+// #include <bits/atomic_base.h>
+
+namespace std // _GLIBCXX_VISIBILITY(default)
+{
+// _GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+  /**
+   * @defgroup atomics Atomics
+   *
+   * Components for performing atomic operations.
+   * @{
+   */
+
+  /// Enumeration for memory_order
+  typedef enum memory_order
+    {
+      memory_order_relaxed,
+      memory_order_consume,
+      memory_order_acquire,
+      memory_order_release,
+      memory_order_acq_rel,
+      memory_order_seq_cst
+    } memory_order;
+
+  inline memory_order
+  __calculate_memory_order(memory_order __m) noexcept
+  {
+    const bool __cond1 = __m == memory_order_release;
+    const bool __cond2 = __m == memory_order_acq_rel;
+    memory_order __mo1(__cond1 ? memory_order_relaxed : __m);
+    memory_order __mo2(__cond2 ? memory_order_acquire : __mo1);
+    return __mo2;
+  }
+
+  inline void
+  atomic_thread_fence(memory_order __m) noexcept
+  {
+    __atomic_thread_fence (__m);
+  }
+
+  inline void
+  atomic_signal_fence(memory_order __m) noexcept
+  {
+    __atomic_thread_fence (__m);
+  }
+
+  /// kill_dependency
+  template<typename _Tp>
+    inline _Tp
+    kill_dependency(_Tp __y) noexcept
+    {
+      _Tp __ret(__y);
+      return __ret;
+    }
+
+  /// Lock-free Property
+
+
+#define ATOMIC_BOOL_LOCK_FREE		__GCC_ATOMIC_BOOL_LOCK_FREE
+#define ATOMIC_CHAR_LOCK_FREE		__GCC_ATOMIC_CHAR_LOCK_FREE
+#define ATOMIC_WCHAR_T_LOCK_FREE	__GCC_ATOMIC_WCHAR_T_LOCK_FREE
+#define ATOMIC_CHAR16_T_LOCK_FREE	__GCC_ATOMIC_CHAR16_T_LOCK_FREE
+#define ATOMIC_CHAR32_T_LOCK_FREE	__GCC_ATOMIC_CHAR32_T_LOCK_FREE
+#define ATOMIC_SHORT_LOCK_FREE		__GCC_ATOMIC_SHORT_LOCK_FREE
+#define ATOMIC_INT_LOCK_FREE		__GCC_ATOMIC_INT_LOCK_FREE
+#define ATOMIC_LONG_LOCK_FREE		__GCC_ATOMIC_LONG_LOCK_FREE
+#define ATOMIC_LLONG_LOCK_FREE		__GCC_ATOMIC_LLONG_LOCK_FREE
+#define ATOMIC_POINTER_LOCK_FREE	__GCC_ATOMIC_POINTER_LOCK_FREE
+
+  // Base types for atomics.
+  template<typename _IntTp>
+    struct __atomic_base;
+
+  /// atomic_char
+  typedef __atomic_base<char>  	       		atomic_char;
+
+  /// atomic_schar
+  typedef __atomic_base<signed char>	     	atomic_schar;
+
+  /// atomic_uchar
+  typedef __atomic_base<unsigned char>		atomic_uchar;
+
+  /// atomic_short
+  typedef __atomic_base<short>			atomic_short;
+
+  /// atomic_ushort
+  typedef __atomic_base<unsigned short>	 	atomic_ushort;
+
+  /// atomic_int
+  typedef __atomic_base<int>  	       		atomic_int;
+
+  /// atomic_uint
+  typedef __atomic_base<unsigned int>	     	atomic_uint;
+
+  /// atomic_long
+  typedef __atomic_base<long>  	       		atomic_long;
+
+  /// atomic_ulong
+  typedef __atomic_base<unsigned long>		atomic_ulong;
+
+  /// atomic_llong
+  typedef __atomic_base<long long>  		atomic_llong;
+
+  /// atomic_ullong
+  typedef __atomic_base<unsigned long long> 	atomic_ullong;
+
+  /// atomic_wchar_t
+  typedef __atomic_base<wchar_t>  		atomic_wchar_t;
+
+  /// atomic_char16_t
+  typedef __atomic_base<char16_t>  		atomic_char16_t;
+
+  /// atomic_char32_t
+  typedef __atomic_base<char32_t>  		atomic_char32_t;
+
+  /// atomic_char32_t
+  typedef __atomic_base<char32_t>  		atomic_char32_t;
+
+
+  /// atomic_int_least8_t
+  typedef __atomic_base<int_least8_t>  		atomic_int_least8_t;
+
+  /// atomic_uint_least8_t
+  typedef __atomic_base<uint_least8_t>	       	atomic_uint_least8_t;
+
+  /// atomic_int_least16_t
+  typedef __atomic_base<int_least16_t>	       	atomic_int_least16_t;
+
+  /// atomic_uint_least16_t
+  typedef __atomic_base<uint_least16_t>	       	atomic_uint_least16_t;
+
+  /// atomic_int_least32_t
+  typedef __atomic_base<int_least32_t>	       	atomic_int_least32_t;
+
+  /// atomic_uint_least32_t
+  typedef __atomic_base<uint_least32_t>	       	atomic_uint_least32_t;
+
+  /// atomic_int_least64_t
+  typedef __atomic_base<int_least64_t>	       	atomic_int_least64_t;
+
+  /// atomic_uint_least64_t
+  typedef __atomic_base<uint_least64_t>	       	atomic_uint_least64_t;
+
+
+  /// atomic_int_fast8_t
+  typedef __atomic_base<int_fast8_t>  		atomic_int_fast8_t;
+
+  /// atomic_uint_fast8_t
+  typedef __atomic_base<uint_fast8_t>	      	atomic_uint_fast8_t;
+
+  /// atomic_int_fast16_t
+  typedef __atomic_base<int_fast16_t>	      	atomic_int_fast16_t;
+
+  /// atomic_uint_fast16_t
+  typedef __atomic_base<uint_fast16_t>	      	atomic_uint_fast16_t;
+
+  /// atomic_int_fast32_t
+  typedef __atomic_base<int_fast32_t>	      	atomic_int_fast32_t;
+
+  /// atomic_uint_fast32_t
+  typedef __atomic_base<uint_fast32_t>	      	atomic_uint_fast32_t;
+
+  /// atomic_int_fast64_t
+  typedef __atomic_base<int_fast64_t>	      	atomic_int_fast64_t;
+
+  /// atomic_uint_fast64_t
+  typedef __atomic_base<uint_fast64_t>	      	atomic_uint_fast64_t;
+
+
+  /// atomic_intptr_t
+  typedef __atomic_base<intptr_t>  	       	atomic_intptr_t;
+
+  /// atomic_uintptr_t
+  typedef __atomic_base<uintptr_t>  	       	atomic_uintptr_t;
+
+  /// atomic_size_t
+  typedef __atomic_base<size_t>	 	       	atomic_size_t;
+
+  /// atomic_intmax_t
+  typedef __atomic_base<intmax_t>  	       	atomic_intmax_t;
+
+  /// atomic_uintmax_t
+  typedef __atomic_base<uintmax_t>  	       	atomic_uintmax_t;
+
+  /// atomic_ptrdiff_t
+  typedef __atomic_base<ptrdiff_t>  	       	atomic_ptrdiff_t;
+
+
+#define ATOMIC_VAR_INIT(_VI) { _VI }
+
+  template<typename _Tp>
+    struct atomic;
+
+  template<typename _Tp>
+    struct atomic<_Tp*>;
+
+
+  /**
+   *  @brief Base type for atomic_flag.
+   *
+   *  Base type is POD with data, allowing atomic_flag to derive from
+   *  it and meet the standard layout type requirement. In addition to
+   *  compatibilty with a C interface, this allows different
+   *  implementations of atomic_flag to use the same atomic operation
+   *  functions, via a standard conversion to the __atomic_flag_base
+   *  argument.
+  */
+  // _GLIBCXX_BEGIN_EXTERN_C
+
+  struct __atomic_flag_base
+  {
+    bool _M_i;
+  };
+
+  // _GLIBCXX_END_EXTERN_C
+
+#define ATOMIC_FLAG_INIT { false }
+
+  /// atomic_flag
+  struct atomic_flag : public __atomic_flag_base
+  {
+    atomic_flag() noexcept = default;
+    ~atomic_flag() noexcept = default;
+    atomic_flag(const atomic_flag&) = delete;
+    atomic_flag& operator=(const atomic_flag&) = delete;
+    atomic_flag& operator=(const atomic_flag&) volatile = delete;
+
+    // Conversion to ATOMIC_FLAG_INIT.
+    atomic_flag(bool __i) noexcept : __atomic_flag_base({ __i }) { }
+
+    bool
+    test_and_set(memory_order __m = memory_order_seq_cst) noexcept
+    {
+      return __atomic_test_and_set (&_M_i, __m);
+    }
+
+    bool
+    test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept
+    {
+      return __atomic_test_and_set (&_M_i, __m);
+    }
+
+    void
+    clear(memory_order __m = memory_order_seq_cst) noexcept
+    {
+      // __glibcxx_assert(__m != memory_order_consume);
+      // __glibcxx_assert(__m != memory_order_acquire);
+      // __glibcxx_assert(__m != memory_order_acq_rel);
+
+      __atomic_clear (&_M_i, __m);
+    }
+
+    void
+    clear(memory_order __m = memory_order_seq_cst) volatile noexcept
+    {
+      // __glibcxx_assert(__m != memory_order_consume);
+      // __glibcxx_assert(__m != memory_order_acquire);
+      // __glibcxx_assert(__m != memory_order_acq_rel);
+
+      __atomic_clear (&_M_i, __m);
+    }
+  };
+
+
+  /// Base class for atomic integrals.
+  //
+  // For each of the integral types, define atomic_[integral type] struct
+  //
+  // atomic_bool     bool
+  // atomic_char     char
+  // atomic_schar    signed char
+  // atomic_uchar    unsigned char
+  // atomic_short    short
+  // atomic_ushort   unsigned short
+  // atomic_int      int
+  // atomic_uint     unsigned int
+  // atomic_long     long
+  // atomic_ulong    unsigned long
+  // atomic_llong    long long
+  // atomic_ullong   unsigned long long
+  // atomic_char16_t char16_t
+  // atomic_char32_t char32_t
+  // atomic_wchar_t  wchar_t
+  //
+  // NB: Assuming _ITp is an integral scalar type that is 1, 2, 4, or
+  // 8 bytes, since that is what GCC built-in functions for atomic
+  // memory access expect.
+  template<typename _ITp>
+    struct __atomic_base
+    {
+    private:
+      typedef _ITp 	__int_type;
+
+      __int_type 	_M_i;
+
+    public:
+      __atomic_base() noexcept = default;
+      ~__atomic_base() noexcept = default;
+      __atomic_base(const __atomic_base&) = delete;
+      __atomic_base& operator=(const __atomic_base&) = delete;
+      __atomic_base& operator=(const __atomic_base&) volatile = delete;
+
+      // Requires __int_type convertible to _M_i.
+      constexpr __atomic_base(__int_type __i) noexcept : _M_i (__i) { }
+
+      operator __int_type() const noexcept
+      { return load(); }
+
+      operator __int_type() const volatile noexcept
+      { return load(); }
+
+      __int_type
+      operator=(__int_type __i) noexcept
+      {
+	store(__i);
+	return __i;
+      }
+
+      __int_type
+      operator=(__int_type __i) volatile noexcept
+      {
+	store(__i);
+	return __i;
+      }
+
+      __int_type
+      operator++(int) noexcept
+      { return fetch_add(1); }
+
+      __int_type
+      operator++(int) volatile noexcept
+      { return fetch_add(1); }
+
+      __int_type
+      operator--(int) noexcept
+      { return fetch_sub(1); }
+
+      __int_type
+      operator--(int) volatile noexcept
+      { return fetch_sub(1); }
+
+      __int_type
+      operator++() noexcept
+      { return __atomic_add_fetch(&_M_i, 1, memory_order_seq_cst); }
+
+      __int_type
+      operator++() volatile noexcept
+      { return __atomic_add_fetch(&_M_i, 1, memory_order_seq_cst); }
+
+      __int_type
+      operator--() noexcept
+      { return __atomic_sub_fetch(&_M_i, 1, memory_order_seq_cst); }
+
+      __int_type
+      operator--() volatile noexcept
+      { return __atomic_sub_fetch(&_M_i, 1, memory_order_seq_cst); }
+
+      __int_type
+      operator+=(__int_type __i) noexcept
+      { return __atomic_add_fetch(&_M_i, __i, memory_order_seq_cst); }
+
+      __int_type
+      operator+=(__int_type __i) volatile noexcept
+      { return __atomic_add_fetch(&_M_i, __i, memory_order_seq_cst); }
+
+      __int_type
+      operator-=(__int_type __i) noexcept
+      { return __atomic_sub_fetch(&_M_i, __i, memory_order_seq_cst); }
+
+      __int_type
+      operator-=(__int_type __i) volatile noexcept
+      { return __atomic_sub_fetch(&_M_i, __i, memory_order_seq_cst); }
+
+      __int_type
+      operator&=(__int_type __i) noexcept
+      { return __atomic_and_fetch(&_M_i, __i, memory_order_seq_cst); }
+
+      __int_type
+      operator&=(__int_type __i) volatile noexcept
+      { return __atomic_and_fetch(&_M_i, __i, memory_order_seq_cst); }
+
+      __int_type
+      operator|=(__int_type __i) noexcept
+      { return __atomic_or_fetch(&_M_i, __i, memory_order_seq_cst); }
+
+      __int_type
+      operator|=(__int_type __i) volatile noexcept
+      { return __atomic_or_fetch(&_M_i, __i, memory_order_seq_cst); }
+
+      __int_type
+      operator^=(__int_type __i) noexcept
+      { return __atomic_xor_fetch(&_M_i, __i, memory_order_seq_cst); }
+
+      __int_type
+      operator^=(__int_type __i) volatile noexcept
+      { return __atomic_xor_fetch(&_M_i, __i, memory_order_seq_cst); }
+
+      bool
+      is_lock_free() const noexcept
+      { return __atomic_is_lock_free (sizeof (_M_i), &_M_i); }
+
+      bool
+      is_lock_free() const volatile noexcept
+      { return __atomic_is_lock_free (sizeof (_M_i), &_M_i); }
+
+      void
+      store(__int_type __i, memory_order __m = memory_order_seq_cst) noexcept
+      {
+	// __glibcxx_assert(__m != memory_order_acquire);
+	// __glibcxx_assert(__m != memory_order_acq_rel);
+	// __glibcxx_assert(__m != memory_order_consume);
+
+	__atomic_store_n(&_M_i, __i, __m);
+      }
+
+      void
+      store(__int_type __i,
+	    memory_order __m = memory_order_seq_cst) volatile noexcept
+      {
+	// __glibcxx_assert(__m != memory_order_acquire);
+	// __glibcxx_assert(__m != memory_order_acq_rel);
+	// __glibcxx_assert(__m != memory_order_consume);
+
+	__atomic_store_n(&_M_i, __i, __m);
+      }
+
+      __int_type
+      load(memory_order __m = memory_order_seq_cst) const noexcept
+      {
+	// __glibcxx_assert(__m != memory_order_release);
+	// __glibcxx_assert(__m != memory_order_acq_rel);
+
+	return __atomic_load_n(&_M_i, __m);
+      }
+
+      __int_type
+      load(memory_order __m = memory_order_seq_cst) const volatile noexcept
+      {
+	// __glibcxx_assert(__m != memory_order_release);
+	// __glibcxx_assert(__m != memory_order_acq_rel);
+
+	return __atomic_load_n(&_M_i, __m);
+      }
+
+      __int_type
+      exchange(__int_type __i,
+	       memory_order __m = memory_order_seq_cst) noexcept
+      {
+	return __atomic_exchange_n(&_M_i, __i, __m);
+      }
+
+
+      __int_type
+      exchange(__int_type __i,
+	       memory_order __m = memory_order_seq_cst) volatile noexcept
+      {
+	return __atomic_exchange_n(&_M_i, __i, __m);
+      }
+
+      bool
+      compare_exchange_weak(__int_type& __i1, __int_type __i2,
+			    memory_order __m1, memory_order __m2) noexcept
+      {
+	// __glibcxx_assert(__m2 != memory_order_release);
+	// __glibcxx_assert(__m2 != memory_order_acq_rel);
+	// __glibcxx_assert(__m2 <= __m1);
+
+	return __atomic_compare_exchange_n(&_M_i, &__i1, __i2, 1, __m1, __m2);
+      }
+
+      bool
+      compare_exchange_weak(__int_type& __i1, __int_type __i2,
+			    memory_order __m1,
+			    memory_order __m2) volatile noexcept
+      {
+	// __glibcxx_assert(__m2 != memory_order_release);
+	// __glibcxx_assert(__m2 != memory_order_acq_rel);
+	// __glibcxx_assert(__m2 <= __m1);
+
+	return __atomic_compare_exchange_n(&_M_i, &__i1, __i2, 1, __m1, __m2);
+      }
+
+      bool
+      compare_exchange_weak(__int_type& __i1, __int_type __i2,
+			    memory_order __m = memory_order_seq_cst) noexcept
+      {
+	return compare_exchange_weak(__i1, __i2, __m,
+				     __calculate_memory_order(__m));
+      }
+
+      bool
+      compare_exchange_weak(__int_type& __i1, __int_type __i2,
+		   memory_order __m = memory_order_seq_cst) volatile noexcept
+      {
+	return compare_exchange_weak(__i1, __i2, __m,
+				     __calculate_memory_order(__m));
+      }
+
+      bool
+      compare_exchange_strong(__int_type& __i1, __int_type __i2,
+			      memory_order __m1, memory_order __m2) noexcept
+      {
+	// __glibcxx_assert(__m2 != memory_order_release);
+	// __glibcxx_assert(__m2 != memory_order_acq_rel);
+	// __glibcxx_assert(__m2 <= __m1);
+
+	return __atomic_compare_exchange_n(&_M_i, &__i1, __i2, 0, __m1, __m2);
+      }
+
+      bool
+      compare_exchange_strong(__int_type& __i1, __int_type __i2,
+			      memory_order __m1,
+			      memory_order __m2) volatile noexcept
+      {
+	// __glibcxx_assert(__m2 != memory_order_release);
+	// __glibcxx_assert(__m2 != memory_order_acq_rel);
+	// __glibcxx_assert(__m2 <= __m1);
+
+	return __atomic_compare_exchange_n(&_M_i, &__i1, __i2, 0, __m1, __m2);
+      }
+
+      bool
+      compare_exchange_strong(__int_type& __i1, __int_type __i2,
+			      memory_order __m = memory_order_seq_cst) noexcept
+      {
+	return compare_exchange_strong(__i1, __i2, __m,
+				       __calculate_memory_order(__m));
+      }
+
+      bool
+      compare_exchange_strong(__int_type& __i1, __int_type __i2,
+		 memory_order __m = memory_order_seq_cst) volatile noexcept
+      {
+	return compare_exchange_strong(__i1, __i2, __m,
+				       __calculate_memory_order(__m));
+      }
+
+      __int_type
+      fetch_add(__int_type __i,
+		memory_order __m = memory_order_seq_cst) noexcept
+      { return __atomic_fetch_add(&_M_i, __i, __m); }
+
+      __int_type
+      fetch_add(__int_type __i,
+		memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return __atomic_fetch_add(&_M_i, __i, __m); }
+
+      __int_type
+      fetch_sub(__int_type __i,
+		memory_order __m = memory_order_seq_cst) noexcept
+      { return __atomic_fetch_sub(&_M_i, __i, __m); }
+
+      __int_type
+      fetch_sub(__int_type __i,
+		memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return __atomic_fetch_sub(&_M_i, __i, __m); }
+
+      __int_type
+      fetch_and(__int_type __i,
+		memory_order __m = memory_order_seq_cst) noexcept
+      { return __atomic_fetch_and(&_M_i, __i, __m); }
+
+      __int_type
+      fetch_and(__int_type __i,
+		memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return __atomic_fetch_and(&_M_i, __i, __m); }
+
+      __int_type
+      fetch_or(__int_type __i,
+	       memory_order __m = memory_order_seq_cst) noexcept
+      { return __atomic_fetch_or(&_M_i, __i, __m); }
+
+      __int_type
+      fetch_or(__int_type __i,
+	       memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return __atomic_fetch_or(&_M_i, __i, __m); }
+
+      __int_type
+      fetch_xor(__int_type __i,
+		memory_order __m = memory_order_seq_cst) noexcept
+      { return __atomic_fetch_xor(&_M_i, __i, __m); }
+
+      __int_type
+      fetch_xor(__int_type __i,
+		memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return __atomic_fetch_xor(&_M_i, __i, __m); }
+    };
+
+
+  /// Partial specialization for pointer types.
+  template<typename _PTp>
+    struct __atomic_base<_PTp*>
+    {
+    private:
+      typedef _PTp* 	__pointer_type;
+
+      __pointer_type 	_M_p;
+
+    public:
+      __atomic_base() noexcept = default;
+      ~__atomic_base() noexcept = default;
+      __atomic_base(const __atomic_base&) = delete;
+      __atomic_base& operator=(const __atomic_base&) = delete;
+      __atomic_base& operator=(const __atomic_base&) volatile = delete;
+
+      // Requires __pointer_type convertible to _M_p.
+      constexpr __atomic_base(__pointer_type __p) noexcept : _M_p (__p) { }
+
+      operator __pointer_type() const noexcept
+      { return load(); }
+
+      operator __pointer_type() const volatile noexcept
+      { return load(); }
+
+      __pointer_type
+      operator=(__pointer_type __p) noexcept
+      {
+	store(__p);
+	return __p;
+      }
+
+      __pointer_type
+      operator=(__pointer_type __p) volatile noexcept
+      {
+	store(__p);
+	return __p;
+      }
+
+      __pointer_type
+      operator++(int) noexcept
+      { return fetch_add(1); }
+
+      __pointer_type
+      operator++(int) volatile noexcept
+      { return fetch_add(1); }
+
+      __pointer_type
+      operator--(int) noexcept
+      { return fetch_sub(1); }
+
+      __pointer_type
+      operator--(int) volatile noexcept
+      { return fetch_sub(1); }
+
+      __pointer_type
+      operator++() noexcept
+      { return __atomic_add_fetch(&_M_p, 1, memory_order_seq_cst); }
+
+      __pointer_type
+      operator++() volatile noexcept
+      { return __atomic_add_fetch(&_M_p, 1, memory_order_seq_cst); }
+
+      __pointer_type
+      operator--() noexcept
+      { return __atomic_sub_fetch(&_M_p, 1, memory_order_seq_cst); }
+
+      __pointer_type
+      operator--() volatile noexcept
+      { return __atomic_sub_fetch(&_M_p, 1, memory_order_seq_cst); }
+
+      __pointer_type
+      operator+=(ptrdiff_t __d) noexcept
+      { return __atomic_add_fetch(&_M_p, __d, memory_order_seq_cst); }
+
+      __pointer_type
+      operator+=(ptrdiff_t __d) volatile noexcept
+      { return __atomic_add_fetch(&_M_p, __d, memory_order_seq_cst); }
+
+      __pointer_type
+      operator-=(ptrdiff_t __d) noexcept
+      { return __atomic_sub_fetch(&_M_p, __d, memory_order_seq_cst); }
+
+      __pointer_type
+      operator-=(ptrdiff_t __d) volatile noexcept
+      { return __atomic_sub_fetch(&_M_p, __d, memory_order_seq_cst); }
+
+      bool
+      is_lock_free() const noexcept
+      { return __atomic_is_lock_free (sizeof (_M_p), &_M_p); }
+
+      bool
+      is_lock_free() const volatile noexcept
+      { return __atomic_is_lock_free (sizeof (_M_p), &_M_p); }
+
+      void
+      store(__pointer_type __p,
+	    memory_order __m = memory_order_seq_cst) noexcept
+      {
+	// __glibcxx_assert(__m != memory_order_acquire);
+	// __glibcxx_assert(__m != memory_order_acq_rel);
+	// __glibcxx_assert(__m != memory_order_consume);
+
+	__atomic_store_n(&_M_p, __p, __m);
+      }
+
+      void
+      store(__pointer_type __p,
+	    memory_order __m = memory_order_seq_cst) volatile noexcept
+      {
+	// __glibcxx_assert(__m != memory_order_acquire);
+	// __glibcxx_assert(__m != memory_order_acq_rel);
+	// __glibcxx_assert(__m != memory_order_consume);
+
+	__atomic_store_n(&_M_p, __p, __m);
+      }
+
+      __pointer_type
+      load(memory_order __m = memory_order_seq_cst) const noexcept
+      {
+	// __glibcxx_assert(__m != memory_order_release);
+	// __glibcxx_assert(__m != memory_order_acq_rel);
+
+	return __atomic_load_n(&_M_p, __m);
+      }
+
+      __pointer_type
+      load(memory_order __m = memory_order_seq_cst) const volatile noexcept
+      {
+	// __glibcxx_assert(__m != memory_order_release);
+	// __glibcxx_assert(__m != memory_order_acq_rel);
+
+	return __atomic_load_n(&_M_p, __m);
+      }
+
+      __pointer_type
+      exchange(__pointer_type __p,
+	       memory_order __m = memory_order_seq_cst) noexcept
+      {
+	return __atomic_exchange_n(&_M_p, __p, __m);
+      }
+
+
+      __pointer_type
+      exchange(__pointer_type __p,
+	       memory_order __m = memory_order_seq_cst) volatile noexcept
+      {
+	return __atomic_exchange_n(&_M_p, __p, __m);
+      }
+
+      bool
+      compare_exchange_strong(__pointer_type& __p1, __pointer_type __p2,
+			      memory_order __m1,
+			      memory_order __m2) noexcept
+      {
+	// __glibcxx_assert(__m2 != memory_order_release);
+	// __glibcxx_assert(__m2 != memory_order_acq_rel);
+	// __glibcxx_assert(__m2 <= __m1);
+
+	return __atomic_compare_exchange_n(&_M_p, &__p1, __p2, 0, __m1, __m2);
+      }
+
+      bool
+      compare_exchange_strong(__pointer_type& __p1, __pointer_type __p2,
+			      memory_order __m1,
+			      memory_order __m2) volatile noexcept
+      {
+	// __glibcxx_assert(__m2 != memory_order_release);
+	// __glibcxx_assert(__m2 != memory_order_acq_rel);
+	// __glibcxx_assert(__m2 <= __m1);
+
+	return __atomic_compare_exchange_n(&_M_p, &__p1, __p2, 0, __m1, __m2);
+      }
+
+      __pointer_type
+      fetch_add(ptrdiff_t __d,
+		memory_order __m = memory_order_seq_cst) noexcept
+      { return __atomic_fetch_add(&_M_p, __d, __m); }
+
+      __pointer_type
+      fetch_add(ptrdiff_t __d,
+		memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return __atomic_fetch_add(&_M_p, __d, __m); }
+
+      __pointer_type
+      fetch_sub(ptrdiff_t __d,
+		memory_order __m = memory_order_seq_cst) noexcept
+      { return __atomic_fetch_sub(&_M_p, __d, __m); }
+
+      __pointer_type
+      fetch_sub(ptrdiff_t __d,
+		memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return __atomic_fetch_sub(&_M_p, __d, __m); }
+    };
+
+
+  /**
+   * @addtogroup atomics
+   * @{
+   */
+
+  /// atomic_bool
+  // NB: No operators or fetch-operations for this type.
+  struct atomic_bool
+  {
+  private:
+    __atomic_base<bool>	_M_base;
+
+  public:
+    atomic_bool() noexcept = default;
+    ~atomic_bool() noexcept = default;
+    atomic_bool(const atomic_bool&) = delete;
+    atomic_bool& operator=(const atomic_bool&) = delete;
+    atomic_bool& operator=(const atomic_bool&) volatile = delete;
+
+    constexpr atomic_bool(bool __i) noexcept : _M_base(__i) { }
+
+    bool
+    operator=(bool __i) noexcept
+    { return _M_base.operator=(__i); }
+
+    operator bool() const noexcept
+    { return _M_base.load(); }
+
+    operator bool() const volatile noexcept
+    { return _M_base.load(); }
+
+    bool
+    is_lock_free() const noexcept { return _M_base.is_lock_free(); }
+
+    bool
+    is_lock_free() const volatile noexcept { return _M_base.is_lock_free(); }
+
+    void
+    store(bool __i, memory_order __m = memory_order_seq_cst) noexcept
+    { _M_base.store(__i, __m); }
+
+    void
+    store(bool __i, memory_order __m = memory_order_seq_cst) volatile noexcept
+    { _M_base.store(__i, __m); }
+
+    bool
+    load(memory_order __m = memory_order_seq_cst) const noexcept
+    { return _M_base.load(__m); }
+
+    bool
+    load(memory_order __m = memory_order_seq_cst) const volatile noexcept
+    { return _M_base.load(__m); }
+
+    bool
+    exchange(bool __i, memory_order __m = memory_order_seq_cst) noexcept
+    { return _M_base.exchange(__i, __m); }
+
+    bool
+    exchange(bool __i,
+	     memory_order __m = memory_order_seq_cst) volatile noexcept
+    { return _M_base.exchange(__i, __m); }
+
+    bool
+    compare_exchange_weak(bool& __i1, bool __i2, memory_order __m1,
+			  memory_order __m2) noexcept
+    { return _M_base.compare_exchange_weak(__i1, __i2, __m1, __m2); }
+
+    bool
+    compare_exchange_weak(bool& __i1, bool __i2, memory_order __m1,
+			  memory_order __m2) volatile noexcept
+    { return _M_base.compare_exchange_weak(__i1, __i2, __m1, __m2); }
+
+    bool
+    compare_exchange_weak(bool& __i1, bool __i2,
+			  memory_order __m = memory_order_seq_cst) noexcept
+    { return _M_base.compare_exchange_weak(__i1, __i2, __m); }
+
+    bool
+    compare_exchange_weak(bool& __i1, bool __i2,
+		     memory_order __m = memory_order_seq_cst) volatile noexcept
+    { return _M_base.compare_exchange_weak(__i1, __i2, __m); }
+
+    bool
+    compare_exchange_strong(bool& __i1, bool __i2, memory_order __m1,
+			    memory_order __m2) noexcept
+    { return _M_base.compare_exchange_strong(__i1, __i2, __m1, __m2); }
+
+    bool
+    compare_exchange_strong(bool& __i1, bool __i2, memory_order __m1,
+			    memory_order __m2) volatile noexcept
+    { return _M_base.compare_exchange_strong(__i1, __i2, __m1, __m2); }
+
+    bool
+    compare_exchange_strong(bool& __i1, bool __i2,
+			    memory_order __m = memory_order_seq_cst) noexcept
+    { return _M_base.compare_exchange_strong(__i1, __i2, __m); }
+
+    bool
+    compare_exchange_strong(bool& __i1, bool __i2,
+		    memory_order __m = memory_order_seq_cst) volatile noexcept
+    { return _M_base.compare_exchange_strong(__i1, __i2, __m); }
+  };
+
+
+  /// atomic
+  /// 29.4.3, Generic atomic type, primary class template.
+  template<typename _Tp>
+    struct atomic
+    {
+    private:
+      _Tp _M_i;
+
+    public:
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(_Tp __i) noexcept : _M_i(__i) { }
+
+      operator _Tp() const noexcept
+      { return load(); }
+
+      operator _Tp() const volatile noexcept
+      { return load(); }
+
+      _Tp
+      operator=(_Tp __i) noexcept 
+      { store(__i); return __i; }
+
+      _Tp
+      operator=(_Tp __i) volatile noexcept 
+      { store(__i); return __i; }
+
+      bool
+      is_lock_free() const noexcept
+      { return __atomic_is_lock_free(sizeof(_M_i), &_M_i); }
+
+      bool
+      is_lock_free() const volatile noexcept
+      { return __atomic_is_lock_free(sizeof(_M_i), &_M_i); }
+
+      void
+      store(_Tp __i, memory_order _m = memory_order_seq_cst) noexcept
+      { __atomic_store(&_M_i, &__i, _m); }
+
+      void
+      store(_Tp __i, memory_order _m = memory_order_seq_cst) volatile noexcept
+      { __atomic_store(&_M_i, &__i, _m); }
+
+      _Tp
+      load(memory_order _m = memory_order_seq_cst) const noexcept
+      { 
+        _Tp tmp;
+	__atomic_load(&_M_i, &tmp, _m); 
+	return tmp;
+      }
+
+      _Tp
+      load(memory_order _m = memory_order_seq_cst) const volatile noexcept
+      { 
+        _Tp tmp;
+	__atomic_load(&_M_i, &tmp, _m); 
+	return tmp;
+      }
+
+      _Tp
+      exchange(_Tp __i, memory_order _m = memory_order_seq_cst) noexcept
+      { 
+        _Tp tmp;
+	__atomic_exchange(&_M_i, &__i, &tmp, _m); 
+	return tmp;
+      }
+
+      _Tp
+      exchange(_Tp __i, 
+	       memory_order _m = memory_order_seq_cst) volatile noexcept
+      { 
+        _Tp tmp;
+	__atomic_exchange(&_M_i, &__i, &tmp, _m); 
+	return tmp;
+      }
+
+      bool
+      compare_exchange_weak(_Tp& __e, _Tp __i, memory_order __s, 
+			    memory_order __f) noexcept
+      {
+	return __atomic_compare_exchange(&_M_i, &__e, &__i, true, __s, __f); 
+      }
+
+      bool
+      compare_exchange_weak(_Tp& __e, _Tp __i, memory_order __s, 
+			    memory_order __f) volatile noexcept
+      {
+	return __atomic_compare_exchange(&_M_i, &__e, &__i, true, __s, __f); 
+      }
+
+      bool
+      compare_exchange_weak(_Tp& __e, _Tp __i,
+			    memory_order __m = memory_order_seq_cst) noexcept
+      { return compare_exchange_weak(__e, __i, __m, __m); }
+
+      bool
+      compare_exchange_weak(_Tp& __e, _Tp __i,
+		     memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return compare_exchange_weak(__e, __i, __m, __m); }
+
+      bool
+      compare_exchange_strong(_Tp& __e, _Tp __i, memory_order __s, 
+			      memory_order __f) noexcept
+      {
+	return __atomic_compare_exchange(&_M_i, &__e, &__i, false, __s, __f); 
+      }
+
+      bool
+      compare_exchange_strong(_Tp& __e, _Tp __i, memory_order __s, 
+			      memory_order __f) volatile noexcept
+      {
+	return __atomic_compare_exchange(&_M_i, &__e, &__i, false, __s, __f); 
+      }
+
+      bool
+      compare_exchange_strong(_Tp& __e, _Tp __i,
+			       memory_order __m = memory_order_seq_cst) noexcept
+      { return compare_exchange_strong(__e, __i, __m, __m); }
+
+      bool
+      compare_exchange_strong(_Tp& __e, _Tp __i,
+		     memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return compare_exchange_strong(__e, __i, __m, __m); }
+    };
+
+
+  /// Partial specialization for pointer types.
+  template<typename _Tp>
+    struct atomic<_Tp*>
+    {
+      typedef _Tp* 			__pointer_type;
+      typedef __atomic_base<_Tp*>	__base_type;
+      __base_type			_M_b;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__pointer_type __p) noexcept : _M_b(__p) { }
+
+      operator __pointer_type() const noexcept
+      { return __pointer_type(_M_b); }
+
+      operator __pointer_type() const volatile noexcept
+      { return __pointer_type(_M_b); }
+
+      __pointer_type
+      operator=(__pointer_type __p) noexcept
+      { return _M_b.operator=(__p); }
+
+      __pointer_type
+      operator=(__pointer_type __p) volatile noexcept
+      { return _M_b.operator=(__p); }
+
+      __pointer_type
+      operator++(int) noexcept
+      { return _M_b++; }
+
+      __pointer_type
+      operator++(int) volatile noexcept
+      { return _M_b++; }
+
+      __pointer_type
+      operator--(int) noexcept
+      { return _M_b--; }
+
+      __pointer_type
+      operator--(int) volatile noexcept
+      { return _M_b--; }
+
+      __pointer_type
+      operator++() noexcept
+      { return ++_M_b; }
+
+      __pointer_type
+      operator++() volatile noexcept
+      { return ++_M_b; }
+
+      __pointer_type
+      operator--() noexcept
+      { return --_M_b; }
+
+      __pointer_type
+      operator--() volatile noexcept
+      { return --_M_b; }
+
+      __pointer_type
+      operator+=(ptrdiff_t __d) noexcept
+      { return _M_b.operator+=(__d); }
+
+      __pointer_type
+      operator+=(ptrdiff_t __d) volatile noexcept
+      { return _M_b.operator+=(__d); }
+
+      __pointer_type
+      operator-=(ptrdiff_t __d) noexcept
+      { return _M_b.operator-=(__d); }
+
+      __pointer_type
+      operator-=(ptrdiff_t __d) volatile noexcept
+      { return _M_b.operator-=(__d); }
+
+      bool
+      is_lock_free() const noexcept
+      { return _M_b.is_lock_free(); }
+
+      bool
+      is_lock_free() const volatile noexcept
+      { return _M_b.is_lock_free(); }
+
+      void
+      store(__pointer_type __p,
+	    memory_order __m = memory_order_seq_cst) noexcept
+      { return _M_b.store(__p, __m); }
+
+      void
+      store(__pointer_type __p,
+	    memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return _M_b.store(__p, __m); }
+
+      __pointer_type
+      load(memory_order __m = memory_order_seq_cst) const noexcept
+      { return _M_b.load(__m); }
+
+      __pointer_type
+      load(memory_order __m = memory_order_seq_cst) const volatile noexcept
+      { return _M_b.load(__m); }
+
+      __pointer_type
+      exchange(__pointer_type __p,
+	       memory_order __m = memory_order_seq_cst) noexcept
+      { return _M_b.exchange(__p, __m); }
+
+      __pointer_type
+      exchange(__pointer_type __p,
+	       memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return _M_b.exchange(__p, __m); }
+
+      bool
+      compare_exchange_weak(__pointer_type& __p1, __pointer_type __p2,
+			    memory_order __m1, memory_order __m2) noexcept
+      { return _M_b.compare_exchange_strong(__p1, __p2, __m1, __m2); }
+
+      bool
+      compare_exchange_weak(__pointer_type& __p1, __pointer_type __p2,
+			    memory_order __m1,
+			    memory_order __m2) volatile noexcept
+      { return _M_b.compare_exchange_strong(__p1, __p2, __m1, __m2); }
+
+      bool
+      compare_exchange_weak(__pointer_type& __p1, __pointer_type __p2,
+			    memory_order __m = memory_order_seq_cst) noexcept
+      {
+	return compare_exchange_weak(__p1, __p2, __m,
+				     __calculate_memory_order(__m));
+      }
+
+      bool
+      compare_exchange_weak(__pointer_type& __p1, __pointer_type __p2,
+		    memory_order __m = memory_order_seq_cst) volatile noexcept
+      {
+	return compare_exchange_weak(__p1, __p2, __m,
+				     __calculate_memory_order(__m));
+      }
+
+      bool
+      compare_exchange_strong(__pointer_type& __p1, __pointer_type __p2,
+			      memory_order __m1, memory_order __m2) noexcept
+      { return _M_b.compare_exchange_strong(__p1, __p2, __m1, __m2); }
+
+      bool
+      compare_exchange_strong(__pointer_type& __p1, __pointer_type __p2,
+			      memory_order __m1,
+			      memory_order __m2) volatile noexcept
+      { return _M_b.compare_exchange_strong(__p1, __p2, __m1, __m2); }
+
+      bool
+      compare_exchange_strong(__pointer_type& __p1, __pointer_type __p2,
+			      memory_order __m = memory_order_seq_cst) noexcept
+      {
+	return _M_b.compare_exchange_strong(__p1, __p2, __m,
+					    __calculate_memory_order(__m));
+      }
+
+      bool
+      compare_exchange_strong(__pointer_type& __p1, __pointer_type __p2,
+		    memory_order __m = memory_order_seq_cst) volatile noexcept
+      {
+	return _M_b.compare_exchange_strong(__p1, __p2, __m,
+					    __calculate_memory_order(__m));
+      }
+
+      __pointer_type
+      fetch_add(ptrdiff_t __d,
+		memory_order __m = memory_order_seq_cst) noexcept
+      { return _M_b.fetch_add(__d, __m); }
+
+      __pointer_type
+      fetch_add(ptrdiff_t __d,
+		memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return _M_b.fetch_add(__d, __m); }
+
+      __pointer_type
+      fetch_sub(ptrdiff_t __d,
+		memory_order __m = memory_order_seq_cst) noexcept
+      { return _M_b.fetch_sub(__d, __m); }
+
+      __pointer_type
+      fetch_sub(ptrdiff_t __d,
+		memory_order __m = memory_order_seq_cst) volatile noexcept
+      { return _M_b.fetch_sub(__d, __m); }
+    };
+
+
+  /// Explicit specialization for bool.
+  template<>
+    struct atomic<bool> : public atomic_bool
+    {
+      typedef bool 			__integral_type;
+      typedef atomic_bool 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for char.
+  template<>
+    struct atomic<char> : public atomic_char
+    {
+      typedef char 			__integral_type;
+      typedef atomic_char 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for signed char.
+  template<>
+    struct atomic<signed char> : public atomic_schar
+    {
+      typedef signed char 		__integral_type;
+      typedef atomic_schar 		__base_type;
+
+      atomic() noexcept= default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for unsigned char.
+  template<>
+    struct atomic<unsigned char> : public atomic_uchar
+    {
+      typedef unsigned char 		__integral_type;
+      typedef atomic_uchar 		__base_type;
+
+      atomic() noexcept= default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for short.
+  template<>
+    struct atomic<short> : public atomic_short
+    {
+      typedef short 			__integral_type;
+      typedef atomic_short 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for unsigned short.
+  template<>
+    struct atomic<unsigned short> : public atomic_ushort
+    {
+      typedef unsigned short 	      	__integral_type;
+      typedef atomic_ushort 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for int.
+  template<>
+    struct atomic<int> : atomic_int
+    {
+      typedef int 			__integral_type;
+      typedef atomic_int 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for unsigned int.
+  template<>
+    struct atomic<unsigned int> : public atomic_uint
+    {
+      typedef unsigned int		__integral_type;
+      typedef atomic_uint 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for long.
+  template<>
+    struct atomic<long> : public atomic_long
+    {
+      typedef long 			__integral_type;
+      typedef atomic_long 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for unsigned long.
+  template<>
+    struct atomic<unsigned long> : public atomic_ulong
+    {
+      typedef unsigned long 		__integral_type;
+      typedef atomic_ulong 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for long long.
+  template<>
+    struct atomic<long long> : public atomic_llong
+    {
+      typedef long long 		__integral_type;
+      typedef atomic_llong 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for unsigned long long.
+  template<>
+    struct atomic<unsigned long long> : public atomic_ullong
+    {
+      typedef unsigned long long       	__integral_type;
+      typedef atomic_ullong 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for wchar_t.
+  template<>
+    struct atomic<wchar_t> : public atomic_wchar_t
+    {
+      typedef wchar_t 			__integral_type;
+      typedef atomic_wchar_t 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for char16_t.
+  template<>
+    struct atomic<char16_t> : public atomic_char16_t
+    {
+      typedef char16_t 			__integral_type;
+      typedef atomic_char16_t 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+  /// Explicit specialization for char32_t.
+  template<>
+    struct atomic<char32_t> : public atomic_char32_t
+    {
+      typedef char32_t 			__integral_type;
+      typedef atomic_char32_t 		__base_type;
+
+      atomic() noexcept = default;
+      ~atomic() noexcept = default;
+      atomic(const atomic&) = delete;
+      atomic& operator=(const atomic&) = delete;
+      atomic& operator=(const atomic&) volatile = delete;
+
+      constexpr atomic(__integral_type __i) noexcept : __base_type(__i) { }
+
+      using __base_type::operator __integral_type;
+      using __base_type::operator=;
+    };
+
+
+  // Function definitions, atomic_flag operations.
+  inline bool
+  atomic_flag_test_and_set_explicit(atomic_flag* __a,
+				    memory_order __m) noexcept
+  { return __a->test_and_set(__m); }
+
+  inline bool
+  atomic_flag_test_and_set_explicit(volatile atomic_flag* __a,
+				    memory_order __m) noexcept
+  { return __a->test_and_set(__m); }
+
+  inline void
+  atomic_flag_clear_explicit(atomic_flag* __a, memory_order __m) noexcept
+  { __a->clear(__m); }
+
+  inline void
+  atomic_flag_clear_explicit(volatile atomic_flag* __a,
+			     memory_order __m) noexcept
+  { __a->clear(__m); }
+
+  inline bool
+  atomic_flag_test_and_set(atomic_flag* __a) noexcept
+  { return atomic_flag_test_and_set_explicit(__a, memory_order_seq_cst); }
+
+  inline bool
+  atomic_flag_test_and_set(volatile atomic_flag* __a) noexcept
+  { return atomic_flag_test_and_set_explicit(__a, memory_order_seq_cst); }
+
+  inline void
+  atomic_flag_clear(atomic_flag* __a) noexcept
+  { atomic_flag_clear_explicit(__a, memory_order_seq_cst); }
+
+  inline void
+  atomic_flag_clear(volatile atomic_flag* __a) noexcept
+  { atomic_flag_clear_explicit(__a, memory_order_seq_cst); }
+
+
+  // Function templates generally applicable to atomic types.
+  template<typename _ITp>
+    inline bool
+    atomic_is_lock_free(const atomic<_ITp>* __a) noexcept
+    { return __a->is_lock_free(); }
+
+  template<typename _ITp>
+    inline bool
+    atomic_is_lock_free(const volatile atomic<_ITp>* __a) noexcept
+    { return __a->is_lock_free(); }
+
+  template<typename _ITp>
+    inline void
+    atomic_init(atomic<_ITp>* __a, _ITp __i) noexcept;
+
+  template<typename _ITp>
+    inline void
+    atomic_init(volatile atomic<_ITp>* __a, _ITp __i) noexcept;
+
+  template<typename _ITp>
+    inline void
+    atomic_store_explicit(atomic<_ITp>* __a, _ITp __i,
+			  memory_order __m) noexcept
+    { __a->store(__i, __m); }
+
+  template<typename _ITp>
+    inline void
+    atomic_store_explicit(volatile atomic<_ITp>* __a, _ITp __i,
+			  memory_order __m) noexcept
+    { __a->store(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_load_explicit(const atomic<_ITp>* __a, memory_order __m) noexcept
+    { return __a->load(__m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_load_explicit(const volatile atomic<_ITp>* __a,
+			 memory_order __m) noexcept
+    { return __a->load(__m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_exchange_explicit(atomic<_ITp>* __a, _ITp __i,
+			     memory_order __m) noexcept
+    { return __a->exchange(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_exchange_explicit(volatile atomic<_ITp>* __a, _ITp __i,
+			     memory_order __m) noexcept
+    { return __a->exchange(__i, __m); }
+
+  template<typename _ITp>
+    inline bool
+    atomic_compare_exchange_weak_explicit(atomic<_ITp>* __a,
+					  _ITp* __i1, _ITp __i2,
+					  memory_order __m1,
+					  memory_order __m2) noexcept
+    { return __a->compare_exchange_weak(*__i1, __i2, __m1, __m2); }
+
+  template<typename _ITp>
+    inline bool
+    atomic_compare_exchange_weak_explicit(volatile atomic<_ITp>* __a,
+					  _ITp* __i1, _ITp __i2,
+					  memory_order __m1,
+					  memory_order __m2) noexcept
+    { return __a->compare_exchange_weak(*__i1, __i2, __m1, __m2); }
+
+  template<typename _ITp>
+    inline bool
+    atomic_compare_exchange_strong_explicit(atomic<_ITp>* __a,
+					    _ITp* __i1, _ITp __i2,
+					    memory_order __m1,
+					    memory_order __m2) noexcept
+    { return __a->compare_exchange_strong(*__i1, __i2, __m1, __m2); }
+
+  template<typename _ITp>
+    inline bool
+    atomic_compare_exchange_strong_explicit(volatile atomic<_ITp>* __a,
+					    _ITp* __i1, _ITp __i2,
+					    memory_order __m1,
+					    memory_order __m2) noexcept
+    { return __a->compare_exchange_strong(*__i1, __i2, __m1, __m2); }
+
+
+  template<typename _ITp>
+    inline void
+    atomic_store(atomic<_ITp>* __a, _ITp __i) noexcept
+    { atomic_store_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline void
+    atomic_store(volatile atomic<_ITp>* __a, _ITp __i) noexcept
+    { atomic_store_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_load(const atomic<_ITp>* __a) noexcept
+    { return atomic_load_explicit(__a, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_load(const volatile atomic<_ITp>* __a) noexcept
+    { return atomic_load_explicit(__a, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_exchange(atomic<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_exchange_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_exchange(volatile atomic<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_exchange_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline bool
+    atomic_compare_exchange_weak(atomic<_ITp>* __a,
+				 _ITp* __i1, _ITp __i2) noexcept
+    {
+      return atomic_compare_exchange_weak_explicit(__a, __i1, __i2,
+						   memory_order_seq_cst,
+						   memory_order_seq_cst);
+    }
+
+  template<typename _ITp>
+    inline bool
+    atomic_compare_exchange_weak(volatile atomic<_ITp>* __a,
+				 _ITp* __i1, _ITp __i2) noexcept
+    {
+      return atomic_compare_exchange_weak_explicit(__a, __i1, __i2,
+						   memory_order_seq_cst,
+						   memory_order_seq_cst);
+    }
+
+  template<typename _ITp>
+    inline bool
+    atomic_compare_exchange_strong(atomic<_ITp>* __a,
+				   _ITp* __i1, _ITp __i2) noexcept
+    {
+      return atomic_compare_exchange_strong_explicit(__a, __i1, __i2,
+						     memory_order_seq_cst,
+						     memory_order_seq_cst);
+    }
+
+  template<typename _ITp>
+    inline bool
+    atomic_compare_exchange_strong(volatile atomic<_ITp>* __a,
+				   _ITp* __i1, _ITp __i2) noexcept
+    {
+      return atomic_compare_exchange_strong_explicit(__a, __i1, __i2,
+						     memory_order_seq_cst,
+						     memory_order_seq_cst);
+    }
+
+  // Function templates for atomic_integral operations only, using
+  // __atomic_base. Template argument should be constricted to
+  // intergral types as specified in the standard, excluding address
+  // types.
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_add_explicit(__atomic_base<_ITp>* __a, _ITp __i,
+			      memory_order __m) noexcept
+    { return __a->fetch_add(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_add_explicit(volatile __atomic_base<_ITp>* __a, _ITp __i,
+			      memory_order __m) noexcept
+    { return __a->fetch_add(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_sub_explicit(__atomic_base<_ITp>* __a, _ITp __i,
+			      memory_order __m) noexcept
+    { return __a->fetch_sub(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_sub_explicit(volatile __atomic_base<_ITp>* __a, _ITp __i,
+			      memory_order __m) noexcept
+    { return __a->fetch_sub(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_and_explicit(__atomic_base<_ITp>* __a, _ITp __i,
+			      memory_order __m) noexcept
+    { return __a->fetch_and(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_and_explicit(volatile __atomic_base<_ITp>* __a, _ITp __i,
+			      memory_order __m) noexcept
+    { return __a->fetch_and(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_or_explicit(__atomic_base<_ITp>* __a, _ITp __i,
+			     memory_order __m) noexcept
+    { return __a->fetch_or(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_or_explicit(volatile __atomic_base<_ITp>* __a, _ITp __i,
+			     memory_order __m) noexcept
+    { return __a->fetch_or(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_xor_explicit(__atomic_base<_ITp>* __a, _ITp __i,
+			      memory_order __m) noexcept
+    { return __a->fetch_xor(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_xor_explicit(volatile __atomic_base<_ITp>* __a, _ITp __i,
+			      memory_order __m) noexcept
+    { return __a->fetch_xor(__i, __m); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_add(__atomic_base<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_fetch_add_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_add(volatile __atomic_base<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_fetch_add_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_sub(__atomic_base<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_fetch_sub_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_sub(volatile __atomic_base<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_fetch_sub_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_and(__atomic_base<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_fetch_and_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_and(volatile __atomic_base<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_fetch_and_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_or(__atomic_base<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_fetch_or_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_or(volatile __atomic_base<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_fetch_or_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_xor(__atomic_base<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_fetch_xor_explicit(__a, __i, memory_order_seq_cst); }
+
+  template<typename _ITp>
+    inline _ITp
+    atomic_fetch_xor(volatile __atomic_base<_ITp>* __a, _ITp __i) noexcept
+    { return atomic_fetch_xor_explicit(__a, __i, memory_order_seq_cst); }
+
+
+  // Partial specializations for pointers.
+  template<typename _ITp>
+    inline _ITp*
+    atomic_fetch_add_explicit(atomic<_ITp*>* __a, ptrdiff_t __d,
+			      memory_order __m) noexcept
+    { return __a->fetch_add(__d, __m); }
+
+  template<typename _ITp>
+    inline _ITp*
+    atomic_fetch_add_explicit(volatile atomic<_ITp*>* __a, ptrdiff_t __d,
+			      memory_order __m) noexcept
+    { return __a->fetch_add(__d, __m); }
+
+  template<typename _ITp>
+    inline _ITp*
+    atomic_fetch_add(volatile atomic<_ITp*>* __a, ptrdiff_t __d) noexcept
+    { return __a->fetch_add(__d); }
+
+  template<typename _ITp>
+    inline _ITp*
+    atomic_fetch_add(atomic<_ITp*>* __a, ptrdiff_t __d) noexcept
+    { return __a->fetch_add(__d); }
+
+  template<typename _ITp>
+    inline _ITp*
+    atomic_fetch_sub_explicit(volatile atomic<_ITp*>* __a,
+			      ptrdiff_t __d, memory_order __m) noexcept
+    { return __a->fetch_sub(__d, __m); }
+
+  template<typename _ITp>
+    inline _ITp*
+    atomic_fetch_sub_explicit(atomic<_ITp*>* __a, ptrdiff_t __d,
+			      memory_order __m) noexcept
+    { return __a->fetch_sub(__d, __m); }
+
+  template<typename _ITp>
+    inline _ITp*
+    atomic_fetch_sub(volatile atomic<_ITp*>* __a, ptrdiff_t __d) noexcept
+    { return __a->fetch_sub(__d); }
+
+  template<typename _ITp>
+    inline _ITp*
+    atomic_fetch_sub(atomic<_ITp*>* __a, ptrdiff_t __d) noexcept
+    { return __a->fetch_sub(__d); }
+  // @} group atomics
+
+// _GLIBCXX_END_NAMESPACE_VERSION
+} // namespace
+
+#endif
diff --git a/libitm/method-gl.cc b/libitm/method-gl.cc
index 60c2662..81045d3 100644
--- a/libitm/method-gl.cc
+++ b/libitm/method-gl.cc
@@ -41,10 +41,11 @@ struct gl_mg : public method_group
   static gtm_word clear_locked(gtm_word l) { return l & ~LOCK_BIT; }
 
   // The global ownership record.
-  gtm_word orec;
+  atomic<gtm_word> orec;
+
   virtual void init()
   {
-    orec = 0;
+    orec.store(0, memory_order_relaxed);
   }
   virtual void fini() { }
 };
@@ -84,28 +85,25 @@ protected:
   static void pre_write(const void *addr, size_t len)
   {
     gtm_thread *tx = gtm_thr();
-    if (unlikely(!gl_mg::is_locked(tx->shared_state)))
+    gtm_word v = tx->shared_state.load(memory_order_acquire);
+    if (unlikely(!gl_mg::is_locked(v)))
       {
 	// Check for and handle version number overflow.
-	if (unlikely(tx->shared_state >= gl_mg::VERSION_MAX))
+	if (unlikely(v >= gl_mg::VERSION_MAX))
 	  tx->restart(RESTART_INIT_METHOD_GROUP);
 
 	// CAS global orec from our snapshot time to the locked state.
 	// This validates that we have a consistent snapshot, which is also
 	// for making privatization safety work (see the class' comments).
-	gtm_word now = o_gl_mg.orec;
-	if (now != tx->shared_state)
+	gtm_word now = o_gl_mg.orec.load(memory_order_relaxed);
+	if (now != v)
 	  tx->restart(RESTART_VALIDATE_WRITE);
-	if (__sync_val_compare_and_swap(&o_gl_mg.orec, now,
-	    gl_mg::set_locked(now)) != now)
+	if (!o_gl_mg.orec.compare_exchange_strong (now, gl_mg::set_locked(now),
+						   memory_order_acquire))
 	  tx->restart(RESTART_LOCKED_WRITE);
 
-	// Set shared_state to new value. The CAS is a full barrier, so the
-	// acquisition of the global orec is visible before this store here,
-	// and the store will not be visible before earlier data loads, which
-	// is required to correctly ensure privatization safety (see
-	// begin_and_restart() and release_orec() for further comments).
-	tx->shared_state = gl_mg::set_locked(now);
+	// Set shared_state to new value.
+	tx->shared_state.store(gl_mg::set_locked(now), memory_order_release);
       }
 
     // TODO Ensure that this gets inlined: Use internal log interface and LTO.
@@ -115,11 +113,12 @@ protected:
   static void validate()
   {
     // Check that snapshot is consistent. The barrier ensures that this
-    // happens after previous data loads.
-    atomic_read_barrier();
+    // happens after previous data loads.  Recall that load cannot itself
+    // have memory_order_release.
     gtm_thread *tx = gtm_thr();
-    gtm_word l = o_gl_mg.orec;
-    if (l != tx->shared_state)
+    atomic_thread_fence(memory_order_release);
+    gtm_word l = o_gl_mg.orec.load(memory_order_relaxed);
+    if (l != tx->shared_state.load(memory_order_relaxed))
       tx->restart(RESTART_VALIDATE_READ);
   }
 
@@ -180,17 +179,18 @@ public:
 
     // Spin until global orec is not locked.
     // TODO This is not necessary if there are no pure loads (check txn props).
-    gtm_word v;
     unsigned i = 0;
-    while (gl_mg::is_locked(v = o_gl_mg.orec))
+    gtm_word v;
+    while (1)
       {
+        v = o_gl_mg.orec.load(memory_order_acquire);
+        if (!gl_mg::is_locked(v))
+	  break;
 	// TODO need method-specific max spin count
-	if (++i > gtm_spin_count_var) return RESTART_VALIDATE_READ;
+	if (++i > gtm_spin_count_var)
+	  return RESTART_VALIDATE_READ;
 	cpu_relax();
       }
-    // This barrier ensures that we have read the global orec before later
-    // data loads.
-    atomic_read_barrier();
 
     // Everything is okay, we have a snapshot time.
     // We don't need to enforce any ordering for the following store. There
@@ -202,14 +202,14 @@ public:
     // marking the transaction as active, and restarts enforce immediate
     // visibility of a smaller or equal value with a barrier (see
     // release_orec()).
-    tx->shared_state = v;
+    tx->shared_state.store(v, memory_order_relaxed);
     return NO_RESTART;
   }
 
   virtual bool trycommit(gtm_word& priv_time)
   {
     gtm_thread* tx = gtm_thr();
-    gtm_word v = tx->shared_state;
+    gtm_word v = tx->shared_state.load(memory_order_acquire);
 
     // Special case: If shared_state is ~0, then we have acquired the
     // serial lock (tx->state is not updated yet). In this case, the previous
@@ -218,7 +218,7 @@ public:
     // anymore. In particular, if it is locked, then we are an update
     // transaction, which is all we care about for commit.
     if (v == ~(typeof v)0)
-      v = o_gl_mg.orec;
+      v = o_gl_mg.orec.load(memory_order_relaxed);
 
     // Release the orec but do not reset shared_state, which will be modified
     // by the serial lock right after our commit anyway. Also, resetting
@@ -227,10 +227,8 @@ public:
     if (gl_mg::is_locked(v))
       {
 	// Release the global orec, increasing its version number / timestamp.
-	// TODO replace with C++0x-style atomics (a release in this case)
-	atomic_write_barrier();
 	v = gl_mg::clear_locked(v) + 1;
-	o_gl_mg.orec = v;
+	o_gl_mg.orec.store(v, memory_order_release);
 
 	// Need to ensure privatization safety. Every other transaction must
 	// have a snapshot time that is at least as high as our commit time
@@ -247,15 +245,16 @@ public:
       return;
 
     gtm_thread *tx = gtm_thr();
-    gtm_word v = tx->shared_state;
+    gtm_word v = tx->shared_state.load(memory_order_acquire);
     // Special case: If shared_state is ~0, then we have acquired the
     // serial lock (tx->state is not updated yet). In this case, the previous
     // value isn't available anymore, so grab it from the global lock, which
     // must have a meaningful value because no other transactions are active
     // anymore. In particular, if it is locked, then we are an update
     // transaction, which is all we care about for rollback.
-    if (v == ~(typeof v)0)
-      v = o_gl_mg.orec;
+    bool is_serial = v == ~(typeof v)0;
+    if (is_serial)
+      v = o_gl_mg.orec.load(memory_order_relaxed);
 
     // Release lock and increment version number to prevent dirty reads.
     // Also reset shared state here, so that begin_or_restart() can expect a
@@ -263,16 +262,14 @@ public:
     if (gl_mg::is_locked(v))
       {
 	// Release the global orec, increasing its version number / timestamp.
-	// TODO replace with C++0x-style atomics (a release in this case)
-	atomic_write_barrier();
 	v = gl_mg::clear_locked(v) + 1;
-	o_gl_mg.orec = v;
+	o_gl_mg.orec.store(v, memory_order_release);
 
 	// Also reset the timestamp published via shared_state.
 	// Special case: Only do this if we are not a serial transaction
 	// because otherwise, we would interfere with the serial lock.
-	if (tx->shared_state != ~(typeof tx->shared_state)0)
-	  tx->shared_state = v;
+	if (!is_serial)
+	  tx->shared_state.store(v, memory_order_relaxed);
 
 	// We need a store-load barrier after this store to prevent it
 	// from becoming visible after later data loads because the
@@ -280,7 +277,7 @@ public:
 	// snapshot time (the lock bit had been set), which could break
 	// privatization safety. We do not need a barrier before this
 	// store (see pre_write() for an explanation).
-	__sync_synchronize();
+	atomic_thread_fence(memory_order_acq_rel);
       }
 
   }
diff --git a/libitm/stmlock.h b/libitm/stmlock.h
index f115a41..a338258 100644
--- a/libitm/stmlock.h
+++ b/libitm/stmlock.h
@@ -92,24 +92,23 @@ gtm_get_stmlock (const gtm_cacheline *addr)
 }
 
 /* The current global version number.  */
-extern gtm_version gtm_clock;
+extern atomic<gtm_version> gtm_clock;
 
 static inline gtm_version
 gtm_get_clock (void)
 {
-  gtm_version r;
-
-  __sync_synchronize ();
-  r = gtm_clock;
-  atomic_read_barrier ();
-
-  return r;
+  atomic_thread_fence(memory_order_release);
+  return gtm_clock.load(memory_order_acquire);
 }
 
 static inline gtm_version
 gtm_inc_clock (void)
 {
-  gtm_version r = __sync_add_and_fetch (&gtm_clock, 1);
+  /* ??? Here we have a choice, the pre-inc operator mapping to
+     __atomic_add_fetch with memory_order_seq_cst, or fetch_add
+     with memory_order_acq_rel plus another separate increment.
+     We really ought to recognize and optimize fetch_op(x) op x... */
+  gtm_version r = ++gtm_clock;
 
   /* ??? Ought to handle wraparound for 32-bit.  */
   if (sizeof(r) < 8 && r > GTM_VERSION_MAX)