new file mode 100644
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
+#include <assert.h>
+
+enum memmodel
+ {
+ MEMMODEL_RELAXED = 0,
+ MEMMODEL_ACQUIRE = 2,
+ MEMMODEL_RELEASE = 3,
+ MEMMODEL_SEQ_CST = 5,
+ };
+
+#define TYPE unsigned int
+#define LOCKVAR1 lock_32_1
+#define LOCKVAR2 lock_32_2
+#define TESTS tests_32
+#include "spin-lock-global.h"
+#undef TYPE
+#undef LOCKVAR1
+#undef LOCKVAR2
+#undef TESTS
+
+#define TYPE unsigned long long int
+#define LOCKVAR1 lock_64_1
+#define LOCKVAR2 lock_64_2
+#define TESTS tests_64
+#include "spin-lock-global.h"
+#undef TYPE
+#undef LOCKVAR1
+#undef LOCKVAR2
+#undef TESTS
+
+#define N (7 * 1000)
+
+int
+main (void)
+{
+ tests_32 (N);
+ tests_64 (N);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,169 @@
+#define XSTR(S) STR (S)
+#define STR(S) #S
+
+#define PRINTF(...) \
+ { \
+ printf (__VA_ARGS__); \
+ fflush (NULL); \
+ }
+
+#define DO_PRAGMA(x) _Pragma (#x)
+
+#ifndef SPIN_CNT_MAX
+/* Define to have limited-spin spinlock.
+ Ensures that the program will terminate. */
+#define SPIN_CNT_MAX 0x8000U
+#endif
+
+#define TEST_1(N, LOCKVAR, VERIFY, N_GANGS, N_WORKERS) \
+ assert (N % N_GANGS == 0); \
+ \
+ DO_PRAGMA (acc parallel \
+ num_gangs(N_GANGS) \
+ num_workers(N_WORKERS) \
+ copy (lock_cnt) \
+ copy (spin_cnt_max_hit) \
+ present (LOCKVAR)) \
+ { \
+ TYPE unlocked = (TYPE)0; \
+ TYPE locked = ~unlocked; \
+ \
+ LOCKVAR = unlocked; \
+ \
+ unsigned int n_gangs \
+ = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); \
+ \
+ DO_PRAGMA (acc loop worker) \
+ for (unsigned int i = 0; i < N / n_gangs; i++) \
+ { \
+ TYPE res; \
+ \
+ unsigned int spin_cnt = 0; \
+ while (1) \
+ { \
+ res = __atomic_exchange_n (&LOCKVAR, locked, \
+ MEMMODEL_ACQUIRE); \
+ if (res == locked) \
+ { \
+ if (SPIN_CNT_MAX > 0) \
+ { \
+ spin_cnt++; \
+ if (spin_cnt == SPIN_CNT_MAX) \
+ { \
+ if (VERIFY) \
+ __atomic_fetch_add (&spin_cnt_max_hit, 1, \
+ MEMMODEL_RELAXED); \
+ break; \
+ } \
+ } \
+ continue; \
+ \
+ } \
+ else \
+ { \
+ if (res != unlocked) \
+ __builtin_abort (); \
+ \
+ if (VERIFY) \
+ __atomic_fetch_add (&lock_cnt, 1, \
+ MEMMODEL_RELAXED); \
+ \
+ __atomic_store_n (&LOCKVAR, unlocked, \
+ MEMMODEL_RELEASE); \
+ break; \
+ } \
+ } \
+ } \
+ }
+
+#define TEST(N, LOCKVAR, VERIFY, N_GANGS, N_WORKERS) \
+ { \
+ spin_cnt_max_hit = 0; \
+ \
+ if (VERIFY) \
+ lock_cnt = 0; \
+ \
+ PRINTF ("%s - verify=%u - lock=%s - gangs=%u - workers=%u ... ", \
+ XSTR (TYPE), VERIFY, STR(LOCKVAR), N_GANGS, N_WORKERS); \
+ TEST_1 (N, LOCKVAR, VERIFY, N_GANGS, N_WORKERS); \
+ PRINTF ("done\n"); \
+ \
+ if (VERIFY && SPIN_CNT_MAX) \
+ PRINTF ("spin_cnt_max_hit: %llu\n", spin_cnt_max_hit); \
+ \
+ if (VERIFY && (lock_cnt + spin_cnt_max_hit != N)) \
+ { \
+ PRINTF ("lock_cnt: %llu\n", lock_cnt); \
+ PRINTF ("lock_cnt + spin_cnt_max_hit: %llu\n", \
+ lock_cnt + spin_cnt_max_hit); \
+ PRINTF ("N: %u\n", N); \
+ __builtin_abort (); \
+ } \
+ }
+
+/* Uses .global addressing on nvptx. */
+TYPE LOCKVAR1;
+#pragma acc declare create (LOCKVAR1)
+
+void
+TESTS (unsigned int n)
+{
+ unsigned long long int lock_cnt;
+ unsigned long long int spin_cnt_max_hit;
+
+ /* Uses generic addressing on nvptx. */
+ TYPE LOCKVAR2;
+#pragma acc declare create (LOCKVAR2)
+
+#define N_GANGS 1
+#define N_WORKERS 8
+#define VERIFY 0
+ TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+ TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+ TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+ TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+
+#define N_GANGS 2
+#define N_WORKERS 4
+#define VERIFY 0
+ TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+ TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+ TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+ TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+
+#define N_GANGS 4
+#define N_WORKERS 2
+#define VERIFY 0
+ TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+ TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+ TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+ TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+
+#define N_GANGS 8
+#define N_WORKERS 1
+#define VERIFY 0
+ TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+ TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+ TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+ TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+}
new file mode 100644
@@ -0,0 +1,35 @@
+#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
+#include <assert.h>
+
+enum memmodel
+ {
+ MEMMODEL_RELAXED = 0,
+ MEMMODEL_ACQUIRE = 2,
+ MEMMODEL_RELEASE = 3,
+ MEMMODEL_SEQ_CST = 5,
+ };
+
+#define TYPE unsigned int
+#define TESTS tests_32
+#include "spin-lock-shared.h"
+#undef TYPE
+#undef TESTS
+
+#define TYPE unsigned long long int
+#define TESTS tests_64
+#include "spin-lock-shared.h"
+#undef TYPE
+#undef TESTS
+
+#define N (50 * 1000)
+
+int
+main (void)
+{
+ tests_32 (N);
+ tests_64 (N);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,135 @@
+#define XSTR(S) STR (S)
+#define STR(S) #S
+
+#define PRINTF(...) \
+ { \
+ printf (__VA_ARGS__); \
+ fflush (NULL); \
+ }
+
+#define DO_PRAGMA(x) _Pragma (#x)
+
+#ifndef SPIN_CNT_MAX
+/* Define to have limited-spin spinlock.
+ Ensures that the program will terminate. */
+#define SPIN_CNT_MAX 0x20000U
+#endif
+
+#define TEST_1(N, LOCKREF) \
+ DO_PRAGMA (acc parallel \
+ num_gangs(1) \
+ num_workers(N_WORKERS) \
+ copy (lock_cnt) \
+ copy (spin_cnt_max_hit)) \
+ { \
+ TYPE unlocked = (TYPE)0; \
+ TYPE locked = ~unlocked; \
+ TYPE lock; \
+ TYPE *volatile lock_ptr = &lock; \
+ unsigned long long int lock_cnt_1; \
+ unsigned long long int spin_cnt_max_hit_1; \
+ \
+ if (VERIFY) \
+ { \
+ lock_cnt_1 = 0; \
+ \
+ if (SPIN_CNT_MAX) \
+ spin_cnt_max_hit_1 = 0; \
+ } \
+ \
+ *(LOCKREF) = unlocked; \
+ \
+ DO_PRAGMA (acc loop worker) \
+ for (unsigned int i = 0; i < N; i++) \
+ { \
+ TYPE res; \
+ \
+ unsigned int spin_cnt = 0; \
+ while (1) \
+ { \
+ res = __atomic_exchange_n (LOCKREF, locked, \
+ MEMMODEL_ACQUIRE); \
+ if (res == locked) \
+ { \
+ if (SPIN_CNT_MAX > 0) \
+ { \
+ spin_cnt++; \
+ if (spin_cnt == SPIN_CNT_MAX) \
+ { \
+ if (VERIFY) \
+ __atomic_fetch_add (&spin_cnt_max_hit_1, 1, \
+ MEMMODEL_RELAXED); \
+ break; \
+ } \
+ } \
+ continue; \
+ } \
+ else \
+ { \
+ if (res != unlocked) \
+ __builtin_abort (); \
+ \
+ if (VERIFY) \
+ __atomic_fetch_add (&lock_cnt_1, 1, \
+ MEMMODEL_RELAXED); \
+ \
+ __atomic_store_n (LOCKREF, unlocked, \
+ MEMMODEL_RELEASE); \
+ \
+ break; \
+ } \
+ } \
+ } \
+ \
+ if (VERIFY) \
+ { \
+ lock_cnt += lock_cnt_1; \
+ \
+ if (SPIN_CNT_MAX) \
+ spin_cnt_max_hit += spin_cnt_max_hit_1; \
+ } \
+ }
+
+#define TEST(N, LOCKREF) \
+ { \
+ spin_cnt_max_hit = 0; \
+ \
+ if (VERIFY) \
+ lock_cnt = 0; \
+ \
+ PRINTF ("%s - verify=%u - LOCKREF=%s ... ", \
+ XSTR (TYPE), VERIFY, #LOCKREF); \
+ TEST_1 (N, LOCKREF); \
+ PRINTF ("done\n"); \
+ \
+ if (VERIFY && SPIN_CNT_MAX) \
+ PRINTF ("spin_cnt_max_hit: %llu\n", spin_cnt_max_hit); \
+ \
+ if (VERIFY && (lock_cnt + spin_cnt_max_hit != N)) \
+ { \
+ PRINTF ("lock_cnt: %llu\n", lock_cnt); \
+ PRINTF ("lock_cnt + spin_cnt_max_hit: %llu\n", \
+ lock_cnt + spin_cnt_max_hit); \
+ PRINTF ("N: %u\n", N); \
+ __builtin_abort (); \
+ } \
+ }
+
+void
+TESTS (unsigned int n)
+{
+ unsigned long long int lock_cnt;
+ unsigned long long int spin_cnt_max_hit;
+
+#define N_WORKERS 8
+
+#define VERIFY 0
+ TEST (n, &lock);
+ TEST (n, lock_ptr);
+#undef VERIFY
+
+#define VERIFY 1
+ TEST (n, &lock);
+ TEST (n, lock_ptr);
+#undef VERIFY
+}