@@ -37,6 +37,13 @@ enum rseq_abi_cs_flags {
(1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
};
+enum rseq_abi_sched_state_flags {
+ /*
+ * Bit 0 of rseq_abi_sched_state.state: set while the task runs on a CPU.
+ */
+ RSEQ_ABI_SCHED_STATE_FLAG_ON_CPU = (1U << 0),
+};
+
/*
* struct rseq_abi_cs is aligned on 4 * 8 bytes to ensure it is always
* contained within a single cache-line. It is usually declared as
@@ -53,6 +60,32 @@ struct rseq_abi_cs {
__u64 abort_ip;
} __attribute__((aligned(4 * sizeof(__u64))));
+/*
+ * Align rseq_abi_sched_state on the cache line size (not enforced here).
+ */
+struct rseq_abi_sched_state {
+ /*
+ * Version of this structure. Populated by the kernel, read by
+ * user-space.
+ */
+ __u32 version;
+ /*
+ * Bitmask of enum rseq_abi_sched_state_flags. Updated by the kernel.
+ * Can be read by any user-space thread; read it with single-copy
+ * atomicity semantics. Aligned on 32-bit, and ideally on cache line
+ * size.
+ * The value is only a hint provided by the scheduler: the scheduler
+ * performs the state update only if the page holding this state is
+ * faulted-in.
+ */
+ __u32 state;
+ /*
+ * Thread ID associated with the thread registering this structure.
+ * Initialized by user-space before registration.
+ */
+ __u32 tid;
+};
+
/*
* struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always
* contained within a single cache-line.
@@ -164,6 +197,15 @@ struct rseq_abi {
*/
__u32 mm_cid;
+ __u32 padding1;
+
+ /*
+ * Restartable sequences sched_state_ptr field. Initialized by
+ * userspace to the address at which the struct rseq_abi_sched_state is
+ * located. Read by the kernel on rseq registration.
+ */
+ __u64 sched_state_ptr;
+
/*
* Flexible array member at end of structure, after last feature field.
*/
@@ -62,17 +62,28 @@ static int rseq_reg_success; /* At least one rseq registration has succeded. */
/* Allocate a large area for the TLS. */
#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024
+/* Approximate cache line size, in bytes; aligns __rseq_abi_sched_state. */
+#define CACHELINE_SIZE 128
+
/* Original struct rseq feature size is 20 bytes. */
#define ORIG_RSEQ_FEATURE_SIZE 20
/* Original struct rseq allocation size is 32 bytes. */
#define ORIG_RSEQ_ALLOC_SIZE 32
+static /* Per-thread sched state; its address is stored in __rseq_abi.sched_state_ptr. */
+__thread struct rseq_abi_sched_state __rseq_abi_sched_state __attribute__((tls_model("initial-exec"), aligned(CACHELINE_SIZE)));
+
static
__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
};
+static pid_t rseq_gettid(void)
+{
+ return syscall(__NR_gettid); /* gettid system call; result returned as pid_t */
+}
+
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
int flags, uint32_t sig)
{
@@ -109,6 +120,8 @@ int rseq_register_current_thread(void)
/* Treat libc's ownership as a successful registration. */
return 0;
}
+ __rseq_abi_sched_state.tid = rseq_gettid();
+ __rseq_abi.sched_state_ptr = (uint64_t)(unsigned long)&__rseq_abi_sched_state;
rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
if (rc) {
if (RSEQ_READ_ONCE(rseq_reg_success)) {
@@ -236,6 +236,11 @@ static inline void rseq_prepare_unload(void)
rseq_clear_rseq_cs();
}
+static inline struct rseq_abi_sched_state *rseq_get_sched_state(struct rseq_abi *rseq)
+{ /* Convert the __u64 sched_state_ptr stored in @rseq back into a pointer. */
+ return (struct rseq_abi_sched_state *)(unsigned long)rseq->sched_state_ptr;
+}
+
static inline __attribute__((always_inline))
int rseq_cmpeqv_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode,
intptr_t *v, intptr_t expect,
Extend struct rseq in the rseq selftests to include the sched_state_ptr
field. Implement a getter function for this field.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 tools/testing/selftests/rseq/rseq-abi.h | 42 +++++++++++++++++++++++++
 tools/testing/selftests/rseq/rseq.c     | 13 ++++++++
 tools/testing/selftests/rseq/rseq.h     |  5 +++
 3 files changed, 60 insertions(+)