@@ -379,10 +379,17 @@ struct compat_robust_list_head {
struct compat_robust_list list;
compat_long_t futex_offset;
compat_uptr_t list_op_pending;
};
+/*
+ * Compat counterpart of struct extended_robust_list_head: bit 1 of each
+ * entry pointer selects between exactly two offsets, so only one extra
+ * offset (futex_offset2) exists, mirroring the native layout.  Must
+ * differ in size from compat_robust_list_head (the registered length is
+ * how the kernel tells the two ABIs apart).
+ */
+struct compat_extended_robust_list_head {
+	struct compat_robust_list_head list_head;
+	compat_long_t futex_offset2;
+};
+
#ifdef CONFIG_COMPAT_OLD_SIGACTION
struct compat_old_sigaction {
compat_uptr_t sa_handler;
compat_old_sigset_t sa_mask;
compat_ulong_t sa_flags;
@@ -1057,10 +1057,16 @@ struct task_struct {
#ifdef CONFIG_X86_CPU_RESCTRL
u32 closid;
u32 rmid;
#endif
#ifdef CONFIG_FUTEX
+	/*
+	 * Tagged pointer: the bottom two bits are masked off before use.
+	 * bit 0 clear: points to a struct extended_robust_list_head
+	 * bit 0 set:   points to a legacy struct robust_list_head
+	 * The same encoding applies to compat_robust_list below.
+	 */
struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
struct compat_robust_list_head __user *compat_robust_list;
#endif
struct list_head pi_state_list;
@@ -92,10 +92,41 @@ struct robust_list_head {
* so only truly owned locks will be handled.
*/
struct robust_list __user *list_op_pending;
};
+/*
+ * Extensible per-thread list head:
+ *
+ * As locks are shared between processes, the single futex_offset field
+ * has ABI lock-stepping issues which the original robust_list_head
+ * structure did not anticipate (and which prevent 32-bit/64-bit
+ * interoperability, as well as shrinking of mutex structures).
+ * The new struct extended_robust_list_head allows two concurrent
+ * futex_offset values, chosen per entry using bit 1 of the robust_list
+ * *next pointer; the bottom 2 bits are now masked in BOTH the old and
+ * new ABI.
+ *
+ * Note: this structure is part of the syscall ABI like
+ * robust_list_head above, and must have a different size than
+ * robust_list_head (the length passed to set_robust_list() is how the
+ * kernel distinguishes the two).
+ */
+struct extended_robust_list_head {
+	struct robust_list_head list_head;
+
+	/*
+	 * Relative offset applied, instead of list_head.futex_offset, to
+	 * entries whose *next pointer has bit 1 set.  Set by user-space:
+	 * locks live in memory shared between processes, and a process
+	 * may hold locks of two ABIs at the same time.
+	 */
+	long futex_offset2;
+};
+
/*
* Are there any waiters for this robust futex:
*/
#define FUTEX_WAITERS 0x80000000
@@ -3396,17 +3396,20 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	if (!futex_cmpxchg_enabled)
		return -ENOSYS;
-	/*
-	 * The kernel knows only one size for now:
-	 */
-	if (unlikely(len != sizeof(*head)))
+
+	/* The registered length is what tells the two ABIs apart: */
+	BUILD_BUG_ON(sizeof(struct robust_list_head) ==
+		     sizeof(struct extended_robust_list_head));
+	if (unlikely(len != sizeof(struct robust_list_head) &&
+		     len != sizeof(struct extended_robust_list_head)))
		return -EINVAL;
-	current->robust_list = head;
+
+	/*
+	 * Store the head with the ABI flavour tagged into bit 0 of the
+	 * pointer (bit 0 set == legacy robust_list_head).  The bottom
+	 * two bits of the user pointer are reserved and masked off.
+	 */
+	head = (struct robust_list_head __user *)
+		((unsigned long)head & ~3UL);
+	if (len == sizeof(struct robust_list_head))
+		head = (struct robust_list_head __user *)
+			((unsigned long)head | 1UL);
+	current->robust_list = head;
	return 0;
}
/**
@@ -3419,10 +3422,11 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
struct robust_list_head __user * __user *, head_ptr,
size_t __user *, len_ptr)
{
struct robust_list_head __user *head;
unsigned long ret;
+ size_t len;
struct task_struct *p;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
@@ -3439,14 +3443,18 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
ret = -EPERM;
if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
goto err_unlock;
- head = p->robust_list;
+ head = p->robust_list & ~0b11;
+ if (p->robust_list & 0b11 == 0b1)
+ len = sizeof(struct robust_list_head);
+ else
+ len = sizeof(struct extended_robust_list_head);
rcu_read_unlock();
- if (put_user(sizeof(*head), len_ptr))
+ if (put_user(len, len_ptr))
return -EFAULT;
return put_user(head, head_ptr);
err_unlock:
rcu_read_unlock();
@@ -3524,23 +3532,26 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int p
return 0;
}
/*
- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes. Bit 1 chooses
+ * which futex_offset applies to the entry:
 */
static inline int fetch_robust_entry(struct robust_list __user **entry,
				     struct robust_list __user * __user *head,
-				     unsigned int *pi)
+				     unsigned int *pi,
+				     unsigned int *second_abi)
{
	unsigned long uentry;

	if (get_user(uentry, (unsigned long __user *)head))
		return -EFAULT;

-	*entry = (void __user *)(uentry & ~1UL);
+	*entry = (void __user *)(uentry & ~3UL);
	*pi = uentry & 1;
+	*second_abi = !!(uentry & 2);

	return 0;
}
/*
@@ -3549,69 +3560,84 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
*
* We silently return on any sign of list-walking problem.
*/
void exit_robust_list(struct task_struct *curr)
{
- struct robust_list_head __user *head = curr->robust_list;
- struct robust_list __user *entry, *next_entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
- unsigned int uninitialized_var(next_pi);
- unsigned long futex_offset;
+ struct robust_list_head __user *head_ptr = curr->robust_list & ~1UL;
+ unsigned int is_extended_list = curr->robust_list & 1 == 0;
+ struct extended_robust_list_head head;
+ struct robust_list __user *entry = &head->list_head.list.next,
+ *next_entry, *pending;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip, second_abi,
+ second_abip;
+ unsigned int uninitialized_var(next_pi),
+ uninitialized_var(next_second_abi);
int rc;
if (!futex_cmpxchg_enabled)
return;
/*
- * Fetch the list head (which was registered earlier, via
- * sys_set_robust_list()):
+ * fetch_robust_entry code is duplicated here to avoid excessive calls
+ * to get_user()
*/
- if (fetch_robust_entry(&entry, &head->list.next, &pi))
- return;
- /*
- * Fetch the relative futex offset:
- */
- if (get_user(futex_offset, &head->futex_offset))
- return;
- /*
- * Fetch any possibly pending lock-add first, and handle it
- * if it exists:
- */
- if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
- return;
+ if (is_extended_list) {
+ if (get_user(head, (struct extended_robust_list_head *)
+ head_ptr))
+ return;
+ } else {
+ if (get_user(head.list_head, head_ptr))
+ return;
+ }
+
+ pi = head.list_head.list.next & 1;
+ second_abi = head.list_head.list.next & 0b10;
+ head.list_head.list.next &= ~0b11UL;
+ pip = head.list_head.list_op_pending & 1;
+ second_abip = head.list_head.list_op_pending & 0b10;
+ head.list_head.list_op_pending &= ~0b11UL;
next_entry = NULL; /* avoid warning with gcc */
- while (entry != &head->list) {
+ while (entry != &head->list_head.list) {
/*
* Fetch the next entry in the list before calling
* handle_futex_death:
*/
- rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
+ rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi,
+ &next_second_abi);
/*
* A pending lock might already be on the list, so
* don't process it twice:
*/
- if (entry != pending)
+ if (entry != pending) {
+ long futex_offset = second_abi ?
+ head.futex_offset2 :
+ head.list_head.futex_offset;
if (handle_futex_death((void __user *)entry + futex_offset,
curr, pi))
return;
+ }
if (rc)
return;
entry = next_entry;
pi = next_pi;
+ second_abi = next_second_abi;
/*
* Avoid excessively long or circular lists:
*/
if (!--limit)
break;
cond_resched();
}
- if (pending)
+ if (pending) {
+ long futex_offset = second_abip ? head.futex_offset2 :
+ head.list_head.futex_offset;
handle_futex_death((void __user *)pending + futex_offset,
curr, pip);
+ }
}
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3)
{
@@ -3707,21 +3733,25 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
#ifdef CONFIG_COMPAT
/*
- * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes.
+ * Bit 1 chooses which futex_offset to use:
 */
static inline int
-compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
-			  compat_uptr_t __user *head, unsigned int *pi)
+compat_fetch_robust_entry(compat_uptr_t *uentry,
+			  struct robust_list __user **entry,
+			  compat_uptr_t __user *head, unsigned int *pi,
+			  unsigned int *second_abi)
{
	if (get_user(*uentry, head))
		return -EFAULT;

-	*entry = compat_ptr((*uentry) & ~1);
+	*entry = compat_ptr((*uentry) & ~3);
	*pi = (unsigned int)(*uentry) & 1;
+	*second_abi = !!((unsigned int)(*uentry) & 2);

	return 0;
}
static void __user *futex_uaddr(struct robust_list __user *entry,
@@ -3739,72 +3769,86 @@ static void __user *futex_uaddr(struct robust_list __user *entry,
*
* We silently return on any sign of list-walking problem.
*/
void compat_exit_robust_list(struct task_struct *curr)
{
- struct compat_robust_list_head __user *head = curr->compat_robust_list;
- struct robust_list __user *entry, *next_entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
- unsigned int uninitialized_var(next_pi);
+ struct compat_robust_list_head __user *head = curr->compat_robust_list &
+ ~1UL;
+ unsigned int is_extended_list = curr->compat_robust_list & 1 == 0;
+ struct compat_extended_robust_list_head head;
+ struct robust_list __user *entry = &head->list_head.list.next,
+ *next_entry, *pending;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip, second_abi,
+ second_abip;
+ unsigned int uninitialized_var(next_pi),
+ uninitialized_var(next_second_abi);
compat_uptr_t uentry, next_uentry, upending;
- compat_long_t futex_offset;
int rc;
if (!futex_cmpxchg_enabled)
return;
/*
- * Fetch the list head (which was registered earlier, via
- * sys_set_robust_list()):
- */
- if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
- return;
- /*
- * Fetch the relative futex offset:
- */
- if (get_user(futex_offset, &head->futex_offset))
- return;
- /*
- * Fetch any possibly pending lock-add first, and handle it
- * if it exists:
+ * compat_fetch_robust_entry code is duplicated here to avoid excessive
+ * calls to get_user()
*/
- if (compat_fetch_robust_entry(&upending, &pending,
- &head->list_op_pending, &pip))
- return;
+ if (is_extended_list) {
+ if (get_user(head, (struct compat_extended_robust_list_head *)
+ head_ptr))
+ return;
+ } else {
+ if (get_user(head.list_head, head_ptr))
+ return;
+ }
+
+ pi = head.list_head.list.next & 1;
+ second_abi = head.list_head.list.next & 0b10;
+ head.list_head.list.next &= ~0b11UL;
+ pip = head.list_head.list_op_pending & 1;
+ second_abip = head.list_head.list_op_pending & 0b10;
+ head.list_head.list_op_pending &= ~0b11UL;
next_entry = NULL; /* avoid warning with gcc */
while (entry != (struct robust_list __user *) &head->list) {
/*
* Fetch the next entry in the list before calling
* handle_futex_death:
*/
rc = compat_fetch_robust_entry(&next_uentry, &next_entry,
- (compat_uptr_t __user *)&entry->next, &next_pi);
+ (compat_uptr_t __user *)&entry->next, &next_pi,
+ &next_second_abi);
/*
* A pending lock might already be on the list, so
* dont process it twice:
*/
if (entry != pending) {
+ compat_long_t futex_offset = second_abi ?
+ head.futex_offset2 :
+ head.list_head.futex_offset;
void __user *uaddr = futex_uaddr(entry, futex_offset);
if (handle_futex_death(uaddr, curr, pi))
return;
}
if (rc)
return;
uentry = next_uentry;
entry = next_entry;
pi = next_pi;
+ second_abi = next_second_abi;
/*
* Avoid excessively long or circular lists:
*/
if (!--limit)
break;
cond_resched();
}
if (pending) {
+ compat_long_t futex_offset = second_abip ?
+ head.futex_offset2 :
+ head.list_head.futex_offset;
void __user *uaddr = futex_uaddr(pending, futex_offset);
handle_futex_death(uaddr, curr, pip);
}
}
@@ -3814,23 +3858,29 @@ COMPAT_SYSCALL_DEFINE2(set_robust_list,
compat_size_t, len)
{
if (!futex_cmpxchg_enabled)
return -ENOSYS;
- if (unlikely(len != sizeof(*head)))
+ if (unlikely(len != sizeof(struct compat_robust_list_head) &&
+ len != sizeof(struct compat_extended_robust_list_head)))
return -EINVAL;
- current->compat_robust_list = head;
+ current->compat_robust_list = head & ~0b11;
+ BUILD_BUG_ON(sizeof(compat_robust_list_head) ==
+ sizeof(compat_extended_robust_list_head));
+ if (len == sizeof(compat_robust_list_head))
+ current->compat_robust_list |= 0b1;
return 0;
}
COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
compat_uptr_t __user *, head_ptr,
compat_size_t __user *, len_ptr)
{
struct compat_robust_list_head __user *head;
+ size_t len;
unsigned long ret;
struct task_struct *p;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
@@ -3848,14 +3898,18 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
ret = -EPERM;
if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
goto err_unlock;
- head = p->compat_robust_list;
+ head = p->compat_robust_list & ~0b11;
+ if (p->compat_robust_list & 0b11 == 0b1)
+ len = sizeof(struct compat_robust_list_head);
+ else
+ len = sizeof(struct compat_extended_robust_list_head);
rcu_read_unlock();
- if (put_user(sizeof(*head), len_ptr))
+ if (put_user(len, len_ptr))
return -EFAULT;
return put_user(ptr_to_compat(head), head_ptr);
err_unlock:
rcu_read_unlock();