aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/futex.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--kernel/futex.c256
1 files changed, 121 insertions, 135 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 2921ebaa1467..e282c083df59 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -719,7 +719,7 @@ again:
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.i_seq = get_inode_sequence_number(inode);
- key->shared.pgoff = basepage_index(tail);
+ key->shared.pgoff = page_to_pgoff(tail);
rcu_read_unlock();
}
@@ -839,6 +839,29 @@ static struct futex_pi_state *alloc_pi_state(void)
return pi_state;
}
+static void pi_state_update_owner(struct futex_pi_state *pi_state,
+ struct task_struct *new_owner)
+{
+ struct task_struct *old_owner = pi_state->owner;
+
+ lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
+
+ if (old_owner) {
+ raw_spin_lock(&old_owner->pi_lock);
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
+ raw_spin_unlock(&old_owner->pi_lock);
+ }
+
+ if (new_owner) {
+ raw_spin_lock(&new_owner->pi_lock);
+ WARN_ON(!list_empty(&pi_state->list));
+ list_add(&pi_state->list, &new_owner->pi_state_list);
+ pi_state->owner = new_owner;
+ raw_spin_unlock(&new_owner->pi_lock);
+ }
+}
+
static void get_pi_state(struct futex_pi_state *pi_state)
{
WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));
@@ -861,17 +884,12 @@ static void put_pi_state(struct futex_pi_state *pi_state)
* and has cleaned up the pi_state already
*/
if (pi_state->owner) {
- struct task_struct *owner;
+ unsigned long flags;
- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
- owner = pi_state->owner;
- if (owner) {
- raw_spin_lock(&owner->pi_lock);
- list_del_init(&pi_state->list);
- raw_spin_unlock(&owner->pi_lock);
- }
- rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+ raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
+ pi_state_update_owner(pi_state, NULL);
+ rt_mutex_proxy_unlock(&pi_state->pi_mutex);
+ raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
}
if (current->pi_state_cache) {
@@ -1034,7 +1052,8 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
* FUTEX_OWNER_DIED bit. See [4]
*
* [10] There is no transient state which leaves owner and user space
- * TID out of sync.
+ * TID out of sync. Except one error case where the kernel is denied
+ * write access to the user address, see fixup_pi_state_owner().
*
*
* Serialization and lifetime rules:
@@ -1595,8 +1614,10 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
*/
newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
- if (unlikely(should_fail_futex(true)))
+ if (unlikely(should_fail_futex(true))) {
ret = -EFAULT;
+ goto out_unlock;
+ }
ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
if (!ret && (curval != uval)) {
@@ -1612,26 +1633,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
ret = -EINVAL;
}
- if (ret)
- goto out_unlock;
-
- /*
- * This is a point of no return; once we modify the uval there is no
- * going back and subsequent operations must not fail.
- */
-
- raw_spin_lock(&pi_state->owner->pi_lock);
- WARN_ON(list_empty(&pi_state->list));
- list_del_init(&pi_state->list);
- raw_spin_unlock(&pi_state->owner->pi_lock);
-
- raw_spin_lock(&new_owner->pi_lock);
- WARN_ON(!list_empty(&pi_state->list));
- list_add(&pi_state->list, &new_owner->pi_state_list);
- pi_state->owner = new_owner;
- raw_spin_unlock(&new_owner->pi_lock);
-
- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+ if (!ret) {
+ /*
+ * This is a point of no return; once we modified the uval
+ * there is no going back and subsequent operations must
+ * not fail.
+ */
+ pi_state_update_owner(pi_state, new_owner);
+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+ }
out_unlock:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
@@ -1722,8 +1732,8 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
{
unsigned int op = (encoded_op & 0x70000000) >> 28;
unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
- int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 12);
- int cmparg = sign_extend32(encoded_op & 0x00000fff, 12);
+ int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
+ int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
int oldval, ret;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
@@ -2454,18 +2464,13 @@ static void unqueue_me_pi(struct futex_q *q)
spin_unlock(q->lock_ptr);
}
-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
- struct task_struct *argowner)
+static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+ struct task_struct *argowner)
{
+ u32 uval, uninitialized_var(curval), newval, newtid;
struct futex_pi_state *pi_state = q->pi_state;
- u32 uval, uninitialized_var(curval), newval;
struct task_struct *oldowner, *newowner;
- u32 newtid;
- int ret, err = 0;
-
- lockdep_assert_held(q->lock_ptr);
-
- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ int err = 0;
oldowner = pi_state->owner;
@@ -2499,21 +2504,31 @@ retry:
* We raced against a concurrent self; things are
* already fixed up. Nothing to do.
*/
- ret = 0;
- goto out_unlock;
+ return 0;
}
if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
- /* We got the lock after all, nothing to fix. */
- ret = 0;
- goto out_unlock;
+ /* We got the lock. pi_state is correct. Tell caller. */
+ return 1;
}
/*
- * Since we just failed the trylock; there must be an owner.
+ * The trylock just failed, so either there is an owner or
+ * there is a higher priority waiter than this one.
*/
newowner = rt_mutex_owner(&pi_state->pi_mutex);
- BUG_ON(!newowner);
+ /*
+ * If the higher priority waiter has not yet taken over the
+ * rtmutex then newowner is NULL. We can't return here with
+ * that state because it's inconsistent vs. the user space
+ * state. So drop the locks and try again. It's a valid
+ * situation and not any different from the other retry
+ * conditions.
+ */
+ if (unlikely(!newowner)) {
+ err = -EAGAIN;
+ goto handle_err;
+ }
} else {
WARN_ON_ONCE(argowner != current);
if (oldowner == current) {
@@ -2521,8 +2536,7 @@ retry:
* We raced against a concurrent self; things are
* already fixed up. Nothing to do.
*/
- ret = 0;
- goto out_unlock;
+ return 1;
}
newowner = argowner;
}
@@ -2552,22 +2566,9 @@ retry:
* We fixed up user space. Now we need to fix the pi_state
* itself.
*/
- if (pi_state->owner != NULL) {
- raw_spin_lock(&pi_state->owner->pi_lock);
- WARN_ON(list_empty(&pi_state->list));
- list_del_init(&pi_state->list);
- raw_spin_unlock(&pi_state->owner->pi_lock);
- }
+ pi_state_update_owner(pi_state, newowner);
- pi_state->owner = newowner;
-
- raw_spin_lock(&newowner->pi_lock);
- WARN_ON(!list_empty(&pi_state->list));
- list_add(&pi_state->list, &newowner->pi_state_list);
- raw_spin_unlock(&newowner->pi_lock);
- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-
- return 0;
+ return argowner == current;
/*
* In order to reschedule or handle a page fault, we need to drop the
@@ -2588,17 +2589,16 @@ handle_err:
switch (err) {
case -EFAULT:
- ret = fault_in_user_writeable(uaddr);
+ err = fault_in_user_writeable(uaddr);
break;
case -EAGAIN:
cond_resched();
- ret = 0;
+ err = 0;
break;
default:
WARN_ON_ONCE(1);
- ret = err;
break;
}
@@ -2608,17 +2608,44 @@ handle_err:
/*
* Check if someone else fixed it for us:
*/
- if (pi_state->owner != oldowner) {
- ret = 0;
- goto out_unlock;
- }
+ if (pi_state->owner != oldowner)
+ return argowner == current;
- if (ret)
- goto out_unlock;
+ /* Retry if err was -EAGAIN or the fault in succeeded */
+ if (!err)
+ goto retry;
- goto retry;
+ /*
+ * fault_in_user_writeable() failed so user state is immutable. At
+ * best we can make the kernel state consistent but user state will
+ * be most likely hosed and any subsequent unlock operation will be
+ * rejected due to PI futex rule [10].
+ *
+ * Ensure that the rtmutex owner is also the pi_state owner despite
+ * the user space value claiming something different. There is no
+ * point in unlocking the rtmutex if current is the owner as it
+ * would need to wait until the next waiter has taken the rtmutex
+ * to guarantee consistent state. Keep it simple. Userspace asked
+ * for this wreckaged state.
+ *
+ * The rtmutex has an owner - either current or some other
+ * task. See the EAGAIN loop above.
+ */
+ pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
-out_unlock:
+ return err;
+}
+
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+ struct task_struct *argowner)
+{
+ struct futex_pi_state *pi_state = q->pi_state;
+ int ret;
+
+ lockdep_assert_held(q->lock_ptr);
+
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ ret = __fixup_pi_state_owner(uaddr, q, argowner);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
return ret;
}
@@ -2642,8 +2669,6 @@ static long futex_wait_restart(struct restart_block *restart);
*/
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
- int ret = 0;
-
if (locked) {
/*
* Got the lock. We might not be the anticipated owner if we
@@ -2654,8 +2679,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* stable state, anything else needs more attention.
*/
if (q->pi_state->owner != current)
- ret = fixup_pi_state_owner(uaddr, q, current);
- goto out;
+ return fixup_pi_state_owner(uaddr, q, current);
+ return 1;
}
/*
@@ -2666,24 +2691,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* Another speculative read; pi_state->owner == current is unstable
* but needs our attention.
*/
- if (q->pi_state->owner == current) {
- ret = fixup_pi_state_owner(uaddr, q, NULL);
- goto out;
- }
+ if (q->pi_state->owner == current)
+ return fixup_pi_state_owner(uaddr, q, NULL);
/*
* Paranoia check. If we did not take the lock, then we should not be
- * the owner of the rt_mutex.
+ * the owner of the rt_mutex. Warn and establish consistent state.
*/
- if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
- printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
- "pi-state %p\n", ret,
- q->pi_state->pi_mutex.owner,
- q->pi_state->owner);
- }
+ if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
+ return fixup_pi_state_owner(uaddr, q, current);
-out:
- return ret ? ret : locked;
+ return 0;
}
/**
@@ -2857,14 +2875,13 @@ retry:
goto out;
restart = &current->restart_block;
- restart->fn = futex_wait_restart;
restart->futex.uaddr = uaddr;
restart->futex.val = val;
restart->futex.time = *abs_time;
restart->futex.bitset = bitset;
restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
- ret = -ERESTART_RESTARTBLOCK;
+ ret = set_restart_fn(restart, futex_wait_restart);
out:
if (to) {
@@ -2904,7 +2921,6 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock)
{
struct hrtimer_sleeper timeout, *to = NULL;
- struct futex_pi_state *pi_state = NULL;
struct task_struct *exiting = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
@@ -3047,23 +3063,9 @@ no_block:
if (res)
ret = (res < 0) ? res : 0;
- /*
- * If fixup_owner() faulted and was unable to handle the fault, unlock
- * it and return the fault to userspace.
- */
- if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
- pi_state = q.pi_state;
- get_pi_state(pi_state);
- }
-
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
- if (pi_state) {
- rt_mutex_futex_unlock(&pi_state->pi_mutex);
- put_pi_state(pi_state);
- }
-
goto out_put_key;
out_unlock_put_key:
@@ -3329,7 +3331,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
u32 __user *uaddr2)
{
struct hrtimer_sleeper timeout, *to = NULL;
- struct futex_pi_state *pi_state = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
union futex_key key2 = FUTEX_KEY_INIT;
@@ -3414,16 +3415,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (q.pi_state && (q.pi_state->owner != current)) {
spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current);
- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
- pi_state = q.pi_state;
- get_pi_state(pi_state);
- }
/*
* Drop the reference to the pi state which
* the requeue_pi() code acquired for us.
*/
put_pi_state(q.pi_state);
spin_unlock(q.lock_ptr);
+ /*
+ * Adjust the return value. It's either -EFAULT or
+ * success (1) but the caller expects 0 for success.
+ */
+ ret = ret < 0 ? ret : 0;
}
} else {
struct rt_mutex *pi_mutex;
@@ -3454,25 +3456,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (res)
ret = (res < 0) ? res : 0;
- /*
- * If fixup_pi_state_owner() faulted and was unable to handle
- * the fault, unlock the rt_mutex and return the fault to
- * userspace.
- */
- if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
- pi_state = q.pi_state;
- get_pi_state(pi_state);
- }
-
/* Unqueue and drop the lock. */
unqueue_me_pi(&q);
}
- if (pi_state) {
- rt_mutex_futex_unlock(&pi_state->pi_mutex);
- put_pi_state(pi_state);
- }
-
if (ret == -EINTR) {
/*
* We've already been requeued, but cannot restart by calling
@@ -3898,8 +3885,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
if (op & FUTEX_CLOCK_REALTIME) {
flags |= FLAGS_CLOCKRT;
- if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \
- cmd != FUTEX_WAIT_REQUEUE_PI)
+ if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
return -ENOSYS;
}