Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--  fs/io_uring.c  101
1 file changed, 51 insertions(+), 50 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5f5d809d529a..98f43d0427bc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -258,6 +258,8 @@ struct io_ring_ctx {
struct user_struct *user;
+ const struct cred *creds;
+
struct completion ctx_done;
struct {
@@ -406,6 +408,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
}
ctx->flags = p->flags;
+ init_waitqueue_head(&ctx->sqo_wait);
init_waitqueue_head(&ctx->cq_wait);
init_completion(&ctx->ctx_done);
init_completion(&ctx->sqo_thread_started);
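Moving init_waitqueue_head(&ctx->sqo_wait) from io_sq_offload_start() (its old site is removed further down) into io_ring_ctx_alloc() means the queue is valid for the entire lifetime of the ring, not only once SQ offload has started. Any wakeup path can then assume it is initialized; a minimal sketch of the consumer side:

    /*
     * Sketch: with the waitqueue initialized at allocation time,
     * a wakeup can never race with (or precede) its initialization.
     */
    wake_up(&ctx->sqo_wait);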
@@ -754,11 +757,17 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
mutex_unlock(&ctx->uring_lock);
}
-static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ long min)
{
int iters = 0, ret = 0;
+ /*
+ * We disallow the app entering submit/complete with polling, but we
+ * still need to lock the ring to prevent racing with polled issue
+ * that got punted to a workqueue.
+ */
+ mutex_lock(&ctx->uring_lock);
do {
int tmin = 0;
@@ -794,21 +803,6 @@ static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
ret = 0;
} while (min && !*nr_events && !need_resched());
- return ret;
-}
-
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
-{
- int ret;
-
- /*
- * We disallow the app entering submit/complete with polling, but we
- * still need to lock the ring to prevent racing with polled issue
- * that got punted to a workqueue.
- */
- mutex_lock(&ctx->uring_lock);
- ret = __io_iopoll_check(ctx, nr_events, min);
mutex_unlock(&ctx->uring_lock);
return ret;
}
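This refactor folds the old __io_iopoll_check()/io_iopoll_check() pair into a single function that takes uring_lock around the reap loop itself; the SQPOLL thread, which already holds the lock at its call site, switches to calling io_iopoll_getevents() directly (see the hunk at line 2137 below). The merged shape, with the loop body elided:

    static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
                               long min)
    {
            int iters = 0, ret = 0;

            /* Serialize against polled issue punted to a workqueue. */
            mutex_lock(&ctx->uring_lock);
            do {
                    /* ... reap completions, break out on error ... */
            } while (min && !*nr_events && !need_resched());
            mutex_unlock(&ctx->uring_lock);
            return ret;
    }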
@@ -1468,8 +1462,11 @@ static void io_poll_complete_work(struct work_struct *work)
struct io_poll_iocb *poll = &req->poll;
struct poll_table_struct pt = { ._key = poll->events };
struct io_ring_ctx *ctx = req->ctx;
+ const struct cred *old_cred;
__poll_t mask = 0;
+ old_cred = override_creds(ctx->creds);
+
if (!READ_ONCE(poll->canceled))
mask = vfs_poll(poll->file, &pt) & poll->events;
@@ -1484,7 +1481,7 @@ static void io_poll_complete_work(struct work_struct *work)
if (!mask && !READ_ONCE(poll->canceled)) {
add_wait_queue(poll->head, &poll->wait);
spin_unlock_irq(&ctx->completion_lock);
- return;
+ goto out;
}
list_del_init(&req->list);
io_poll_complete(ctx, req, mask);
@@ -1492,6 +1489,8 @@ static void io_poll_complete_work(struct work_struct *work)
io_cqring_ev_posted(ctx);
io_put_req(req);
+out:
+ revert_creds(old_cred);
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
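io_poll_complete_work() above and io_sq_wq_submit_work() below now bracket their work with the same credential switch, so async work runs with the credentials captured at ring creation rather than whatever the worker thread happens to carry. The pattern, sketched with a hypothetical handler name:

    #include <linux/cred.h>

    /* my_async_work() is a stand-in for the real work handlers. */
    static void my_async_work(struct io_ring_ctx *ctx)
    {
            /* Act as the task that created the ring. */
            const struct cred *old_cred = override_creds(ctx->creds);

            /* ... issue the request on the submitter's behalf ... */

            /* Put the worker back on its own credentials. */
            revert_creds(old_cred);
    }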
@@ -1613,7 +1612,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+ struct sqe_submit *s)
{
struct io_uring_sqe *sqe_copy;
@@ -1631,7 +1630,8 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
return 0;
}
- memcpy(sqe_copy, sqe, sizeof(*sqe_copy));
+ memcpy(&req->submit, s, sizeof(*s));
+ memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy));
req->submit.sqe = sqe_copy;
INIT_WORK(&req->work, io_sq_wq_submit_work);
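io_req_defer() now receives the whole struct sqe_submit instead of just the raw sqe, and snapshots it into the request before duplicating the SQE. A deferred request therefore replays with its full submission state, and the copied SQE keeps it independent of the reusable SQ ring entry:

    /* Deferral snapshots the whole submission state (sketch). */
    memcpy(&req->submit, s, sizeof(*s));          /* sequence, flags, ... */
    memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy));  /* stable SQE copy */
    req->submit.sqe = sqe_copy;                   /* detach from the SQ ring */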
@@ -1734,10 +1734,12 @@ static void io_sq_wq_submit_work(struct work_struct *work)
struct io_ring_ctx *ctx = req->ctx;
struct mm_struct *cur_mm = NULL;
struct async_list *async_list;
+ const struct cred *old_cred;
LIST_HEAD(req_list);
mm_segment_t old_fs;
int ret;
+ old_cred = override_creds(ctx->creds);
async_list = io_async_list_from_sqe(ctx, req->submit.sqe);
restart:
do {
@@ -1845,6 +1847,7 @@ out:
unuse_mm(cur_mm);
mmput(cur_mm);
}
+ revert_creds(old_cred);
}
/*
@@ -1943,7 +1946,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
if (unlikely(ret))
goto out;
- ret = io_req_defer(ctx, req, s->sqe);
+ ret = io_req_defer(ctx, req, s);
if (ret) {
if (ret == -EIOCBQUEUED)
ret = 0;
@@ -2108,6 +2111,7 @@ static int io_sq_thread(void *data)
struct sqe_submit sqes[IO_IOPOLL_BATCH];
struct io_ring_ctx *ctx = data;
struct mm_struct *cur_mm = NULL;
+ const struct cred *old_cred;
mm_segment_t old_fs;
DEFINE_WAIT(wait);
unsigned inflight;
@@ -2117,6 +2121,7 @@ static int io_sq_thread(void *data)
old_fs = get_fs();
set_fs(USER_DS);
+ old_cred = override_creds(ctx->creds);
timeout = inflight = 0;
while (!kthread_should_park()) {
@@ -2137,7 +2142,7 @@ static int io_sq_thread(void *data)
*/
mutex_lock(&ctx->uring_lock);
if (!list_empty(&ctx->poll_list))
- __io_iopoll_check(ctx, &nr_events, 0);
+ io_iopoll_getevents(ctx, &nr_events, 0);
else
inflight = 0;
mutex_unlock(&ctx->uring_lock);
@@ -2156,16 +2161,6 @@ static int io_sq_thread(void *data)
if (!io_get_sqring(ctx, &sqes[0])) {
/*
- * We're polling. If we're within the defined idle
- * period, then let us spin without work before going
- * to sleep.
- */
- if (inflight || !time_after(jiffies, timeout)) {
- cpu_relax();
- continue;
- }
-
- /*
* Drop cur_mm before scheduling, we can't hold it for
* long periods (or over schedule()). Do this before
* adding ourselves to the waitqueue, as the unuse/drop
@@ -2177,6 +2172,16 @@ static int io_sq_thread(void *data)
cur_mm = NULL;
}
+ /*
+ * We're polling. If we're within the defined idle
+ * period, then let us spin without work before going
+ * to sleep.
+ */
+ if (inflight || !time_after(jiffies, timeout)) {
+ cpu_relax();
+ continue;
+ }
+
prepare_to_wait(&ctx->sqo_wait, &wait,
TASK_INTERRUPTIBLE);
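Taken together, these two hunks reorder the idle path in io_sq_thread(): the user mm is dropped first, and only then does the thread spin through its idle grace period, so it never busy-waits while still holding the mm. The resulting loop order, condensed:

    /* Condensed sketch of the reordered idle path. */
    if (!io_get_sqring(ctx, &sqes[0])) {
            /* 1) Release the mm; we may spin or sleep from here on. */
            if (cur_mm) {
                    unuse_mm(cur_mm);
                    mmput(cur_mm);
                    cur_mm = NULL;
            }
            /* 2) Spin out the idle period before sleeping. */
            if (inflight || !time_after(jiffies, timeout)) {
                    cpu_relax();
                    continue;
            }
            /* 3) Only now arm the waitqueue and consider sleeping. */
            prepare_to_wait(&ctx->sqo_wait, &wait, TASK_INTERRUPTIBLE);
            ...
    }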
@@ -2235,6 +2240,7 @@ static int io_sq_thread(void *data)
unuse_mm(cur_mm);
mmput(cur_mm);
}
+ revert_creds(old_cred);
kthread_parkme();
@@ -2283,7 +2289,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
const sigset_t __user *sig, size_t sigsz)
{
struct io_cq_ring *ring = ctx->cq_ring;
- sigset_t ksigmask, sigsaved;
int ret;
if (io_cqring_events(ring) >= min_events)
@@ -2293,21 +2298,17 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
#ifdef CONFIG_COMPAT
if (in_compat_syscall())
ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
- &ksigmask, &sigsaved, sigsz);
+ sigsz);
else
#endif
- ret = set_user_sigmask(sig, &ksigmask,
- &sigsaved, sigsz);
+ ret = set_user_sigmask(sig, sigsz);
if (ret)
return ret;
}
ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
-
- if (sig)
- restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS);
-
+ restore_saved_sigmask_unless(ret == -ERESTARTSYS);
if (ret == -ERESTARTSYS)
ret = -EINTR;
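This hunk tracks the kernel-wide sigmask rework: set_user_sigmask() and set_compat_user_sigmask() now save the old mask in the task itself rather than in caller-provided ksigmask/sigsaved buffers, and restore_saved_sigmask_unless() restores it, unless the wait was interrupted, in which case the temporary mask must stay in force until signal delivery or syscall restart handles it. The resulting caller pattern:

    /* Temporary sigmask around a blocking wait, new-style (sketch). */
    if (sig) {
            ret = set_user_sigmask(sig, sigsz); /* old mask saved in task */
            if (ret)
                    return ret;
    }

    ret = wait_event_interruptible(ctx->wait,
                                   io_cqring_events(ring) >= min_events);

    /* Keep the temporary mask across -ERESTARTSYS; signal delivery
     * or the restarted syscall will restore the original one. */
    restore_saved_sigmask_unless(ret == -ERESTARTSYS);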
@@ -2391,13 +2392,6 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
struct sk_buff *skb;
int i;
- if (!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
- unsigned long inflight = ctx->user->unix_inflight + nr;
-
- if (inflight > task_rlimit(current, RLIMIT_NOFILE))
- return -EMFILE;
- }
-
fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
if (!fpl)
return -ENOMEM;
@@ -2532,7 +2526,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
{
int ret;
- init_waitqueue_head(&ctx->sqo_wait);
mmgrab(current->mm);
ctx->sqo_mm = current->mm;
@@ -2750,8 +2743,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
ret = 0;
if (!pages || nr_pages > got_pages) {
- kfree(vmas);
- kfree(pages);
+ kvfree(vmas);
+ kvfree(pages);
pages = kvmalloc_array(nr_pages, sizeof(struct page *),
GFP_KERNEL);
vmas = kvmalloc_array(nr_pages,
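kvmalloc_array() may satisfy large allocations from vmalloc space, and vmalloc memory must never be passed to kfree(); kvfree() picks the correct release path for either origin, which is why both frees here switch over. The pairing rule as a standalone sketch (alloc_page_array()/free_page_array() are hypothetical helpers):

    #include <linux/mm.h>

    static struct page **alloc_page_array(unsigned long nr_pages)
    {
            /* May come from kmalloc or, for large counts, vmalloc. */
            return kvmalloc_array(nr_pages, sizeof(struct page *),
                                  GFP_KERNEL);
    }

    static void free_page_array(struct page **pages)
    {
            kvfree(pages); /* correct for either allocation path */
    }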
@@ -2898,6 +2891,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_unaccount_mem(ctx->user,
ring_pages(ctx->sq_entries, ctx->cq_entries));
free_uid(ctx->user);
+ if (ctx->creds)
+ put_cred(ctx->creds);
kfree(ctx);
}
@@ -3175,6 +3170,12 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
ctx->account_mem = account_mem;
ctx->user = user;
+ ctx->creds = get_current_cred();
+ if (!ctx->creds) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
ret = io_allocate_scq_urings(ctx, p);
if (ret)
goto err;
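The reference taken here with get_current_cred() is what the async paths above borrow via override_creds(); it lives exactly as long as the ring and is dropped by the matching put_cred() added to io_ring_ctx_free(). The lifetime, condensed:

    /* Condensed view of the credential lifetime added by this patch. */
    ctx->creds = get_current_cred();        /* setup: ref on creator's creds */
    ...
    old_cred = override_creds(ctx->creds);  /* async work: borrow them */
    revert_creds(old_cred);
    ...
    put_cred(ctx->creds);                   /* teardown: drop the setup ref */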