diff options
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 101 |
1 files changed, 51 insertions, 50 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index 5f5d809d529a..98f43d0427bc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -258,6 +258,8 @@ struct io_ring_ctx { struct user_struct *user; + const struct cred *creds; + struct completion ctx_done; struct { @@ -406,6 +408,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) } ctx->flags = p->flags; + init_waitqueue_head(&ctx->sqo_wait); init_waitqueue_head(&ctx->cq_wait); init_completion(&ctx->ctx_done); init_completion(&ctx->sqo_thread_started); @@ -754,11 +757,17 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, - long min) +static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, + long min) { int iters = 0, ret = 0; + /* + * We disallow the app entering submit/complete with polling, but we + * still need to lock the ring to prevent racing with polled issue + * that got punted to a workqueue. + */ + mutex_lock(&ctx->uring_lock); do { int tmin = 0; @@ -794,21 +803,6 @@ static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, ret = 0; } while (min && !*nr_events && !need_resched()); - return ret; -} - -static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, - long min) -{ - int ret; - - /* - * We disallow the app entering submit/complete with polling, but we - * still need to lock the ring to prevent racing with polled issue - * that got punted to a workqueue. - */ - mutex_lock(&ctx->uring_lock); - ret = __io_iopoll_check(ctx, nr_events, min); mutex_unlock(&ctx->uring_lock); return ret; } @@ -1468,8 +1462,11 @@ static void io_poll_complete_work(struct work_struct *work) struct io_poll_iocb *poll = &req->poll; struct poll_table_struct pt = { ._key = poll->events }; struct io_ring_ctx *ctx = req->ctx; + const struct cred *old_cred; __poll_t mask = 0; + old_cred = override_creds(ctx->creds); + if (!READ_ONCE(poll->canceled)) mask = vfs_poll(poll->file, &pt) & poll->events; @@ -1484,7 +1481,7 @@ static void io_poll_complete_work(struct work_struct *work) if (!mask && !READ_ONCE(poll->canceled)) { add_wait_queue(poll->head, &poll->wait); spin_unlock_irq(&ctx->completion_lock); - return; + goto out; } list_del_init(&req->list); io_poll_complete(ctx, req, mask); @@ -1492,6 +1489,8 @@ static void io_poll_complete_work(struct work_struct *work) io_cqring_ev_posted(ctx); io_put_req(req); +out: + revert_creds(old_cred); } static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, @@ -1613,7 +1612,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) } static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req, - const struct io_uring_sqe *sqe) + struct sqe_submit *s) { struct io_uring_sqe *sqe_copy; @@ -1631,7 +1630,8 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req, return 0; } - memcpy(sqe_copy, sqe, sizeof(*sqe_copy)); + memcpy(&req->submit, s, sizeof(*s)); + memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy)); req->submit.sqe = sqe_copy; INIT_WORK(&req->work, io_sq_wq_submit_work); @@ -1734,10 +1734,12 @@ static void io_sq_wq_submit_work(struct work_struct *work) struct io_ring_ctx *ctx = req->ctx; struct mm_struct *cur_mm = NULL; struct async_list *async_list; + const struct cred *old_cred; LIST_HEAD(req_list); mm_segment_t old_fs; int ret; + old_cred = override_creds(ctx->creds); async_list = io_async_list_from_sqe(ctx, req->submit.sqe); restart: do { @@ -1845,6 +1847,7 @@ out: unuse_mm(cur_mm); mmput(cur_mm); } + revert_creds(old_cred); } /* @@ -1943,7 +1946,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, if (unlikely(ret)) goto out; - ret = io_req_defer(ctx, req, s->sqe); + ret = io_req_defer(ctx, req, s); if (ret) { if (ret == -EIOCBQUEUED) ret = 0; @@ -2108,6 +2111,7 @@ static int io_sq_thread(void *data) struct sqe_submit sqes[IO_IOPOLL_BATCH]; struct io_ring_ctx *ctx = data; struct mm_struct *cur_mm = NULL; + const struct cred *old_cred; mm_segment_t old_fs; DEFINE_WAIT(wait); unsigned inflight; @@ -2117,6 +2121,7 @@ static int io_sq_thread(void *data) old_fs = get_fs(); set_fs(USER_DS); + old_cred = override_creds(ctx->creds); timeout = inflight = 0; while (!kthread_should_park()) { @@ -2137,7 +2142,7 @@ static int io_sq_thread(void *data) */ mutex_lock(&ctx->uring_lock); if (!list_empty(&ctx->poll_list)) - __io_iopoll_check(ctx, &nr_events, 0); + io_iopoll_getevents(ctx, &nr_events, 0); else inflight = 0; mutex_unlock(&ctx->uring_lock); @@ -2156,16 +2161,6 @@ static int io_sq_thread(void *data) if (!io_get_sqring(ctx, &sqes[0])) { /* - * We're polling. If we're within the defined idle - * period, then let us spin without work before going - * to sleep. - */ - if (inflight || !time_after(jiffies, timeout)) { - cpu_relax(); - continue; - } - - /* * Drop cur_mm before scheduling, we can't hold it for * long periods (or over schedule()). Do this before * adding ourselves to the waitqueue, as the unuse/drop @@ -2177,6 +2172,16 @@ static int io_sq_thread(void *data) cur_mm = NULL; } + /* + * We're polling. If we're within the defined idle + * period, then let us spin without work before going + * to sleep. + */ + if (inflight || !time_after(jiffies, timeout)) { + cpu_relax(); + continue; + } + prepare_to_wait(&ctx->sqo_wait, &wait, TASK_INTERRUPTIBLE); @@ -2235,6 +2240,7 @@ static int io_sq_thread(void *data) unuse_mm(cur_mm); mmput(cur_mm); } + revert_creds(old_cred); kthread_parkme(); @@ -2283,7 +2289,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, const sigset_t __user *sig, size_t sigsz) { struct io_cq_ring *ring = ctx->cq_ring; - sigset_t ksigmask, sigsaved; int ret; if (io_cqring_events(ring) >= min_events) @@ -2293,21 +2298,17 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, #ifdef CONFIG_COMPAT if (in_compat_syscall()) ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, - &ksigmask, &sigsaved, sigsz); + sigsz); else #endif - ret = set_user_sigmask(sig, &ksigmask, - &sigsaved, sigsz); + ret = set_user_sigmask(sig, sigsz); if (ret) return ret; } ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); - - if (sig) - restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS); - + restore_saved_sigmask_unless(ret == -ERESTARTSYS); if (ret == -ERESTARTSYS) ret = -EINTR; @@ -2391,13 +2392,6 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) struct sk_buff *skb; int i; - if (!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { - unsigned long inflight = ctx->user->unix_inflight + nr; - - if (inflight > task_rlimit(current, RLIMIT_NOFILE)) - return -EMFILE; - } - fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); if (!fpl) return -ENOMEM; @@ -2532,7 +2526,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, { int ret; - init_waitqueue_head(&ctx->sqo_wait); mmgrab(current->mm); ctx->sqo_mm = current->mm; @@ -2750,8 +2743,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, ret = 0; if (!pages || nr_pages > got_pages) { - kfree(vmas); - kfree(pages); + kvfree(vmas); + kvfree(pages); pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); vmas = kvmalloc_array(nr_pages, @@ -2898,6 +2891,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_unaccount_mem(ctx->user, ring_pages(ctx->sq_entries, ctx->cq_entries)); free_uid(ctx->user); + if (ctx->creds) + put_cred(ctx->creds); kfree(ctx); } @@ -3175,6 +3170,12 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) ctx->account_mem = account_mem; ctx->user = user; + ctx->creds = get_current_cred(); + if (!ctx->creds) { + ret = -ENOMEM; + goto err; + } + ret = io_allocate_scq_urings(ctx, p); if (ret) goto err; |