Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 78
1 file changed, 29 insertions, 49 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 50e9cde0288a..98f43d0427bc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -408,6 +408,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	}
 
 	ctx->flags = p->flags;
+	init_waitqueue_head(&ctx->sqo_wait);
 	init_waitqueue_head(&ctx->cq_wait);
 	init_completion(&ctx->ctx_done);
 	init_completion(&ctx->sqo_thread_started);
@@ -756,11 +757,17 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 }
 
-static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
-			     long min)
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+			   long min)
 {
 	int iters = 0, ret = 0;
 
+	/*
+	 * We disallow the app entering submit/complete with polling, but we
+	 * still need to lock the ring to prevent racing with polled issue
+	 * that got punted to a workqueue.
+	 */
+	mutex_lock(&ctx->uring_lock);
 	do {
 		int tmin = 0;
 
@@ -796,21 +803,6 @@ static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
 		ret = 0;
 	} while (min && !*nr_events && !need_resched());
 
-	return ret;
-}
-
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
-			   long min)
-{
-	int ret;
-
-	/*
-	 * We disallow the app entering submit/complete with polling, but we
-	 * still need to lock the ring to prevent racing with polled issue
-	 * that got punted to a workqueue.
-	 */
-	mutex_lock(&ctx->uring_lock);
-	ret = __io_iopoll_check(ctx, nr_events, min);
 	mutex_unlock(&ctx->uring_lock);
 	return ret;
 }
@@ -1620,7 +1612,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 }
 
 static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
-			const struct io_uring_sqe *sqe)
+			struct sqe_submit *s)
 {
 	struct io_uring_sqe *sqe_copy;
 
@@ -1638,7 +1630,8 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		return 0;
 	}
 
-	memcpy(sqe_copy, sqe, sizeof(*sqe_copy));
+	memcpy(&req->submit, s, sizeof(*s));
+	memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy));
 	req->submit.sqe = sqe_copy;
 
 	INIT_WORK(&req->work, io_sq_wq_submit_work);
@@ -1953,7 +1946,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 	if (unlikely(ret))
 		goto out;
 
-	ret = io_req_defer(ctx, req, s->sqe);
+	ret = io_req_defer(ctx, req, s);
 	if (ret) {
 		if (ret == -EIOCBQUEUED)
 			ret = 0;
@@ -2149,7 +2142,7 @@ static int io_sq_thread(void *data)
 			 */
 			mutex_lock(&ctx->uring_lock);
 			if (!list_empty(&ctx->poll_list))
-				__io_iopoll_check(ctx, &nr_events, 0);
+				io_iopoll_getevents(ctx, &nr_events, 0);
 			else
 				inflight = 0;
 			mutex_unlock(&ctx->uring_lock);
@@ -2168,16 +2161,6 @@ static int io_sq_thread(void *data)
 
 		if (!io_get_sqring(ctx, &sqes[0])) {
 			/*
-			 * We're polling. If we're within the defined idle
-			 * period, then let us spin without work before going
-			 * to sleep.
-			 */
-			if (inflight || !time_after(jiffies, timeout)) {
-				cpu_relax();
-				continue;
-			}
-
-			/*
 			 * Drop cur_mm before scheduling, we can't hold it for
 			 * long periods (or over schedule()). Do this before
 			 * adding ourselves to the waitqueue, as the unuse/drop
@@ -2189,6 +2172,16 @@ static int io_sq_thread(void *data)
 				cur_mm = NULL;
 			}
 
+			/*
+			 * We're polling. If we're within the defined idle
+			 * period, then let us spin without work before going
+			 * to sleep.
+			 */
+			if (inflight || !time_after(jiffies, timeout)) {
+				cpu_relax();
+				continue;
+			}
+
 			prepare_to_wait(&ctx->sqo_wait, &wait,
 						TASK_INTERRUPTIBLE);
 
@@ -2296,7 +2289,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			  const sigset_t __user *sig, size_t sigsz)
 {
 	struct io_cq_ring *ring = ctx->cq_ring;
-	sigset_t ksigmask, sigsaved;
 	int ret;
 
 	if (io_cqring_events(ring) >= min_events)
@@ -2306,21 +2298,17 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 #ifdef CONFIG_COMPAT
 		if (in_compat_syscall())
 			ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
-						      &ksigmask, &sigsaved, sigsz);
+						      sigsz);
 		else
 #endif
-			ret = set_user_sigmask(sig, &ksigmask,
-					       &sigsaved, sigsz);
+			ret = set_user_sigmask(sig, sigsz);
 
 		if (ret)
 			return ret;
 	}
 
 	ret = wait_event_interruptible(ctx->wait,
 				io_cqring_events(ring) >= min_events);
-
-	if (sig)
-		restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS);
-
+	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
 	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
@@ -2404,13 +2392,6 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
 	struct sk_buff *skb;
 	int i;
 
-	if (!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
-		unsigned long inflight = ctx->user->unix_inflight + nr;
-
-		if (inflight > task_rlimit(current, RLIMIT_NOFILE))
-			return -EMFILE;
-	}
-
 	fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
 	if (!fpl)
 		return -ENOMEM;
@@ -2545,7 +2526,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 {
 	int ret;
 
-	init_waitqueue_head(&ctx->sqo_wait);
 	mmgrab(current->mm);
 	ctx->sqo_mm = current->mm;
 
@@ -2763,8 +2743,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
 
 		ret = 0;
 		if (!pages || nr_pages > got_pages) {
-			kfree(vmas);
-			kfree(pages);
+			kvfree(vmas);
+			kvfree(pages);
			pages = kvmalloc_array(nr_pages, sizeof(struct page *),
 						GFP_KERNEL);
 			vmas = kvmalloc_array(nr_pages,
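
A note on the io_cqring_wait() hunks: they convert the function from the old set_user_sigmask()/restore_user_sigmask() convention, which threaded ksigmask/sigsaved through explicitly, to the simplified signal API in which set_user_sigmask() stashes the old mask in current->saved_sigmask and restore_saved_sigmask_unless() restores it. A minimal sketch of the resulting pattern, with the io_uring specifics reduced to a generic wait (wait_with_user_sigmask() and its arguments are hypothetical stand-ins, not kernel code):

#include <linux/sched/signal.h>
#include <linux/wait.h>

/* Hypothetical illustration of the sigmask pattern this diff adopts. */
static int wait_with_user_sigmask(wait_queue_head_t *wq, bool *done,
				  const sigset_t __user *sig, size_t sigsz)
{
	int ret = 0;

	if (sig) {
		/* Saves the current mask in current->saved_sigmask. */
		ret = set_user_sigmask(sig, sigsz);
		if (ret)
			return ret;
	}

	ret = wait_event_interruptible(*wq, *done);

	/*
	 * If the wait was interrupted, leave the temporary mask in place
	 * so signal delivery sees it; the saved mask is restored on the
	 * way out of the signal handler. Otherwise restore it now.
	 */
	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
	return ret;
}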
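Similarly, the kfree() to kvfree() change in io_sqe_buffer_register() is a correctness fix rather than a cleanup: pages and vmas come from kvmalloc_array(), which may return vmalloc-backed memory for large arrays, and such memory must not be passed to kfree(). A small illustration of the pairing rule (helper names are hypothetical):

#include <linux/mm.h>

/* Hypothetical helpers showing the kvmalloc/kvfree pairing rule. */
static struct page **alloc_page_ptrs(unsigned int nr_pages)
{
	/* May be kmalloc- or vmalloc-backed depending on size. */
	return kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
}

static void free_page_ptrs(struct page **pages)
{
	/*
	 * kvfree() dispatches to kfree() or vfree() as appropriate;
	 * plain kfree() on a vmalloc'ed pointer would corrupt memory.
	 */
	kvfree(pages);
}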