aboutsummaryrefslogtreecommitdiffstats
path: root/fs/io_uring.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--fs/io_uring.c78
1 files changed, 29 insertions, 49 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 50e9cde0288a..98f43d0427bc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -408,6 +408,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
}
ctx->flags = p->flags;
+ init_waitqueue_head(&ctx->sqo_wait);
init_waitqueue_head(&ctx->cq_wait);
init_completion(&ctx->ctx_done);
init_completion(&ctx->sqo_thread_started);
@@ -756,11 +757,17 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
mutex_unlock(&ctx->uring_lock);
}
-static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ long min)
{
int iters = 0, ret = 0;
+ /*
+ * We disallow the app entering submit/complete with polling, but we
+ * still need to lock the ring to prevent racing with polled issue
+ * that got punted to a workqueue.
+ */
+ mutex_lock(&ctx->uring_lock);
do {
int tmin = 0;
@@ -796,21 +803,6 @@ static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
ret = 0;
} while (min && !*nr_events && !need_resched());
- return ret;
-}
-
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
-{
- int ret;
-
- /*
- * We disallow the app entering submit/complete with polling, but we
- * still need to lock the ring to prevent racing with polled issue
- * that got punted to a workqueue.
- */
- mutex_lock(&ctx->uring_lock);
- ret = __io_iopoll_check(ctx, nr_events, min);
mutex_unlock(&ctx->uring_lock);
return ret;
}
@@ -1620,7 +1612,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+ struct sqe_submit *s)
{
struct io_uring_sqe *sqe_copy;
@@ -1638,7 +1630,8 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
return 0;
}
- memcpy(sqe_copy, sqe, sizeof(*sqe_copy));
+ memcpy(&req->submit, s, sizeof(*s));
+ memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy));
req->submit.sqe = sqe_copy;
INIT_WORK(&req->work, io_sq_wq_submit_work);
@@ -1953,7 +1946,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
if (unlikely(ret))
goto out;
- ret = io_req_defer(ctx, req, s->sqe);
+ ret = io_req_defer(ctx, req, s);
if (ret) {
if (ret == -EIOCBQUEUED)
ret = 0;
@@ -2149,7 +2142,7 @@ static int io_sq_thread(void *data)
*/
mutex_lock(&ctx->uring_lock);
if (!list_empty(&ctx->poll_list))
- __io_iopoll_check(ctx, &nr_events, 0);
+ io_iopoll_getevents(ctx, &nr_events, 0);
else
inflight = 0;
mutex_unlock(&ctx->uring_lock);
@@ -2168,16 +2161,6 @@ static int io_sq_thread(void *data)
if (!io_get_sqring(ctx, &sqes[0])) {
/*
- * We're polling. If we're within the defined idle
- * period, then let us spin without work before going
- * to sleep.
- */
- if (inflight || !time_after(jiffies, timeout)) {
- cpu_relax();
- continue;
- }
-
- /*
* Drop cur_mm before scheduling, we can't hold it for
* long periods (or over schedule()). Do this before
* adding ourselves to the waitqueue, as the unuse/drop
@@ -2189,6 +2172,16 @@ static int io_sq_thread(void *data)
cur_mm = NULL;
}
+ /*
+ * We're polling. If we're within the defined idle
+ * period, then let us spin without work before going
+ * to sleep.
+ */
+ if (inflight || !time_after(jiffies, timeout)) {
+ cpu_relax();
+ continue;
+ }
+
prepare_to_wait(&ctx->sqo_wait, &wait,
TASK_INTERRUPTIBLE);
@@ -2296,7 +2289,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
const sigset_t __user *sig, size_t sigsz)
{
struct io_cq_ring *ring = ctx->cq_ring;
- sigset_t ksigmask, sigsaved;
int ret;
if (io_cqring_events(ring) >= min_events)
@@ -2306,21 +2298,17 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
#ifdef CONFIG_COMPAT
if (in_compat_syscall())
ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
- &ksigmask, &sigsaved, sigsz);
+ sigsz);
else
#endif
- ret = set_user_sigmask(sig, &ksigmask,
- &sigsaved, sigsz);
+ ret = set_user_sigmask(sig, sigsz);
if (ret)
return ret;
}
ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
-
- if (sig)
- restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS);
-
+ restore_saved_sigmask_unless(ret == -ERESTARTSYS);
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -2404,13 +2392,6 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
struct sk_buff *skb;
int i;
- if (!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
- unsigned long inflight = ctx->user->unix_inflight + nr;
-
- if (inflight > task_rlimit(current, RLIMIT_NOFILE))
- return -EMFILE;
- }
-
fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
if (!fpl)
return -ENOMEM;
@@ -2545,7 +2526,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
{
int ret;
- init_waitqueue_head(&ctx->sqo_wait);
mmgrab(current->mm);
ctx->sqo_mm = current->mm;
@@ -2763,8 +2743,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
ret = 0;
if (!pages || nr_pages > got_pages) {
- kfree(vmas);
- kfree(pages);
+ kvfree(vmas);
+ kvfree(pages);
pages = kvmalloc_array(nr_pages, sizeof(struct page *),
GFP_KERNEL);
vmas = kvmalloc_array(nr_pages,