Diffstat (limited to 'fs/eventpoll.c')
 fs/eventpoll.c | 59 ++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 30 insertions(+), 29 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4c74c768ae43..70ecf02cdca2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1827,7 +1827,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
{
int res = 0, eavail, timed_out = 0;
u64 slack = 0;
- bool waiter = false;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
@@ -1872,21 +1871,23 @@ fetch_events:
*/
ep_reset_busy_poll_napi_id(ep);
- /*
- * We don't have any available event to return to the caller. We need
- * to sleep here, and we will be woken by ep_poll_callback() when events
- * become available.
- */
- if (!waiter) {
- waiter = true;
- init_waitqueue_entry(&wait, current);
-
- spin_lock_irq(&ep->wq.lock);
+ do {
+ /*
+ * Internally init_wait() uses autoremove_wake_function(),
+ * so the wait entry is removed from the wait queue on each
+ * wakeup. Why is this important? With several waiters, each
+ * new wakeup hits the next waiter, giving it the chance to
+ * harvest new events; otherwise a wakeup can be lost. This is
+ * also good performance-wise: on the normal wakeup path there
+ * is no need to call __remove_wait_queue() explicitly, so
+ * ep->lock is not taken, which would stall event delivery.
+ */
+ init_wait(&wait);
+ write_lock_irq(&ep->lock);
__add_wait_queue_exclusive(&ep->wq, &wait);
- spin_unlock_irq(&ep->wq.lock);
- }
+ write_unlock_irq(&ep->lock);
- for (;;) {
/*
* We don't want to sleep if the ep_poll_callback() sends us
* a wakeup in between. That's why we set the task state
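
For reference (not part of the patch): the auto-removal behavior the comment above relies on comes from autoremove_wake_function() in kernel/sched/wait.c, which looks roughly like the sketch below — it wakes the task and, on a successful wakeup, immediately unlinks the entry so that the next wakeup targets the next exclusive waiter.

	/* Sketch of autoremove_wake_function(), kernel/sched/wait.c */
	int autoremove_wake_function(struct wait_queue_entry *wq_entry,
				     unsigned int mode, int sync, void *key)
	{
		int ret = default_wake_function(wq_entry, mode, sync, key);

		/* Only unlink if the task was actually woken */
		if (ret)
			list_del_init(&wq_entry->entry);
		return ret;
	}
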
@@ -1916,10 +1917,20 @@ fetch_events:
timed_out = 1;
break;
}
- }
+
+ /* We were woken up, so go and try to harvest some events */
+ eavail = 1;
+
+ } while (0);
__set_current_state(TASK_RUNNING);
+ if (!list_empty_careful(&wait.entry)) {
+ write_lock_irq(&ep->lock);
+ __remove_wait_queue(&ep->wq, &wait);
+ write_unlock_irq(&ep->lock);
+ }
+
send_events:
/*
* Try to transfer events to user space. In case we get 0 events and
@@ -1930,12 +1941,6 @@ send_events:
!(res = ep_send_events(ep, events, maxevents)) && !timed_out)
goto fetch_events;
- if (waiter) {
- spin_lock_irq(&ep->wq.lock);
- __remove_wait_queue(&ep->wq, &wait);
- spin_unlock_irq(&ep->wq.lock);
- }
-
return res;
}
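
The explicit removal above open-codes finish_wait(): ep_poll() now serializes on ep->lock (a rwlock) instead of the wait queue's internal spinlock, so it cannot call finish_wait() itself. For comparison, finish_wait() in kernel/sched/wait.c does roughly the following — the lock-free list_empty_careful() check is what makes skipping the lock safe on the common (auto-removed) path:

	/* Sketch of finish_wait(), kernel/sched/wait.c */
	void finish_wait(struct wait_queue_head *wq_head,
			 struct wait_queue_entry *wq_entry)
	{
		unsigned long flags;

		__set_current_state(TASK_RUNNING);
		/* Lock-free check; take the lock only if still queued */
		if (!list_empty_careful(&wq_entry->entry)) {
			spin_lock_irqsave(&wq_head->lock, flags);
			list_del_init(&wq_entry->entry);
			spin_unlock_irqrestore(&wq_head->lock, flags);
		}
	}
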
@@ -2313,19 +2318,17 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
size_t, sigsetsize)
{
int error;
- sigset_t ksigmask, sigsaved;
/*
* If the caller wants a certain signal mask to be set during the wait,
* we apply it here.
*/
- error = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ error = set_user_sigmask(sigmask, sigsetsize);
if (error)
return error;
error = do_epoll_wait(epfd, events, maxevents, timeout);
-
- restore_user_sigmask(sigmask, &sigsaved, error == -EINTR);
+ restore_saved_sigmask_unless(error == -EINTR);
return error;
}
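
The sigmask hunks switch to the newer set_user_sigmask()/restore_saved_sigmask_unless() helpers: the temporary mask is restored on return, unless the wait was interrupted, in which case restoration is deferred so the pending signal is delivered with the caller-supplied mask still in place. Userspace semantics are unchanged; a minimal (hypothetical) caller looks like:

	#include <signal.h>
	#include <sys/epoll.h>

	/* Hypothetical helper: block everything except SIGINT while
	 * waiting, so only SIGINT can interrupt the wait. The kernel
	 * applies 'mask' atomically for the duration of the call.
	 */
	int wait_allowing_sigint(int epfd, struct epoll_event *events,
				 int maxevents)
	{
		sigset_t mask;

		sigfillset(&mask);
		sigdelset(&mask, SIGINT);
		return epoll_pwait(epfd, events, maxevents, -1, &mask);
	}
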
@@ -2338,19 +2341,17 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
compat_size_t, sigsetsize)
{
long err;
- sigset_t ksigmask, sigsaved;
/*
* If the caller wants a certain signal mask to be set during the wait,
* we apply it here.
*/
- err = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize);
+ err = set_compat_user_sigmask(sigmask, sigsetsize);
if (err)
return err;
err = do_epoll_wait(epfd, events, maxevents, timeout);
-
- restore_user_sigmask(sigmask, &sigsaved, err == -EINTR);
+ restore_saved_sigmask_unless(err == -EINTR);
return err;
}