Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c  575
1 file changed, 448 insertions(+), 127 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2c5b222d4de6..e456326a9a0d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -42,7 +42,11 @@ const_debug unsigned int sysctl_sched_features =
* Number of tasks to iterate in a single balance run.
* Limited because this is done with IRQs disabled.
*/
+#ifndef CONFIG_PREEMPT_RT_FULL
const_debug unsigned int sysctl_sched_nr_migrate = 32;
+#else
+const_debug unsigned int sysctl_sched_nr_migrate = 8;
+#endif
/*
* period over which we average the RT time consumption, measured
@@ -322,7 +326,7 @@ static void hrtick_rq_init(struct rq *rq)
rq->hrtick_csd.info = rq;
#endif
- hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
rq->hrtick_timer.function = hrtick;
}
#else /* CONFIG_SCHED_HRTICK */
@@ -404,9 +408,15 @@ static bool set_nr_if_polling(struct task_struct *p)
#endif
#endif
-void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+void __wake_q_add(struct wake_q_head *head, struct task_struct *task,
+ bool sleeper)
{
- struct wake_q_node *node = &task->wake_q;
+ struct wake_q_node *node;
+
+ if (sleeper)
+ node = &task->wake_q_sleeper;
+ else
+ node = &task->wake_q;
/*
* Atomically grab the task, if ->wake_q is !nil already it means
@@ -429,24 +439,32 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
head->lastp = &node->next;
}
-void wake_up_q(struct wake_q_head *head)
+void __wake_up_q(struct wake_q_head *head, bool sleeper)
{
struct wake_q_node *node = head->first;
while (node != WAKE_Q_TAIL) {
struct task_struct *task;
- task = container_of(node, struct task_struct, wake_q);
+ if (sleeper)
+ task = container_of(node, struct task_struct, wake_q_sleeper);
+ else
+ task = container_of(node, struct task_struct, wake_q);
BUG_ON(!task);
/* Task can safely be re-inserted now: */
node = node->next;
- task->wake_q.next = NULL;
-
+ if (sleeper)
+ task->wake_q_sleeper.next = NULL;
+ else
+ task->wake_q.next = NULL;
/*
* wake_up_process() implies a wmb() to pair with the queueing
* in wake_q_add() so as not to miss wakeups.
*/
- wake_up_process(task);
+ if (sleeper)
+ wake_up_lock_sleeper(task);
+ else
+ wake_up_process(task);
put_task_struct(task);
}
}
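
Callers of the old API keep working because wake_q_add()/wake_up_q() become thin wrappers around the new helpers; those wrappers are not part of this file, so the sketch below is only an assumption about how the sleeper flag is threaded through (the names wake_q_add_sleeper()/wake_up_q_sleeper() and the include/linux/sched/wake_q.h location are presumed, not shown in this diff):

/* Presumed wrappers, for illustration only. */
static inline void wake_q_add(struct wake_q_head *head, struct task_struct *task)
{
	__wake_q_add(head, task, false);
}

static inline void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task)
{
	__wake_q_add(head, task, true);
}

static inline void wake_up_q(struct wake_q_head *head)
{
	__wake_up_q(head, false);
}

static inline void wake_up_q_sleeper(struct wake_q_head *head)
{
	__wake_up_q(head, true);
}
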
@@ -482,6 +500,48 @@ void resched_curr(struct rq *rq)
trace_sched_wake_idle_without_ipi(cpu);
}
+#ifdef CONFIG_PREEMPT_LAZY
+
+static int tsk_is_polling(struct task_struct *p)
+{
+#ifdef TIF_POLLING_NRFLAG
+ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
+#else
+ return 0;
+#endif
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ int cpu;
+
+ if (!sched_feat(PREEMPT_LAZY)) {
+ resched_curr(rq);
+ return;
+ }
+
+ lockdep_assert_held(&rq->lock);
+
+ if (test_tsk_need_resched(curr))
+ return;
+
+ if (test_tsk_need_resched_lazy(curr))
+ return;
+
+ set_tsk_need_resched_lazy(curr);
+
+ cpu = cpu_of(rq);
+ if (cpu == smp_processor_id())
+ return;
+
+ /* NEED_RESCHED_LAZY must be visible before we test polling */
+ smp_mb();
+ if (!tsk_is_polling(curr))
+ smp_send_reschedule(cpu);
+}
+#endif
+
void resched_cpu(int cpu)
{
struct rq *rq = cpu_rq(cpu);
@@ -505,11 +565,14 @@ void resched_cpu(int cpu)
*/
int get_nohz_timer_target(void)
{
- int i, cpu = smp_processor_id();
+ int i, cpu;
struct sched_domain *sd;
+ preempt_disable_rt();
+ cpu = smp_processor_id();
+
if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
- return cpu;
+ goto preempt_en_rt;
rcu_read_lock();
for_each_domain(cpu, sd) {
@@ -528,6 +591,8 @@ int get_nohz_timer_target(void)
cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
unlock:
rcu_read_unlock();
+preempt_en_rt:
+ preempt_enable_rt();
return cpu;
}
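
preempt_disable_rt()/preempt_enable_rt() are defined elsewhere in the RT tree; a plausible sketch, assuming they expand to real preempt_disable()/preempt_enable() only on RT builds (exact config symbol assumed) so the non-RT fast path is unchanged:

/* Sketch only; the real macros live outside this file. */
#ifdef CONFIG_PREEMPT_RT_FULL
# define preempt_disable_rt()	preempt_disable()
# define preempt_enable_rt()	preempt_enable()
#else
# define preempt_disable_rt()	do { } while (0)
# define preempt_enable_rt()	do { } while (0)
#endif

On RT this presumably keeps smp_processor_id() and the housekeeping-CPU lookup stable even when the caller is otherwise preemptible.
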
@@ -902,10 +967,10 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
*/
static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
{
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
return false;
- if (is_per_cpu_kthread(p))
+ if (is_per_cpu_kthread(p) || __migrate_disabled(p))
return cpu_online(cpu);
return cpu_active(cpu);
@@ -997,7 +1062,7 @@ static int migration_cpu_stop(void *data)
local_irq_disable();
/*
* We need to explicitly wake pending tasks before running
- * __migrate_task() such that we will not miss enforcing cpus_allowed
+ * __migrate_task() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
sched_ttwu_pending();
@@ -1028,11 +1093,19 @@ static int migration_cpu_stop(void *data)
*/
void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
{
- cpumask_copy(&p->cpus_allowed, new_mask);
+ cpumask_copy(&p->cpus_mask, new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
}
-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+int __migrate_disabled(struct task_struct *p)
+{
+ return p->migrate_disable;
+}
+#endif
+
+static void __do_set_cpus_allowed_tail(struct task_struct *p,
+ const struct cpumask *new_mask)
{
struct rq *rq = task_rq(p);
bool queued, running;
@@ -1061,6 +1134,20 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
set_curr_task(rq, p);
}
+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+{
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+ if (__migrate_disabled(p)) {
+ lockdep_assert_held(&p->pi_lock);
+
+ cpumask_copy(&p->cpus_mask, new_mask);
+ p->migrate_disable_update = 1;
+ return;
+ }
+#endif
+ __do_set_cpus_allowed_tail(p, new_mask);
+}
+
/*
* Change a given task's CPU affinity. Migrate the thread to a
* proper CPU and schedule it away if the CPU it's executing on
@@ -1098,7 +1185,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
goto out;
}
- if (cpumask_equal(&p->cpus_allowed, new_mask))
+ if (cpumask_equal(p->cpus_ptr, new_mask))
goto out;
if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
@@ -1119,9 +1206,16 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
}
/* Can the task run on the task's current CPU? If so, we're done */
- if (cpumask_test_cpu(task_cpu(p), new_mask))
+ if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
goto out;
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+ if (__migrate_disabled(p)) {
+ p->migrate_disable_update = 1;
+ goto out;
+ }
+#endif
+
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
if (task_running(rq, p) || p->state == TASK_WAKING) {
struct migration_arg arg = { p, dest_cpu };
@@ -1260,10 +1354,10 @@ static int migrate_swap_stop(void *data)
if (task_cpu(arg->src_task) != arg->src_cpu)
goto unlock;
- if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
goto unlock;
- if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
goto unlock;
__migrate_swap_task(arg->src_task, arg->dst_cpu);
@@ -1304,10 +1398,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
goto out;
- if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
goto out;
- if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
goto out;
trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
@@ -1317,6 +1411,18 @@ out:
return ret;
}
+static bool check_task_state(struct task_struct *p, long match_state)
+{
+ bool match = false;
+
+ raw_spin_lock_irq(&p->pi_lock);
+ if (p->state == match_state || p->saved_state == match_state)
+ match = true;
+ raw_spin_unlock_irq(&p->pi_lock);
+
+ return match;
+}
+
/*
* wait_task_inactive - wait for a thread to unschedule.
*
@@ -1361,7 +1467,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
* is actually now running somewhere else!
*/
while (task_running(rq, p)) {
- if (match_state && unlikely(p->state != match_state))
+ if (match_state && !check_task_state(p, match_state))
return 0;
cpu_relax();
}
@@ -1376,7 +1482,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
running = task_running(rq, p);
queued = task_on_rq_queued(p);
ncsw = 0;
- if (!match_state || p->state == match_state)
+ if (!match_state || p->state == match_state ||
+ p->saved_state == match_state)
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
task_rq_unlock(rq, p, &rf);
@@ -1451,7 +1558,7 @@ void kick_process(struct task_struct *p)
EXPORT_SYMBOL_GPL(kick_process);
/*
- * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock
*
* A few notes on cpu_active vs cpu_online:
*
@@ -1491,14 +1598,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
for_each_cpu(dest_cpu, nodemask) {
if (!cpu_active(dest_cpu))
continue;
- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
return dest_cpu;
}
}
for (;;) {
/* Any allowed, online CPU? */
- for_each_cpu(dest_cpu, &p->cpus_allowed) {
+ for_each_cpu(dest_cpu, p->cpus_ptr) {
if (!is_cpu_allowed(p, dest_cpu))
continue;
@@ -1542,7 +1649,7 @@ out:
}
/*
- * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
*/
static inline
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
@@ -1552,11 +1659,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
if (p->nr_cpus_allowed > 1)
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
else
- cpu = cpumask_any(&p->cpus_allowed);
+ cpu = cpumask_any(p->cpus_ptr);
/*
* In order not to call set_task_cpu() on a blocking task we need
- * to rely on ttwu() to place the task on a valid ->cpus_allowed
+ * to rely on ttwu() to place the task on a valid ->cpus_ptr
* CPU.
*
* Since this is common to all placement strategies, this lives here.
@@ -1659,10 +1766,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl
{
activate_task(rq, p, en_flags);
p->on_rq = TASK_ON_RQ_QUEUED;
-
- /* If a worker is waking up, notify the workqueue: */
- if (p->flags & PF_WQ_WORKER)
- wq_worker_waking_up(p, cpu_of(rq));
}
/*
@@ -1986,8 +2089,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
raw_spin_lock_irqsave(&p->pi_lock, flags);
smp_mb__after_spinlock();
- if (!(p->state & state))
+ if (!(p->state & state)) {
+ /*
+ * The task might be running due to a spinlock sleeper
+ * wakeup. Check the saved state and set it to running
+ * if the wakeup condition is true.
+ */
+ if (!(wake_flags & WF_LOCK_SLEEPER)) {
+ if (p->saved_state & state) {
+ p->saved_state = TASK_RUNNING;
+ success = 1;
+ }
+ }
goto out;
+ }
+
+ /*
+ * If this is a regular wakeup, then we can unconditionally
+ * clear the saved state of a "lock sleeper".
+ */
+ if (!(wake_flags & WF_LOCK_SLEEPER))
+ p->saved_state = TASK_RUNNING;
trace_sched_waking(p);
@@ -2084,56 +2206,6 @@ out:
}
/**
- * try_to_wake_up_local - try to wake up a local task with rq lock held
- * @p: the thread to be awakened
- * @rf: request-queue flags for pinning
- *
- * Put @p on the run-queue if it's not already there. The caller must
- * ensure that this_rq() is locked, @p is bound to this_rq() and not
- * the current task.
- */
-static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
-{
- struct rq *rq = task_rq(p);
-
- if (WARN_ON_ONCE(rq != this_rq()) ||
- WARN_ON_ONCE(p == current))
- return;
-
- lockdep_assert_held(&rq->lock);
-
- if (!raw_spin_trylock(&p->pi_lock)) {
- /*
- * This is OK, because current is on_cpu, which avoids it being
- * picked for load-balance and preemption/IRQs are still
- * disabled avoiding further scheduler activity on it and we've
- * not yet picked a replacement task.
- */
- rq_unlock(rq, rf);
- raw_spin_lock(&p->pi_lock);
- rq_relock(rq, rf);
- }
-
- if (!(p->state & TASK_NORMAL))
- goto out;
-
- trace_sched_waking(p);
-
- if (!task_on_rq_queued(p)) {
- if (p->in_iowait) {
- delayacct_blkio_end(p);
- atomic_dec(&rq->nr_iowait);
- }
- ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK);
- }
-
- ttwu_do_wakeup(rq, p, 0, rf);
- ttwu_stat(p, smp_processor_id(), 0);
-out:
- raw_spin_unlock(&p->pi_lock);
-}
-
-/**
* wake_up_process - Wake up a specific process
* @p: The process to be woken up.
*
@@ -2151,6 +2223,18 @@ int wake_up_process(struct task_struct *p)
}
EXPORT_SYMBOL(wake_up_process);
+/**
+ * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock"
+ * @p: The process to be woken up.
+ *
+ * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate
+ * the nature of the wakeup.
+ */
+int wake_up_lock_sleeper(struct task_struct *p)
+{
+ return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER);
+}
+
int wake_up_state(struct task_struct *p, unsigned int state)
{
return try_to_wake_up(p, state, 0);
@@ -2319,7 +2403,6 @@ static inline void init_schedstats(void) {}
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
unsigned long flags;
- int cpu = get_cpu();
__sched_fork(clone_flags, p);
/*
@@ -2355,14 +2438,12 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->sched_reset_on_fork = 0;
}
- if (dl_prio(p->prio)) {
- put_cpu();
+ if (dl_prio(p->prio))
return -EAGAIN;
- } else if (rt_prio(p->prio)) {
+ else if (rt_prio(p->prio))
p->sched_class = &rt_sched_class;
- } else {
+ else
p->sched_class = &fair_sched_class;
- }
init_entity_runnable_average(&p->se);
@@ -2378,7 +2459,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
* We're setting the CPU for the first time, we don't migrate,
* so use __set_task_cpu().
*/
- __set_task_cpu(p, cpu);
+ __set_task_cpu(p, smp_processor_id());
if (p->sched_class->task_fork)
p->sched_class->task_fork(p);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -2391,12 +2472,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->on_cpu = 0;
#endif
init_task_preempt_count(p);
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+ task_thread_info(p)->preempt_lazy_count = 0;
+#endif
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
#endif
-
- put_cpu();
return 0;
}
@@ -2433,7 +2515,7 @@ void wake_up_new_task(struct task_struct *p)
#ifdef CONFIG_SMP
/*
* Fork balancing, do it here and not earlier because:
- * - cpus_allowed can change in the fork path
+ * - cpus_ptr can change in the fork path
* - any previously selected CPU might disappear through hotplug
*
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
@@ -2721,23 +2803,18 @@ static struct rq *finish_task_switch(struct task_struct *prev)
* provided by mmdrop(),
* - a sync_core for SYNC_CORE.
*/
+ /*
+ * We use mmdrop_delayed() here so we don't have to do the
+ * full __mmdrop() when we are the last user.
+ */
if (mm) {
membarrier_mm_sync_core_before_usermode(mm);
- mmdrop(mm);
+ mmdrop_delayed(mm);
}
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
- /*
- * Remove function-return probe instances associated with this
- * task and put them back on the free list.
- */
- kprobe_flush_task(prev);
-
- /* Task is done with its stack. */
- put_task_stack(prev);
-
put_task_struct(prev);
}
@@ -3457,25 +3534,13 @@ static void __sched notrace __schedule(bool preempt)
atomic_inc(&rq->nr_iowait);
delayacct_blkio_start();
}
-
- /*
- * If a worker went to sleep, notify and ask workqueue
- * whether it wants to wake up a task to maintain
- * concurrency.
- */
- if (prev->flags & PF_WQ_WORKER) {
- struct task_struct *to_wakeup;
-
- to_wakeup = wq_worker_sleeping(prev);
- if (to_wakeup)
- try_to_wake_up_local(to_wakeup, &rf);
- }
}
switch_count = &prev->nvcsw;
}
next = pick_next_task(rq, prev, &rf);
clear_tsk_need_resched(prev);
+ clear_tsk_need_resched_lazy(prev);
clear_preempt_need_resched();
if (likely(prev != next)) {
@@ -3527,9 +3592,25 @@ void __noreturn do_task_dead(void)
static inline void sched_submit_work(struct task_struct *tsk)
{
- if (!tsk->state || tsk_is_pi_blocked(tsk))
+ if (!tsk->state)
return;
/*
+ * If a worker went to sleep, notify and ask workqueue whether
+ * it wants to wake up a task to maintain concurrency.
+ * As this function is called inside the schedule() context,
+ * we disable preemption to avoid it calling schedule() again
+ * in the possible wakeup of a kworker.
+ */
+ if (tsk->flags & PF_WQ_WORKER) {
+ preempt_disable();
+ wq_worker_sleeping(tsk);
+ preempt_enable_no_resched();
+ }
+
+ if (tsk_is_pi_blocked(tsk))
+ return;
+
+ /*
* If we are going to sleep and we have plugged IO queued,
* make sure to submit it to avoid deadlocks.
*/
@@ -3537,6 +3618,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
blk_schedule_flush_plug(tsk);
}
+static void sched_update_worker(struct task_struct *tsk)
+{
+ if (tsk->flags & PF_WQ_WORKER)
+ wq_worker_running(tsk);
+}
+
asmlinkage __visible void __sched schedule(void)
{
struct task_struct *tsk = current;
@@ -3547,6 +3634,7 @@ asmlinkage __visible void __sched schedule(void)
__schedule(false);
sched_preempt_enable_no_resched();
} while (need_resched());
+ sched_update_worker(tsk);
}
EXPORT_SYMBOL(schedule);
@@ -3635,6 +3723,30 @@ static void __sched notrace preempt_schedule_common(void)
} while (need_resched());
}
+#ifdef CONFIG_PREEMPT_LAZY
+/*
+ * If TIF_NEED_RESCHED is set then we allow ourselves to be scheduled away,
+ * since it is set by an RT task. Otherwise we try to avoid being scheduled
+ * out as long as the preempt_lazy_count counter is > 0.
+ */
+static __always_inline int preemptible_lazy(void)
+{
+ if (test_thread_flag(TIF_NEED_RESCHED))
+ return 1;
+ if (current_thread_info()->preempt_lazy_count)
+ return 0;
+ return 1;
+}
+
+#else
+
+static inline int preemptible_lazy(void)
+{
+ return 1;
+}
+
+#endif
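+
+/*
+ * preempt_lazy_disable()/preempt_lazy_enable(), used by migrate_disable()
+ * and migrate_enable() further down, are also defined outside this file.
+ * A minimal sketch, assuming they simply manipulate the preempt_lazy_count
+ * field that sched_fork() and init_idle() initialise above (illustration
+ * only, not the actual RT-tree definitions):
+ *
+ *	static inline void preempt_lazy_disable(void)
+ *	{
+ *		current_thread_info()->preempt_lazy_count++;
+ *		barrier();
+ *	}
+ *
+ *	static inline void preempt_lazy_enable(void)
+ *	{
+ *		barrier();
+ *		current_thread_info()->preempt_lazy_count--;
+ *	}
+ *
+ * While preempt_lazy_count is non-zero, preemptible_lazy() above returns 0,
+ * so a NEED_RESCHED_LAZY request does not force a context switch until the
+ * count drops back to zero or a real NEED_RESCHED arrives.
+ */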
+
#ifdef CONFIG_PREEMPT
/*
* this is the entry point to schedule() from in-kernel preemption
@@ -3649,7 +3761,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
*/
if (likely(!preemptible()))
return;
-
+ if (!preemptible_lazy())
+ return;
preempt_schedule_common();
}
NOKPROBE_SYMBOL(preempt_schedule);
@@ -3676,6 +3789,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
if (likely(!preemptible()))
return;
+ if (!preemptible_lazy())
+ return;
+
do {
/*
* Because the function tracer can trace preempt_count_sub()
@@ -4304,7 +4420,7 @@ change:
* the entire root_domain to become SCHED_DEADLINE. We
* will also fail if there's no bandwidth available.
*/
- if (!cpumask_subset(span, &p->cpus_allowed) ||
+ if (!cpumask_subset(span, p->cpus_ptr) ||
rq->rd->dl_bw.bw == 0) {
task_rq_unlock(rq, p, &rf);
return -EPERM;
@@ -4903,7 +5019,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
goto out_unlock;
raw_spin_lock_irqsave(&p->pi_lock, flags);
- cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out_unlock:
@@ -5444,7 +5560,9 @@ void init_idle(struct task_struct *idle, int cpu)
/* Set the preempt count _outside_ the spinlocks! */
init_idle_preempt_count(idle, cpu);
-
+#ifdef CONFIG_HAVE_PREEMPT_LAZY
+ task_thread_info(idle)->preempt_lazy_count = 0;
+#endif
/*
* The idle tasks have their own, simple scheduling class:
*/
@@ -5483,7 +5601,7 @@ int task_can_attach(struct task_struct *p,
* allowed nodes is unnecessary. Thus, cpusets are not
* applicable for such threads. This prevents checking for
* success of set_cpus_allowed_ptr() on all attached tasks
- * before cpus_allowed may be changed.
+ * before cpus_mask may be changed.
*/
if (p->flags & PF_NO_SETAFFINITY) {
ret = -EINVAL;
@@ -5510,7 +5628,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
if (curr_cpu == target_cpu)
return 0;
- if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
return -EINVAL;
/* TODO: This is not properly updating schedstats */
@@ -5549,6 +5667,8 @@ void sched_setnuma(struct task_struct *p, int nid)
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_HOTPLUG_CPU
+static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
+
/*
* Ensure that the idle task is using init_mm right before its CPU goes
* offline.
@@ -5564,7 +5684,11 @@ void idle_task_exit(void)
current->active_mm = &init_mm;
finish_arch_post_lock_switch();
}
- mmdrop(mm);
+ /*
+ * Defer the cleanup to an alive cpu. On RT we can neither
+ * call mmdrop() nor mmdrop_delayed() from here.
+ */
+ per_cpu(idle_last_mm, smp_processor_id()) = mm;
}
/*
@@ -5648,7 +5772,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
put_prev_task(rq, next);
/*
- * Rules for changing task_struct::cpus_allowed are holding
+ * Rules for changing task_struct::cpus_mask are holding
* both pi_lock and rq->lock, such that holding either
* stabilizes the mask.
*
@@ -5884,6 +6008,10 @@ int sched_cpu_dying(unsigned int cpu)
update_max_interval();
nohz_balance_exit_idle(rq);
hrtick_clear(rq);
+ if (per_cpu(idle_last_mm, cpu)) {
+ mmdrop_delayed(per_cpu(idle_last_mm, cpu));
+ per_cpu(idle_last_mm, cpu) = NULL;
+ }
return 0;
}
#endif
@@ -6120,7 +6248,7 @@ void __init sched_init(void)
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline int preempt_count_equals(int preempt_offset)
{
- int nested = preempt_count() + rcu_preempt_depth();
+ int nested = preempt_count() + sched_rcu_preempt_depth();
return (nested == preempt_offset);
}
@@ -7096,3 +7224,196 @@ const u32 sched_prio_to_wmult[40] = {
};
#undef CREATE_TRACE_POINTS
+
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+
+static inline void
+update_nr_migratory(struct task_struct *p, long delta)
+{
+ if (unlikely((p->sched_class == &rt_sched_class ||
+ p->sched_class == &dl_sched_class) &&
+ p->nr_cpus_allowed > 1)) {
+ if (p->sched_class == &rt_sched_class)
+ task_rq(p)->rt.rt_nr_migratory += delta;
+ else
+ task_rq(p)->dl.dl_nr_migratory += delta;
+ }
+}
+
+static inline void
+migrate_disable_update_cpus_allowed(struct task_struct *p)
+{
+ struct rq *rq;
+ struct rq_flags rf;
+
+ p->cpus_ptr = cpumask_of(smp_processor_id());
+
+ rq = task_rq_lock(p, &rf);
+ update_nr_migratory(p, -1);
+ p->nr_cpus_allowed = 1;
+ task_rq_unlock(rq, p, &rf);
+}
+
+static inline void
+migrate_enable_update_cpus_allowed(struct task_struct *p)
+{
+ struct rq *rq;
+ struct rq_flags rf;
+
+ p->cpus_ptr = &p->cpus_mask;
+
+ rq = task_rq_lock(p, &rf);
+ p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask);
+ update_nr_migratory(p, 1);
+ task_rq_unlock(rq, p, &rf);
+}
+
+void migrate_disable(void)
+{
+ struct task_struct *p = current;
+
+ if (in_atomic() || irqs_disabled()) {
+#ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic++;
+#endif
+ return;
+ }
+#ifdef CONFIG_SCHED_DEBUG
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
+#endif
+
+ if (p->migrate_disable) {
+ p->migrate_disable++;
+ return;
+ }
+
+ preempt_disable();
+ preempt_lazy_disable();
+ pin_current_cpu();
+
+ migrate_disable_update_cpus_allowed(p);
+ p->migrate_disable = 1;
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(migrate_disable);
+
+void migrate_enable(void)
+{
+ struct task_struct *p = current;
+
+ if (in_atomic() || irqs_disabled()) {
+#ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic--;
+#endif
+ return;
+ }
+
+#ifdef CONFIG_SCHED_DEBUG
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
+#endif
+
+ WARN_ON_ONCE(p->migrate_disable <= 0);
+ if (p->migrate_disable > 1) {
+ p->migrate_disable--;
+ return;
+ }
+
+ preempt_disable();
+
+ p->migrate_disable = 0;
+ migrate_enable_update_cpus_allowed(p);
+
+ if (p->migrate_disable_update) {
+ struct rq *rq;
+ struct rq_flags rf;
+
+ rq = task_rq_lock(p, &rf);
+ update_rq_clock(rq);
+
+ __do_set_cpus_allowed_tail(p, &p->cpus_mask);
+ task_rq_unlock(rq, p, &rf);
+
+ p->migrate_disable_update = 0;
+
+ WARN_ON(smp_processor_id() != task_cpu(p));
+ if (!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
+ const struct cpumask *cpu_valid_mask = cpu_active_mask;
+ struct migration_arg arg;
+ unsigned int dest_cpu;
+
+ if (p->flags & PF_KTHREAD) {
+ /*
+ * Kernel threads are allowed on online && !active CPUs
+ */
+ cpu_valid_mask = cpu_online_mask;
+ }
+ dest_cpu = cpumask_any_and(cpu_valid_mask, &p->cpus_mask);
+ arg.task = p;
+ arg.dest_cpu = dest_cpu;
+
+ unpin_current_cpu();
+ preempt_lazy_enable();
+ preempt_enable();
+ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
+ tlb_migrate_finish(p->mm);
+
+ return;
+ }
+ }
+ unpin_current_cpu();
+ preempt_lazy_enable();
+ preempt_enable();
+}
+EXPORT_SYMBOL(migrate_enable);
+
+#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+void migrate_disable(void)
+{
+#ifdef CONFIG_SCHED_DEBUG
+ struct task_struct *p = current;
+
+ if (in_atomic() || irqs_disabled()) {
+ p->migrate_disable_atomic++;
+ return;
+ }
+
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
+
+ p->migrate_disable++;
+#endif
+ barrier();
+}
+EXPORT_SYMBOL(migrate_disable);
+
+void migrate_enable(void)
+{
+#ifdef CONFIG_SCHED_DEBUG
+ struct task_struct *p = current;
+
+ if (in_atomic() || irqs_disabled()) {
+ p->migrate_disable_atomic--;
+ return;
+ }
+
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
+
+ WARN_ON_ONCE(p->migrate_disable <= 0);
+ p->migrate_disable--;
+#endif
+ barrier();
+}
+EXPORT_SYMBOL(migrate_enable);
+#endif
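
A hypothetical caller, to illustrate the intended use: migrate_disable() pins the task to its current CPU (stable per-CPU data, stable smp_processor_id()) while, unlike preempt_disable(), still allowing the task to be preempted and to sleep on RT "sleeping" spinlocks. The per-CPU variable below is made up for the example:

#include <linux/percpu.h>
#include <linux/preempt.h>

/* Illustration only; my_counter is a hypothetical per-CPU variable. */
static DEFINE_PER_CPU(unsigned long, my_counter);

static void bump_my_counter(void)
{
	migrate_disable();		/* stay on this CPU ...                 */
	this_cpu_inc(my_counter);	/* ... so the per-CPU access is safe    */
	migrate_enable();		/* allow migration again                */
}
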