aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c10
-rw-r--r--kernel/sched/fair.c56
-rw-r--r--kernel/sched/idle.c22
-rw-r--r--kernel/sched/rt.c5
4 files changed, 68 insertions, 25 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 91683193a33f..c6d3847eebf4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -801,6 +801,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
+ if (task_on_rq_migrating(p))
+ flags |= ENQUEUE_MIGRATED;
+
if (task_contributes_to_load(p))
rq->nr_uninterruptible--;
@@ -3380,8 +3383,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
print_ip_sym(preempt_disable_ip);
pr_cont("\n");
}
- if (panic_on_warn)
- panic("scheduling while atomic\n");
+ check_panic_on_warn("scheduling while atomic");
dump_stack();
add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
@@ -5060,14 +5062,14 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
if (len & (sizeof(unsigned long)-1))
return -EINVAL;
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
ret = sched_getaffinity(pid, mask);
if (ret == 0) {
unsigned int retlen = min(len, cpumask_size());
- if (copy_to_user(user_mask_ptr, mask, retlen))
+ if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen))
ret = -EFAULT;
else
ret = retlen;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 97ebedeeafce..4d4cc7400de1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3854,6 +3854,29 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
#endif
}
+static inline bool entity_is_long_sleeper(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq;
+ u64 sleep_time;
+
+ if (se->exec_start == 0)
+ return false;
+
+ cfs_rq = cfs_rq_of(se);
+
+ sleep_time = rq_clock_task(rq_of(cfs_rq));
+
+ /* Happen while migrating because of clock task divergence */
+ if (sleep_time <= se->exec_start)
+ return false;
+
+ sleep_time -= se->exec_start;
+ if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
+ return true;
+
+ return false;
+}
+
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
@@ -3882,8 +3905,29 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
vruntime -= thresh;
}
- /* ensure we never gain time by being placed backwards. */
- se->vruntime = max_vruntime(se->vruntime, vruntime);
+ /*
+ * Pull vruntime of the entity being placed to the base level of
+ * cfs_rq, to prevent boosting it if placed backwards.
+ * However, min_vruntime can advance much faster than real time, with
+ * the extreme being when an entity with the minimal weight always runs
+ * on the cfs_rq. If the waking entity slept for a long time, its
+ * vruntime difference from min_vruntime may overflow s64 and their
+ * comparison may get inversed, so ignore the entity's original
+ * vruntime in that case.
+ * The maximal vruntime speedup is given by the ratio of normal to
+ * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
+ * When placing a migrated waking entity, its exec_start has been set
+ * from a different rq. In order to take into account a possible
+ * divergence between new and prev rq's clocks task because of irq and
+ * stolen time, we take an additional margin.
+ * So, cutting off on the sleep time of
+ * 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
+ * should be safe.
+ */
+ if (entity_is_long_sleeper(se))
+ se->vruntime = vruntime;
+ else
+ se->vruntime = max_vruntime(se->vruntime, vruntime);
}
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@@ -3978,6 +4022,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (flags & ENQUEUE_WAKEUP)
place_entity(cfs_rq, se, 0);
+ /* Entity has migrated, no longer consider this task hot */
+ if (flags & ENQUEUE_MIGRATED)
+ se->exec_start = 0;
check_schedstat_required();
update_stats_enqueue(cfs_rq, se, flags);
@@ -6554,9 +6601,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
/* Tell new CPU we are migrated */
p->se.avg.last_update_time = 0;
- /* We have migrated, no longer consider this task hot */
- p->se.exec_start = 0;
-
update_scan_period(p, new_cpu);
}
@@ -8687,7 +8731,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
.sd = sd,
.dst_cpu = this_cpu,
.dst_rq = this_rq,
- .dst_grpmask = sched_group_span(sd->groups),
+ .dst_grpmask = group_balance_mask(sd->groups),
.idle = idle,
.loop_break = sched_nr_migrate_break,
.cpus = cpus,
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 44a17366c8ec..4e3d149d64ad 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -53,17 +53,18 @@ __setup("hlt", cpu_idle_nopoll_setup);
static noinline int __cpuidle cpu_idle_poll(void)
{
+ trace_cpu_idle(0, smp_processor_id());
+ stop_critical_timings();
rcu_idle_enter();
- trace_cpu_idle_rcuidle(0, smp_processor_id());
local_irq_enable();
- stop_critical_timings();
while (!tif_need_resched() &&
- (cpu_idle_force_poll || tick_check_broadcast_expired()))
+ (cpu_idle_force_poll || tick_check_broadcast_expired()))
cpu_relax();
- start_critical_timings();
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+
rcu_idle_exit();
+ start_critical_timings();
+ trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
return 1;
}
@@ -90,7 +91,9 @@ void __cpuidle default_idle_call(void)
local_irq_enable();
} else {
stop_critical_timings();
+ rcu_idle_enter();
arch_cpu_idle();
+ rcu_idle_exit();
start_critical_timings();
}
}
@@ -148,7 +151,6 @@ static void cpuidle_idle_call(void)
if (cpuidle_not_available(drv, dev)) {
tick_nohz_idle_stop_tick();
- rcu_idle_enter();
default_idle_call();
goto exit_idle;
@@ -166,19 +168,15 @@ static void cpuidle_idle_call(void)
if (idle_should_enter_s2idle() || dev->use_deepest_state) {
if (idle_should_enter_s2idle()) {
- rcu_idle_enter();
entered_state = cpuidle_enter_s2idle(drv, dev);
if (entered_state > 0) {
local_irq_enable();
goto exit_idle;
}
-
- rcu_idle_exit();
}
tick_nohz_idle_stop_tick();
- rcu_idle_enter();
next_state = cpuidle_find_deepest_state(drv, dev);
call_cpuidle(drv, dev, next_state);
@@ -195,8 +193,6 @@ static void cpuidle_idle_call(void)
else
tick_nohz_idle_retain_tick();
- rcu_idle_enter();
-
entered_state = call_cpuidle(drv, dev, next_state);
/*
* Give the governor an opportunity to reflect on the outcome
@@ -212,8 +208,6 @@ exit_idle:
*/
if (WARN_ON_ONCE(irqs_disabled()))
local_irq_enable();
-
- rcu_idle_exit();
}
/*
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b15428ede6cf..41e73baf41ab 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1522,6 +1522,8 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
BUG_ON(idx >= MAX_RT_PRIO);
queue = array->queue + idx;
+ if (SCHED_WARN_ON(list_empty(queue)))
+ return NULL;
next = list_entry(queue->next, struct sched_rt_entity, run_list);
return next;
@@ -1535,7 +1537,8 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
do {
rt_se = pick_next_rt_entity(rq, rt_rq);
- BUG_ON(!rt_se);
+ if (unlikely(!rt_se))
+ return NULL;
rt_rq = group_rt_rq(rt_se);
} while (rt_rq);