diff options
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 10 | ||||
-rw-r--r-- | kernel/sched/fair.c | 56 | ||||
-rw-r--r-- | kernel/sched/idle.c | 22 | ||||
-rw-r--r-- | kernel/sched/rt.c | 5 |
4 files changed, 68 insertions, 25 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 91683193a33f..c6d3847eebf4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -801,6 +801,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) void activate_task(struct rq *rq, struct task_struct *p, int flags) { + if (task_on_rq_migrating(p)) + flags |= ENQUEUE_MIGRATED; + if (task_contributes_to_load(p)) rq->nr_uninterruptible--; @@ -3380,8 +3383,7 @@ static noinline void __schedule_bug(struct task_struct *prev) print_ip_sym(preempt_disable_ip); pr_cont("\n"); } - if (panic_on_warn) - panic("scheduling while atomic\n"); + check_panic_on_warn("scheduling while atomic"); dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); @@ -5060,14 +5062,14 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, if (len & (sizeof(unsigned long)-1)) return -EINVAL; - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; ret = sched_getaffinity(pid, mask); if (ret == 0) { unsigned int retlen = min(len, cpumask_size()); - if (copy_to_user(user_mask_ptr, mask, retlen)) + if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen)) ret = -EFAULT; else ret = retlen; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 97ebedeeafce..4d4cc7400de1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3854,6 +3854,29 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) #endif } +static inline bool entity_is_long_sleeper(struct sched_entity *se) +{ + struct cfs_rq *cfs_rq; + u64 sleep_time; + + if (se->exec_start == 0) + return false; + + cfs_rq = cfs_rq_of(se); + + sleep_time = rq_clock_task(rq_of(cfs_rq)); + + /* Happen while migrating because of clock task divergence */ + if (sleep_time <= se->exec_start) + return false; + + sleep_time -= se->exec_start; + if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD))) + return true; + + return false; +} + static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) { @@ -3882,8 +3905,29 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) vruntime -= thresh; } - /* ensure we never gain time by being placed backwards. */ - se->vruntime = max_vruntime(se->vruntime, vruntime); + /* + * Pull vruntime of the entity being placed to the base level of + * cfs_rq, to prevent boosting it if placed backwards. + * However, min_vruntime can advance much faster than real time, with + * the extreme being when an entity with the minimal weight always runs + * on the cfs_rq. If the waking entity slept for a long time, its + * vruntime difference from min_vruntime may overflow s64 and their + * comparison may get inversed, so ignore the entity's original + * vruntime in that case. + * The maximal vruntime speedup is given by the ratio of normal to + * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES. + * When placing a migrated waking entity, its exec_start has been set + * from a different rq. In order to take into account a possible + * divergence between new and prev rq's clocks task because of irq and + * stolen time, we take an additional margin. + * So, cutting off on the sleep time of + * 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days + * should be safe. + */ + if (entity_is_long_sleeper(se)) + se->vruntime = vruntime; + else + se->vruntime = max_vruntime(se->vruntime, vruntime); } static void check_enqueue_throttle(struct cfs_rq *cfs_rq); @@ -3978,6 +4022,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (flags & ENQUEUE_WAKEUP) place_entity(cfs_rq, se, 0); + /* Entity has migrated, no longer consider this task hot */ + if (flags & ENQUEUE_MIGRATED) + se->exec_start = 0; check_schedstat_required(); update_stats_enqueue(cfs_rq, se, flags); @@ -6554,9 +6601,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) /* Tell new CPU we are migrated */ p->se.avg.last_update_time = 0; - /* We have migrated, no longer consider this task hot */ - p->se.exec_start = 0; - update_scan_period(p, new_cpu); } @@ -8687,7 +8731,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, .sd = sd, .dst_cpu = this_cpu, .dst_rq = this_rq, - .dst_grpmask = sched_group_span(sd->groups), + .dst_grpmask = group_balance_mask(sd->groups), .idle = idle, .loop_break = sched_nr_migrate_break, .cpus = cpus, diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 44a17366c8ec..4e3d149d64ad 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -53,17 +53,18 @@ __setup("hlt", cpu_idle_nopoll_setup); static noinline int __cpuidle cpu_idle_poll(void) { + trace_cpu_idle(0, smp_processor_id()); + stop_critical_timings(); rcu_idle_enter(); - trace_cpu_idle_rcuidle(0, smp_processor_id()); local_irq_enable(); - stop_critical_timings(); while (!tif_need_resched() && - (cpu_idle_force_poll || tick_check_broadcast_expired())) + (cpu_idle_force_poll || tick_check_broadcast_expired())) cpu_relax(); - start_critical_timings(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); + rcu_idle_exit(); + start_critical_timings(); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); return 1; } @@ -90,7 +91,9 @@ void __cpuidle default_idle_call(void) local_irq_enable(); } else { stop_critical_timings(); + rcu_idle_enter(); arch_cpu_idle(); + rcu_idle_exit(); start_critical_timings(); } } @@ -148,7 +151,6 @@ static void cpuidle_idle_call(void) if (cpuidle_not_available(drv, dev)) { tick_nohz_idle_stop_tick(); - rcu_idle_enter(); default_idle_call(); goto exit_idle; @@ -166,19 +168,15 @@ static void cpuidle_idle_call(void) if (idle_should_enter_s2idle() || dev->use_deepest_state) { if (idle_should_enter_s2idle()) { - rcu_idle_enter(); entered_state = cpuidle_enter_s2idle(drv, dev); if (entered_state > 0) { local_irq_enable(); goto exit_idle; } - - rcu_idle_exit(); } tick_nohz_idle_stop_tick(); - rcu_idle_enter(); next_state = cpuidle_find_deepest_state(drv, dev); call_cpuidle(drv, dev, next_state); @@ -195,8 +193,6 @@ static void cpuidle_idle_call(void) else tick_nohz_idle_retain_tick(); - rcu_idle_enter(); - entered_state = call_cpuidle(drv, dev, next_state); /* * Give the governor an opportunity to reflect on the outcome @@ -212,8 +208,6 @@ exit_idle: */ if (WARN_ON_ONCE(irqs_disabled())) local_irq_enable(); - - rcu_idle_exit(); } /* diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index b15428ede6cf..41e73baf41ab 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1522,6 +1522,8 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, BUG_ON(idx >= MAX_RT_PRIO); queue = array->queue + idx; + if (SCHED_WARN_ON(list_empty(queue))) + return NULL; next = list_entry(queue->next, struct sched_rt_entity, run_list); return next; @@ -1535,7 +1537,8 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) do { rt_se = pick_next_rt_entity(rq, rt_rq); - BUG_ON(!rt_se); + if (unlikely(!rt_se)) + return NULL; rt_rq = group_rt_rq(rt_se); } while (rt_rq); |