diff options
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 10 | ||||
-rw-r--r-- | kernel/sched/cputime.c | 45 | ||||
-rw-r--r-- | kernel/sched/fair.c | 29 | ||||
-rw-r--r-- | kernel/sched/isolation.c | 21 | ||||
-rw-r--r-- | kernel/sched/sched.h | 8 |
5 files changed, 76 insertions, 37 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1a9983da4408..3dd675697301 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1239,13 +1239,8 @@ static void uclamp_fork(struct task_struct *p) return; for_each_clamp_id(clamp_id) { - unsigned int clamp_value = uclamp_none(clamp_id); - - /* By default, RT tasks always get 100% boost */ - if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN)) - clamp_value = uclamp_none(UCLAMP_MAX); - - uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false); + uclamp_se_set(&p->uclamp_req[clamp_id], + uclamp_none(clamp_id), false); } } @@ -3671,7 +3666,6 @@ static void sched_tick_remote(struct work_struct *work) if (cpu_is_offline(cpu)) goto out_unlock; - curr = rq->curr; update_rq_clock(rq); if (!is_idle_task(curr)) { diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index cff3e656566d..ff9435dee1df 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -909,8 +909,10 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) } while (read_seqcount_retry(&vtime->seqcount, seq)); } -static int vtime_state_check(struct vtime *vtime, int cpu) +static int vtime_state_fetch(struct vtime *vtime, int cpu) { + int state = READ_ONCE(vtime->state); + /* * We raced against a context switch, fetch the * kcpustat task again. @@ -927,10 +929,10 @@ static int vtime_state_check(struct vtime *vtime, int cpu) * * Case 1) is ok but 2) is not. So wait for a safe VTIME state. */ - if (vtime->state == VTIME_INACTIVE) + if (state == VTIME_INACTIVE) return -EAGAIN; - return 0; + return state; } static u64 kcpustat_user_vtime(struct vtime *vtime) @@ -949,14 +951,15 @@ static int kcpustat_field_vtime(u64 *cpustat, { struct vtime *vtime = &tsk->vtime; unsigned int seq; - int err; do { + int state; + seq = read_seqcount_begin(&vtime->seqcount); - err = vtime_state_check(vtime, cpu); - if (err < 0) - return err; + state = vtime_state_fetch(vtime, cpu); + if (state < 0) + return state; *val = cpustat[usage]; @@ -969,7 +972,7 @@ static int kcpustat_field_vtime(u64 *cpustat, */ switch (usage) { case CPUTIME_SYSTEM: - if (vtime->state == VTIME_SYS) + if (state == VTIME_SYS) *val += vtime->stime + vtime_delta(vtime); break; case CPUTIME_USER: @@ -981,11 +984,11 @@ static int kcpustat_field_vtime(u64 *cpustat, *val += kcpustat_user_vtime(vtime); break; case CPUTIME_GUEST: - if (vtime->state == VTIME_GUEST && task_nice(tsk) <= 0) + if (state == VTIME_GUEST && task_nice(tsk) <= 0) *val += vtime->gtime + vtime_delta(vtime); break; case CPUTIME_GUEST_NICE: - if (vtime->state == VTIME_GUEST && task_nice(tsk) > 0) + if (state == VTIME_GUEST && task_nice(tsk) > 0) *val += vtime->gtime + vtime_delta(vtime); break; default: @@ -1000,12 +1003,12 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat, enum cpu_usage_stat usage, int cpu) { u64 *cpustat = kcpustat->cpustat; + u64 val = cpustat[usage]; struct rq *rq; - u64 val; int err; if (!vtime_accounting_enabled_cpu(cpu)) - return cpustat[usage]; + return val; rq = cpu_rq(cpu); @@ -1036,23 +1039,23 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst, { struct vtime *vtime = &tsk->vtime; unsigned int seq; - int err; do { u64 *cpustat; u64 delta; + int state; seq = read_seqcount_begin(&vtime->seqcount); - err = vtime_state_check(vtime, cpu); - if (err < 0) - return err; + state = vtime_state_fetch(vtime, cpu); + if (state < 0) + return state; *dst = *src; cpustat = dst->cpustat; /* Task is sleeping, dead or idle, nothing to add */ - if (vtime->state < VTIME_SYS) + if (state < VTIME_SYS) continue; delta = vtime_delta(vtime); @@ -1061,15 +1064,15 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst, * Task runs either in user (including guest) or kernel space, * add pending nohz time to the right place. */ - if (vtime->state == VTIME_SYS) { + if (state == VTIME_SYS) { cpustat[CPUTIME_SYSTEM] += vtime->stime + delta; - } else if (vtime->state == VTIME_USER) { + } else if (state == VTIME_USER) { if (task_nice(tsk) > 0) cpustat[CPUTIME_NICE] += vtime->utime + delta; else cpustat[CPUTIME_USER] += vtime->utime + delta; } else { - WARN_ON_ONCE(vtime->state != VTIME_GUEST); + WARN_ON_ONCE(state != VTIME_GUEST); if (task_nice(tsk) > 0) { cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta; cpustat[CPUTIME_NICE] += vtime->gtime + delta; @@ -1080,7 +1083,7 @@ static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst, } } while (read_seqcount_retry(&vtime->seqcount, seq)); - return err; + return 0; } void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c1217bfe5e81..c76a20648b72 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3957,6 +3957,7 @@ static inline void check_schedstat_required(void) #endif } +static inline bool cfs_bandwidth_used(void); /* * MIGRATION @@ -4035,10 +4036,16 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) __enqueue_entity(cfs_rq, se); se->on_rq = 1; - if (cfs_rq->nr_running == 1) { + /* + * When bandwidth control is enabled, cfs might have been removed + * because of a parent been throttled but cfs->nr_running > 1. Try to + * add it unconditionnally. + */ + if (cfs_rq->nr_running == 1 || cfs_bandwidth_used()) list_add_leaf_cfs_rq(cfs_rq); + + if (cfs_rq->nr_running == 1) check_enqueue_throttle(cfs_rq); - } } static void __clear_buddies_last(struct sched_entity *se) @@ -4619,11 +4626,22 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) break; } - assert_list_leaf_cfs_rq(rq); - if (!se) add_nr_running(rq, task_delta); + /* + * The cfs_rq_throttled() breaks in the above iteration can result in + * incomplete leaf list maintenance, resulting in triggering the + * assertion below. + */ + for_each_sched_entity(se) { + cfs_rq = cfs_rq_of(se); + + list_add_leaf_cfs_rq(cfs_rq); + } + + assert_list_leaf_cfs_rq(rq); + /* Determine whether we need to wake up potentially idle CPU: */ if (rq->curr == rq->idle && rq->cfs.nr_running) resched_curr(rq); @@ -8345,7 +8363,8 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd, * Computing avg_load makes sense only when group is fully busy or * overloaded */ - if (sgs->group_type < group_fully_busy) + if (sgs->group_type == group_fully_busy || + sgs->group_type == group_overloaded) sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) / sgs->group_capacity; } diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 008d6ac2342b..808244f3ddd9 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -149,6 +149,9 @@ __setup("nohz_full=", housekeeping_nohz_full_setup); static int __init housekeeping_isolcpus_setup(char *str) { unsigned int flags = 0; + bool illegal = false; + char *par; + int len; while (isalpha(*str)) { if (!strncmp(str, "nohz,", 5)) { @@ -169,8 +172,22 @@ static int __init housekeeping_isolcpus_setup(char *str) continue; } - pr_warn("isolcpus: Error, unknown flag\n"); - return 0; + /* + * Skip unknown sub-parameter and validate that it is not + * containing an invalid character. + */ + for (par = str, len = 0; *str && *str != ','; str++, len++) { + if (!isalpha(*str) && *str != '_') + illegal = true; + } + + if (illegal) { + pr_warn("isolcpus: Invalid flag %.*s\n", len, par); + return 0; + } + + pr_info("isolcpus: Skipped unknown flag %.*s\n", len, par); + str++; } /* Default behaviour for isolcpus without flags */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9ea647835fd6..b056149c228b 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -118,7 +118,13 @@ extern long calc_load_fold_active(struct rq *this_rq, long adjust); #ifdef CONFIG_64BIT # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) # define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT) -# define scale_load_down(w) ((w) >> SCHED_FIXEDPOINT_SHIFT) +# define scale_load_down(w) \ +({ \ + unsigned long __w = (w); \ + if (__w) \ + __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \ + __w; \ +}) #else # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT) # define scale_load(w) (w) |