aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c19
-rw-r--r--kernel/sched/cpudeadline.c2
-rw-r--r--kernel/sched/cputime.c41
-rw-r--r--kernel/sched/deadline.c5
-rw-r--r--kernel/sched/fair.c2
5 files changed, 58 insertions, 11 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5c883fe8e440..2a906f20fba7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@
#include <linux/context_tracking.h>
#include <linux/compiler.h>
#include <linux/frame.h>
+#include <linux/prefetch.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -2972,6 +2973,23 @@ EXPORT_PER_CPU_SYMBOL(kstat);
EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
/*
+ * The function fair_sched_class.update_curr accesses the struct curr
+ * and its field curr->exec_start; when called from task_sched_runtime(),
+ * we observe a high rate of cache misses in practice.
+ * Prefetching this data results in improved performance.
+ */
+static inline void prefetch_curr_exec_start(struct task_struct *p)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+#else
+ struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+#endif
+ prefetch(curr);
+ prefetch(&curr->exec_start);
+}
+
+/*
* Return accounted runtime for the task.
* In case the task is currently running, return the runtime plus current's
* pending runtime that have not been accounted yet.
@@ -3005,6 +3023,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
* thread, breaking clock_gettime().
*/
if (task_current(rq, p) && task_on_rq_queued(p)) {
+ prefetch_curr_exec_start(p);
update_rq_clock(rq);
p->sched_class->update_curr(rq);
}
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5be58820465c..d4184498c9f5 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -168,7 +168,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
if (old_idx == IDX_INVALID) {
cp->size++;
- cp->elements[cp->size - 1].dl = 0;
+ cp->elements[cp->size - 1].dl = dl;
cp->elements[cp->size - 1].cpu = cpu;
cp->elements[cpu].idx = cp->size - 1;
cpudl_change_key(cp, cp->size - 1, dl);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1934f658c036..a846cf89eb96 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -263,6 +263,11 @@ void account_idle_time(cputime_t cputime)
cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}
+/*
+ * When a guest is interrupted for a longer amount of time, missed clock
+ * ticks are not redelivered later. Due to that, this function may on
+ * occasion account more time than the calling functions think elapsed.
+ */
static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
{
#ifdef CONFIG_PARAVIRT
@@ -371,7 +376,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
* idle, or potentially user or system time. Due to rounding,
* other time can exceed ticks occasionally.
*/
- other = account_other_time(cputime);
+ other = account_other_time(ULONG_MAX);
if (other >= cputime)
return;
cputime -= other;
@@ -486,7 +491,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
}
cputime = cputime_one_jiffy;
- steal = steal_account_process_time(cputime);
+ steal = steal_account_process_time(ULONG_MAX);
if (steal >= cputime)
return;
@@ -508,13 +513,21 @@ void account_process_tick(struct task_struct *p, int user_tick)
*/
void account_idle_ticks(unsigned long ticks)
{
+ cputime_t cputime, steal;
if (sched_clock_irqtime) {
irqtime_account_idle_ticks(ticks);
return;
}
- account_idle_time(jiffies_to_cputime(ticks));
+ cputime = jiffies_to_cputime(ticks);
+ steal = steal_account_process_time(ULONG_MAX);
+
+ if (steal >= cputime)
+ return;
+
+ cputime -= steal;
+ account_idle_time(cputime);
}
/*
@@ -606,19 +619,25 @@ static void cputime_adjust(struct task_cputime *curr,
stime = curr->stime;
utime = curr->utime;
- if (utime == 0) {
- stime = rtime;
+ /*
+ * If either stime or both stime and utime are 0, assume all runtime is
+ * userspace. Once a task gets some ticks, the monotonicy code at
+ * 'update' will ensure things converge to the observed ratio.
+ */
+ if (stime == 0) {
+ utime = rtime;
goto update;
}
- if (stime == 0) {
- utime = rtime;
+ if (utime == 0) {
+ stime = rtime;
goto update;
}
stime = scale_stime((__force u64)stime, (__force u64)rtime,
(__force u64)(stime + utime));
+update:
/*
* Make sure stime doesn't go backwards; this preserves monotonicity
* for utime because rtime is monotonic.
@@ -641,7 +660,6 @@ static void cputime_adjust(struct task_cputime *curr,
stime = rtime - utime;
}
-update:
prev->stime = stime;
prev->utime = utime;
out:
@@ -686,6 +704,13 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
unsigned long now = READ_ONCE(jiffies);
cputime_t delta, other;
+ /*
+ * Unlike tick based timing, vtime based timing never has lost
+ * ticks, and no need for steal time accounting to make up for
+ * lost ticks. Vtime accounts a rounded version of actual
+ * elapsed time. Limit account_other_time to prevent rounding
+ * errors from causing elapsed vtime to go negative.
+ */
delta = jiffies_to_cputime(now - tsk->vtime_snap);
other = account_other_time(delta);
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fcb7f0217ff4..1ce8867283dc 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -658,8 +658,11 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
*
* XXX figure out if select_task_rq_dl() deals with offline cpus.
*/
- if (unlikely(!rq->online))
+ if (unlikely(!rq->online)) {
+ lockdep_unpin_lock(&rq->lock, rf.cookie);
rq = dl_task_offline_migration(rq, p);
+ rf.cookie = lockdep_pin_lock(&rq->lock);
+ }
/*
* Queueing this task back might have overloaded rq, check if we need
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4088eedea763..039de34f1521 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4269,7 +4269,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
pcfs_rq = tg->parent->cfs_rq[cpu];
cfs_rq->throttle_count = pcfs_rq->throttle_count;
- pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+ cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
}
/* conditionally throttle active cfs_rq's from put_prev_entity() */