aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/time/tick-sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time/tick-sched.c')
-rw-r--r--kernel/time/tick-sched.c142
1 files changed, 99 insertions, 43 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5eb04bb59802..c7b9b1e4af7d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -53,48 +53,69 @@ static ktime_t last_jiffies_update;
*/
static void tick_do_update_jiffies64(ktime_t now)
{
- unsigned long ticks = 0;
+ unsigned long ticks = 1;
ktime_t delta;
/*
- * Do a quick check without holding jiffies_lock:
- * The READ_ONCE() pairs with two updates done later in this function.
+ * Do a quick check without holding jiffies_lock. The READ_ONCE()
+ * pairs with the update done later in this function.
+ *
+ * This is also an intentional data race which is even safe on
+ * 32bit in theory. If there is a concurrent update then the check
+ * might give a random answer. It does not matter because if it
+ * returns then the concurrent update is already taking care, if it
+ * falls through then it will pointlessly contend on jiffies_lock.
+ *
+ * Though there is one nasty case on 32bit due to store tearing of
+ * the 64bit value. If the first 32bit store makes the quick check
+ * return on all other CPUs and the writing CPU context gets
+ * delayed to complete the second store (scheduled out on virt)
+ * then jiffies can become stale for up to ~2^32 nanoseconds
+ * without noticing. After that point all CPUs will wait for
+ * jiffies lock.
+ *
+ * OTOH, this is not any different than the situation with NOHZ=off
+ * where one CPU is responsible for updating jiffies and
+ * timekeeping. If that CPU goes out for lunch then all other CPUs
+ * will operate on stale jiffies until it decides to come back.
*/
- delta = ktime_sub(now, READ_ONCE(last_jiffies_update));
- if (delta < tick_period)
+ if (ktime_before(now, READ_ONCE(tick_next_period)))
return;
/* Reevaluate with jiffies_lock held */
- write_seqlock(&jiffies_lock);
-
- delta = ktime_sub(now, last_jiffies_update);
- if (delta >= tick_period) {
-
- delta = ktime_sub(delta, tick_period);
- /* Pairs with the lockless read in this function. */
- WRITE_ONCE(last_jiffies_update,
- ktime_add(last_jiffies_update, tick_period));
+ raw_spin_lock(&jiffies_lock);
+ if (ktime_before(now, tick_next_period)) {
+ raw_spin_unlock(&jiffies_lock);
+ return;
+ }
- /* Slow path for long timeouts */
- if (unlikely(delta >= tick_period)) {
- s64 incr = ktime_to_ns(tick_period);
+ write_seqcount_begin(&jiffies_seq);
- ticks = ktime_divns(delta, incr);
+ delta = ktime_sub(now, tick_next_period);
+ if (unlikely(delta >= TICK_NSEC)) {
+ /* Slow path for long idle sleep times */
+ s64 incr = TICK_NSEC;
- /* Pairs with the lockless read in this function. */
- WRITE_ONCE(last_jiffies_update,
- ktime_add_ns(last_jiffies_update,
- incr * ticks));
- }
- do_timer(++ticks);
+ ticks += ktime_divns(delta, incr);
- /* Keep the tick_next_period variable up to date */
- tick_next_period = ktime_add(last_jiffies_update, tick_period);
+ last_jiffies_update = ktime_add_ns(last_jiffies_update,
+ incr * ticks);
} else {
- write_sequnlock(&jiffies_lock);
- return;
+ last_jiffies_update = ktime_add_ns(last_jiffies_update,
+ TICK_NSEC);
}
- write_sequnlock(&jiffies_lock);
+
+ do_timer(ticks);
+
+ /*
+ * Keep the tick_next_period variable up to date. WRITE_ONCE()
+ * pairs with the READ_ONCE() in the lockless quick check above.
+ */
+ WRITE_ONCE(tick_next_period,
+ ktime_add_ns(last_jiffies_update, TICK_NSEC));
+
+ write_seqcount_end(&jiffies_seq);
+ raw_spin_unlock(&jiffies_lock);
update_wall_time();
}
@@ -105,12 +126,25 @@ static ktime_t tick_init_jiffy_update(void)
{
ktime_t period;
- write_seqlock(&jiffies_lock);
+ raw_spin_lock(&jiffies_lock);
+ write_seqcount_begin(&jiffies_seq);
/* Did we start the jiffies update yet ? */
- if (last_jiffies_update == 0)
+ if (last_jiffies_update == 0) {
+ u32 rem;
+
+ /*
+ * Ensure that the tick is aligned to a multiple of
+ * TICK_NSEC.
+ */
+ div_u64_rem(tick_next_period, TICK_NSEC, &rem);
+ if (rem)
+ tick_next_period += TICK_NSEC - rem;
+
last_jiffies_update = tick_next_period;
+ }
period = last_jiffies_update;
- write_sequnlock(&jiffies_lock);
+ write_seqcount_end(&jiffies_seq);
+ raw_spin_unlock(&jiffies_lock);
return period;
}
@@ -202,6 +236,11 @@ static bool check_tick_dependency(atomic_t *dep)
return true;
}
+ if (val & TICK_DEP_MASK_RCU) {
+ trace_tick_stop(0, TICK_DEP_MASK_RCU);
+ return true;
+ }
+
return false;
}
@@ -328,6 +367,7 @@ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
preempt_enable();
}
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
@@ -335,6 +375,7 @@ void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
atomic_andnot(BIT(bit), &ts->tick_dep_mask);
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
/*
* Set a per-task tick dependency. Posix CPU timers need this in order to elapse
@@ -402,7 +443,7 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask)
tick_nohz_full_running = true;
}
-static int tick_nohz_cpu_down(unsigned int cpu)
+bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
{
/*
* The tick_do_timer_cpu CPU handles housekeeping duty (unbound
@@ -410,8 +451,13 @@ static int tick_nohz_cpu_down(unsigned int cpu)
* CPUs. It must remain online when nohz full is enabled.
*/
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
- return -EBUSY;
- return 0;
+ return false;
+ return true;
+}
+
+static int tick_nohz_cpu_down(unsigned int cpu)
+{
+ return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY;
}
void __init tick_nohz_init(void)
@@ -636,7 +682,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
/* Forward the time to expire in the future */
- hrtimer_forward(&ts->sched_timer, now, tick_period);
+ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start_expires(&ts->sched_timer,
@@ -665,10 +711,10 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
/* Read jiffies and the time when jiffies were updated last */
do {
- seq = read_seqbegin(&jiffies_lock);
+ seq = read_seqcount_begin(&jiffies_seq);
basemono = last_jiffies_update;
basejiff = jiffies;
- } while (read_seqretry(&jiffies_lock, seq));
+ } while (read_seqcount_retry(&jiffies_seq, seq));
ts->last_jiffies = basejiff;
ts->timer_expires_base = basemono;
@@ -1198,7 +1244,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
if (unlikely(ts->tick_stopped))
return;
- hrtimer_forward(&ts->sched_timer, now, tick_period);
+ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}
@@ -1235,7 +1281,7 @@ static void tick_nohz_switch_to_nohz(void)
next = tick_init_jiffy_update();
hrtimer_set_expires(&ts->sched_timer, next);
- hrtimer_forward_now(&ts->sched_timer, tick_period);
+ hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
}
@@ -1301,7 +1347,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
if (unlikely(ts->tick_stopped))
return HRTIMER_NORESTART;
- hrtimer_forward(timer, now, tick_period);
+ hrtimer_forward(timer, now, TICK_NSEC);
return HRTIMER_RESTART;
}
@@ -1335,13 +1381,13 @@ void tick_setup_sched_timer(void)
/* Offset the tick to avert jiffies_lock contention. */
if (sched_skew_tick) {
- u64 offset = ktime_to_ns(tick_period) >> 1;
+ u64 offset = TICK_NSEC >> 1;
do_div(offset, num_possible_cpus());
offset *= smp_processor_id();
hrtimer_add_expires_ns(&ts->sched_timer, offset);
}
- hrtimer_forward(&ts->sched_timer, now, tick_period);
+ hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
}
@@ -1351,13 +1397,23 @@ void tick_setup_sched_timer(void)
void tick_cancel_sched_timer(int cpu)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ ktime_t idle_sleeptime, iowait_sleeptime;
+ unsigned long idle_calls, idle_sleeps;
# ifdef CONFIG_HIGH_RES_TIMERS
if (ts->sched_timer.base)
hrtimer_cancel(&ts->sched_timer);
# endif
+ idle_sleeptime = ts->idle_sleeptime;
+ iowait_sleeptime = ts->iowait_sleeptime;
+ idle_calls = ts->idle_calls;
+ idle_sleeps = ts->idle_sleeps;
memset(ts, 0, sizeof(*ts));
+ ts->idle_sleeptime = idle_sleeptime;
+ ts->iowait_sleeptime = iowait_sleeptime;
+ ts->idle_calls = idle_calls;
+ ts->idle_sleeps = idle_sleeps;
}
#endif