aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bounds.c2
-rw-r--r--kernel/events/core.c9
-rw-r--r--kernel/kprobes.c18
-rw-r--r--kernel/panic.c8
-rw-r--r--kernel/power/suspend.c1
-rw-r--r--kernel/printk/printk.c6
-rw-r--r--kernel/time/timer.c164
-rw-r--r--kernel/trace/ring_buffer.c51
-rw-r--r--kernel/trace/trace_event_perf.c3
-rw-r--r--kernel/trace/trace_events_trigger.c6
10 files changed, 178 insertions, 90 deletions
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 9795d75b09b2..a3e1d3dfad31 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -19,7 +19,7 @@ int main(void)
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
#ifdef CONFIG_SMP
- DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
+ DEFINE(NR_CPUS_BITS, order_base_2(CONFIG_NR_CPUS));
#endif
DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
/* End of constants */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f18a5bbc66ef..576af248a539 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6372,9 +6372,16 @@ static void perf_output_read_group(struct perf_output_handle *handle,
{
struct perf_event *leader = event->group_leader, *sub;
u64 read_format = event->attr.read_format;
+ unsigned long flags;
u64 values[6];
int n = 0;
+ /*
+ * Disabling interrupts avoids all counter scheduling
+ * (context switches, timer based rotation and IPIs).
+ */
+ local_irq_save(flags);
+
values[n++] = 1 + leader->nr_siblings;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -6410,6 +6417,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
__output_copy(handle, values, n * sizeof(u64));
}
+
+ local_irq_restore(flags);
}
#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index aecf4342f67c..063e4ade1680 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1593,10 +1593,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
jump_label_lock();
preempt_disable();
- /* Ensure it is not in reserved area nor out of text */
- if (!(core_kernel_text((unsigned long) p->addr) ||
- is_module_text_address((unsigned long) p->addr)) ||
- in_gate_area_no_mm((unsigned long) p->addr) ||
+ /* Ensure the address is in a text area, and find a module if exists. */
+ *probed_mod = NULL;
+ if (!core_kernel_text((unsigned long) p->addr)) {
+ *probed_mod = __module_text_address((unsigned long) p->addr);
+ if (!(*probed_mod)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ /* Ensure it is not in reserved area. */
+ if (in_gate_area_no_mm((unsigned long) p->addr) ||
within_kprobe_blacklist((unsigned long) p->addr) ||
jump_label_text_reserved(p->addr, p->addr) ||
find_bug((unsigned long)p->addr)) {
@@ -1604,8 +1611,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
goto out;
}
- /* Check if are we probing a module */
- *probed_mod = __module_text_address((unsigned long) p->addr);
+ /* Get module refcount and reject __init functions for loaded modules. */
if (*probed_mod) {
/*
* We must hold a refcount of the probed module while updating
diff --git a/kernel/panic.c b/kernel/panic.c
index 2973c34d6d09..353c626e445b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -404,6 +404,14 @@ void panic(const char *fmt, ...)
/* Do not scroll important messages printed above */
suppress_printk = 1;
+
+ /*
+ * The final messages may not have been printed if in a context that
+ * defers printing (such as NMI) and irq_work is not available.
+ * Explicitly flush the kernel log buffer one last time.
+ */
+ console_flush_on_panic(CONSOLE_FLUSH_PENDING);
+
local_irq_enable();
for (i = 0; ; i += PANIC_TIMER_STEP) {
touch_softlockup_watchdog();
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 5dea2778a3db..c6433d3c04a0 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -187,6 +187,7 @@ static int __init mem_sleep_default_setup(char *str)
if (mem_sleep_labels[state] &&
!strcmp(str, mem_sleep_labels[state])) {
mem_sleep_default = state;
+ mem_sleep_current = state;
break;
}
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index bb2198b40756..ae1a97dd0c3c 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1787,6 +1787,12 @@ static int console_trylock_spinning(void)
*/
mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
+ /*
+ * Update @console_may_schedule for trylock because the previous
+ * owner may have been schedulable.
+ */
+ console_may_schedule = 0;
+
return 1;
}
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 16a2b62f5f74..6e2dd83a93af 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1031,7 +1031,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
/*
* We are trying to schedule the timer on the new base.
* However we can't change timer's base while it is running,
- * otherwise del_timer_sync() can't detect that the timer's
+ * otherwise timer_delete_sync() can't detect that the timer's
* handler yet has not finished. This also guarantees that the
* timer is serialized wrt itself.
*/
@@ -1072,14 +1072,16 @@ out_unlock:
}
/**
- * mod_timer_pending - modify a pending timer's timeout
- * @timer: the pending timer to be modified
- * @expires: new timeout in jiffies
+ * mod_timer_pending - Modify a pending timer's timeout
+ * @timer: The pending timer to be modified
+ * @expires: New absolute timeout in jiffies
*
- * mod_timer_pending() is the same for pending timers as mod_timer(),
- * but will not re-activate and modify already deleted timers.
+ * mod_timer_pending() is the same for pending timers as mod_timer(), but
+ * will not activate inactive timers.
*
- * It is useful for unserialized use of timers.
+ * Return:
+ * * %0 - The timer was inactive and not modified
+ * * %1 - The timer was active and requeued to expire at @expires
*/
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
@@ -1088,24 +1090,27 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
EXPORT_SYMBOL(mod_timer_pending);
/**
- * mod_timer - modify a timer's timeout
- * @timer: the timer to be modified
- * @expires: new timeout in jiffies
- *
- * mod_timer() is a more efficient way to update the expire field of an
- * active timer (if the timer is inactive it will be activated)
+ * mod_timer - Modify a timer's timeout
+ * @timer: The timer to be modified
+ * @expires: New absolute timeout in jiffies
*
* mod_timer(timer, expires) is equivalent to:
*
* del_timer(timer); timer->expires = expires; add_timer(timer);
*
+ * mod_timer() is more efficient than the above open coded sequence. In
+ * case that the timer is inactive, the del_timer() part is a NOP. The
+ * timer is in any case activated with the new expiry time @expires.
+ *
* Note that if there are multiple unserialized concurrent users of the
* same timer, then mod_timer() is the only safe way to modify the timeout,
* since add_timer() cannot modify an already running timer.
*
- * The function returns whether it has modified a pending timer or not.
- * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
- * active timer returns 1.)
+ * Return:
+ * * %0 - The timer was inactive and started
+ * * %1 - The timer was active and requeued to expire at @expires or
+ * the timer was active and not modified because @expires did
+ * not change the effective expiry time
*/
int mod_timer(struct timer_list *timer, unsigned long expires)
{
@@ -1116,11 +1121,18 @@ EXPORT_SYMBOL(mod_timer);
/**
* timer_reduce - Modify a timer's timeout if it would reduce the timeout
* @timer: The timer to be modified
- * @expires: New timeout in jiffies
+ * @expires: New absolute timeout in jiffies
*
* timer_reduce() is very similar to mod_timer(), except that it will only
- * modify a running timer if that would reduce the expiration time (it will
- * start a timer that isn't running).
+ * modify an enqueued timer if that would reduce the expiration time. If
+ * @timer is not enqueued it starts the timer.
+ *
+ * Return:
+ * * %0 - The timer was inactive and started
+ * * %1 - The timer was active and requeued to expire at @expires or
+ * the timer was active and not modified because @expires
+ * did not change the effective expiry time such that the
+ * timer would expire earlier than already scheduled
*/
int timer_reduce(struct timer_list *timer, unsigned long expires)
{
@@ -1129,18 +1141,21 @@ int timer_reduce(struct timer_list *timer, unsigned long expires)
EXPORT_SYMBOL(timer_reduce);
/**
- * add_timer - start a timer
- * @timer: the timer to be added
+ * add_timer - Start a timer
+ * @timer: The timer to be started
*
- * The kernel will do a ->function(@timer) callback from the
- * timer interrupt at the ->expires point in the future. The
- * current time is 'jiffies'.
+ * Start @timer to expire at @timer->expires in the future. @timer->expires
+ * is the absolute expiry time measured in 'jiffies'. When the timer expires
+ * timer->function(timer) will be invoked from soft interrupt context.
*
- * The timer's ->expires, ->function fields must be set prior calling this
- * function.
+ * The @timer->expires and @timer->function fields must be set prior
+ * to calling this function.
*
- * Timers with an ->expires field in the past will be executed in the next
- * timer tick.
+ * If @timer->expires is already in the past @timer will be queued to
+ * expire at the next timer tick.
+ *
+ * This can only operate on an inactive timer. Attempts to invoke this on
+ * an active timer are rejected with a warning.
*/
void add_timer(struct timer_list *timer)
{
@@ -1150,11 +1165,13 @@ void add_timer(struct timer_list *timer)
EXPORT_SYMBOL(add_timer);
/**
- * add_timer_on - start a timer on a particular CPU
- * @timer: the timer to be added
- * @cpu: the CPU to start it on
+ * add_timer_on - Start a timer on a particular CPU
+ * @timer: The timer to be started
+ * @cpu: The CPU to start it on
+ *
+ * Same as add_timer() except that it starts the timer on the given CPU.
*
- * This is not very scalable on SMP. Double adds are not possible.
+ * See add_timer() for further details.
*/
void add_timer_on(struct timer_list *timer, int cpu)
{
@@ -1189,15 +1206,18 @@ void add_timer_on(struct timer_list *timer, int cpu)
EXPORT_SYMBOL_GPL(add_timer_on);
/**
- * del_timer - deactivate a timer.
- * @timer: the timer to be deactivated
- *
- * del_timer() deactivates a timer - this works on both active and inactive
- * timers.
- *
- * The function returns whether it has deactivated a pending timer or not.
- * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
- * active timer returns 1.)
+ * del_timer - Deactivate a timer.
+ * @timer: The timer to be deactivated
+ *
+ * The function only deactivates a pending timer, but contrary to
+ * timer_delete_sync() it does not take into account whether the timer's
+ * callback function is concurrently executed on a different CPU or not.
+ * It neither prevents rearming of the timer. If @timer can be rearmed
+ * concurrently then the return value of this function is meaningless.
+ *
+ * Return:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
*/
int del_timer(struct timer_list *timer)
{
@@ -1219,10 +1239,19 @@ EXPORT_SYMBOL(del_timer);
/**
* try_to_del_timer_sync - Try to deactivate a timer
- * @timer: timer to delete
+ * @timer: Timer to deactivate
+ *
+ * This function tries to deactivate a timer. On success the timer is not
+ * queued and the timer callback function is not running on any CPU.
+ *
+ * This function does not guarantee that the timer cannot be rearmed right
+ * after dropping the base lock. That needs to be prevented by the calling
+ * code if necessary.
*
- * This function tries to deactivate a timer. Upon successful (ret >= 0)
- * exit the timer is not queued and the handler is not running on any CPU.
+ * Return:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
+ * * %-1 - The timer callback function is running on a different CPU
*/
int try_to_del_timer_sync(struct timer_list *timer)
{
@@ -1316,25 +1345,20 @@ static inline void timer_sync_wait_running(struct timer_base *base) { }
static inline void del_timer_wait_running(struct timer_list *timer) { }
#endif
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/**
- * del_timer_sync - deactivate a timer and wait for the handler to finish.
- * @timer: the timer to be deactivated
- *
- * This function only differs from del_timer() on SMP: besides deactivating
- * the timer it also makes sure the handler has finished executing on other
- * CPUs.
+ * timer_delete_sync - Deactivate a timer and wait for the handler to finish.
+ * @timer: The timer to be deactivated
*
* Synchronization rules: Callers must prevent restarting of the timer,
* otherwise this function is meaningless. It must not be called from
* interrupt contexts unless the timer is an irqsafe one. The caller must
- * not hold locks which would prevent completion of the timer's
- * handler. The timer's handler must not call add_timer_on(). Upon exit the
- * timer is not queued and the handler is not running on any CPU.
+ * not hold locks which would prevent completion of the timer's callback
+ * function. The timer's handler must not call add_timer_on(). Upon exit
+ * the timer is not queued and the handler is not running on any CPU.
*
- * Note: For !irqsafe timers, you must not hold locks that are held in
- * interrupt context while calling this function. Even if the lock has
- * nothing to do with the timer in question. Here's why::
+ * For !irqsafe timers, the caller must not hold locks that are held in
+ * interrupt context. Even if the lock has nothing to do with the timer in
+ * question. Here's why::
*
* CPU0 CPU1
* ---- ----
@@ -1344,16 +1368,23 @@ static inline void del_timer_wait_running(struct timer_list *timer) { }
* spin_lock_irq(somelock);
* <IRQ>
* spin_lock(somelock);
- * del_timer_sync(mytimer);
+ * timer_delete_sync(mytimer);
* while (base->running_timer == mytimer);
*
- * Now del_timer_sync() will never return and never release somelock.
- * The interrupt on the other CPU is waiting to grab somelock but
- * it has interrupted the softirq that CPU0 is waiting to finish.
+ * Now timer_delete_sync() will never return and never release somelock.
+ * The interrupt on the other CPU is waiting to grab somelock but it has
+ * interrupted the softirq that CPU0 is waiting to finish.
+ *
+ * This function cannot guarantee that the timer is not rearmed again by
+ * some concurrent or preempting code, right after it dropped the base
+ * lock. If there is the possibility of a concurrent rearm then the return
+ * value of the function is meaningless.
*
- * The function returns whether it has deactivated a pending timer or not.
+ * Return:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
*/
-int del_timer_sync(struct timer_list *timer)
+int timer_delete_sync(struct timer_list *timer)
{
int ret;
@@ -1386,8 +1417,7 @@ int del_timer_sync(struct timer_list *timer)
return ret;
}
-EXPORT_SYMBOL(del_timer_sync);
-#endif
+EXPORT_SYMBOL(timer_delete_sync);
static void call_timer_fn(struct timer_list *timer,
void (*fn)(struct timer_list *),
@@ -1409,8 +1439,8 @@ static void call_timer_fn(struct timer_list *timer,
#endif
/*
* Couple the lock chain with the lock chain at
- * del_timer_sync() by acquiring the lock_map around the fn()
- * call here and in del_timer_sync().
+ * timer_delete_sync() by acquiring the lock_map around the fn()
+ * call here and in timer_delete_sync().
*/
lock_map_acquire(&lockdep_map);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ed505c6de7ca..d2dba546fbbe 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -601,8 +601,19 @@ static void rb_wake_up_waiters(struct irq_work *work)
wake_up_all(&rbwork->waiters);
if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+ /* Only cpu_buffer sets the above flags */
+ struct ring_buffer_per_cpu *cpu_buffer =
+ container_of(rbwork, struct ring_buffer_per_cpu, irq_work);
+
+ /* Called from interrupt context */
+ raw_spin_lock(&cpu_buffer->reader_lock);
rbwork->wakeup_full = false;
rbwork->full_waiters_pending = false;
+
+ /* Waking up all waiters, they will reset the shortest full */
+ cpu_buffer->shortest_full = 0;
+ raw_spin_unlock(&cpu_buffer->reader_lock);
+
wake_up_all(&rbwork->full_waiters);
}
}
@@ -731,30 +742,51 @@ __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table, int full)
{
struct ring_buffer_per_cpu *cpu_buffer;
- struct rb_irq_work *work;
+ struct rb_irq_work *rbwork;
if (cpu == RING_BUFFER_ALL_CPUS) {
- work = &buffer->irq_work;
+ rbwork = &buffer->irq_work;
full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return EPOLLERR;
cpu_buffer = buffer->buffers[cpu];
- work = &cpu_buffer->irq_work;
+ rbwork = &cpu_buffer->irq_work;
}
if (full) {
- poll_wait(filp, &work->full_waiters, poll_table);
- work->full_waiters_pending = true;
+ unsigned long flags;
+
+ poll_wait(filp, &rbwork->full_waiters, poll_table);
+
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
if (!cpu_buffer->shortest_full ||
cpu_buffer->shortest_full > full)
cpu_buffer->shortest_full = full;
- } else {
- poll_wait(filp, &work->waiters, poll_table);
- work->waiters_pending = true;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ if (full_hit(buffer, cpu, full))
+ return EPOLLIN | EPOLLRDNORM;
+ /*
+ * Only allow full_waiters_pending update to be seen after
+ * the shortest_full is set. If the writer sees the
+ * full_waiters_pending flag set, it will compare the
+ * amount in the ring buffer to shortest_full. If the amount
+ * in the ring buffer is greater than the shortest_full
+ * percent, it will call the irq_work handler to wake up
+ * this list. The irq_handler will reset shortest_full
+ * back to zero. That's done under the reader_lock, but
+ * the below smp_mb() makes sure that the update to
+ * full_waiters_pending doesn't leak up into the above.
+ */
+ smp_mb();
+ rbwork->full_waiters_pending = true;
+ return 0;
}
+ poll_wait(filp, &rbwork->waiters, poll_table);
+ rbwork->waiters_pending = true;
+
/*
* There's a tight race between setting the waiters_pending and
* checking if the ring buffer is empty. Once the waiters_pending bit
@@ -770,9 +802,6 @@ __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
*/
smp_mb();
- if (full)
- return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
-
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
return EPOLLIN | EPOLLRDNORM;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index a9dfa04ffa44..563c0e659a40 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -395,7 +395,8 @@ void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp)
BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
- "perf buffer not large enough"))
+ "perf buffer not large enough, wanted %d, have %d",
+ size, PERF_MAX_TRACE_SIZE))
return NULL;
*rctxp = rctx = perf_swevent_get_recursion_context();
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 634d120eab2b..82580f7ffad9 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1140,10 +1140,8 @@ register_snapshot_trigger(char *glob, struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file)
{
- int ret = tracing_alloc_snapshot_instance(file->tr);
-
- if (ret < 0)
- return ret;
+ if (tracing_alloc_snapshot_instance(file->tr) != 0)
+ return 0;
return register_trigger(glob, ops, data, file);
}