Diffstat (limited to 'kernel')
32 files changed, 295 insertions, 136 deletions
diff --git a/kernel/async.c b/kernel/async.c
index 4c3773c0bf63..f1fd155abff6 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -84,20 +84,24 @@ static atomic_t entry_count;
 
 static async_cookie_t lowest_in_progress(struct async_domain *domain)
 {
-	struct list_head *pending;
+	struct async_entry *first = NULL;
 	async_cookie_t ret = ASYNC_COOKIE_MAX;
 	unsigned long flags;
 
 	spin_lock_irqsave(&async_lock, flags);
 
-	if (domain)
-		pending = &domain->pending;
-	else
-		pending = &async_global_pending;
+	if (domain) {
+		if (!list_empty(&domain->pending))
+			first = list_first_entry(&domain->pending,
+					struct async_entry, domain_list);
+	} else {
+		if (!list_empty(&async_global_pending))
+			first = list_first_entry(&async_global_pending,
+					struct async_entry, global_list);
+	}
 
-	if (!list_empty(pending))
-		ret = list_first_entry(pending, struct async_entry,
-				       domain_list)->cookie;
+	if (first)
+		ret = first->cookie;
 
 	spin_unlock_irqrestore(&async_lock, flags);
 	return ret;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1c1b8ab34037..6c1783bff424 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -75,7 +75,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	u32 index = *(u32 *)key;
+	u32 index = key ? *(u32 *)key : U32_MAX;
 	u32 *next = (u32 *)next_key;
 
 	if (index >= array->map.max_entries) {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 54f0e7fcd0e2..199b54e75359 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -361,7 +361,7 @@ select_insn:
 		DST = tmp;
 		CONT;
 	ALU_MOD_X:
-		if (unlikely(SRC == 0))
+		if (unlikely((u32)SRC == 0))
 			return 0;
 		tmp = (u32) DST;
 		DST = do_div(tmp, (u32) SRC);
@@ -380,7 +380,7 @@ select_insn:
 		DST = div64_u64(DST, SRC);
 		CONT;
 	ALU_DIV_X:
-		if (unlikely(SRC == 0))
+		if (unlikely((u32)SRC == 0))
 			return 0;
 		tmp = (u32) DST;
 		do_div(tmp, (u32) SRC);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 972b76bf54b7..3bafeb25972b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -149,12 +149,15 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	struct hlist_head *head;
 	struct htab_elem *l, *next_l;
 	u32 hash, key_size;
-	int i;
+	int i = 0;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	key_size = map->key_size;
 
+	if (!key)
+		goto find_first_elem;
+
 	hash = htab_map_hash(key, key_size);
 
 	head = select_bucket(htab, hash);
@@ -162,10 +165,8 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	/* lookup the key */
 	l = lookup_elem_raw(head, hash, key, key_size);
 
-	if (!l) {
-		i = 0;
+	if (!l)
 		goto find_first_elem;
-	}
 
 	/* key was found, get next key in the same bucket */
 	next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3bae6c591914..0fcb43cb2006 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -311,14 +311,18 @@ static int map_get_next_key(union bpf_attr *attr)
 	if (IS_ERR(map))
 		return PTR_ERR(map);
 
-	err = -ENOMEM;
-	key = kmalloc(map->key_size, GFP_USER);
-	if (!key)
-		goto err_put;
-
-	err = -EFAULT;
-	if (copy_from_user(key, ukey, map->key_size) != 0)
-		goto free_key;
+	if (ukey) {
+		err = -ENOMEM;
+		key = kmalloc(map->key_size, GFP_USER);
+		if (!key)
+			goto err_put;
+
+		err = -EFAULT;
+		if (copy_from_user(key, ukey, map->key_size) != 0)
+			goto free_key;
+	} else {
+		key = NULL;
+	}
 
 	err = -ENOMEM;
 	next_key = kmalloc(map->key_size, GFP_USER);
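
[Note: the bpf changes above (arraymap.c, hashtab.c, syscall.c) make BPF_MAP_GET_NEXT_KEY treat a NULL key as a request for the first key, so userspace can walk a map without guessing a nonexistent start key. A minimal illustrative sketch of the resulting iteration pattern, not part of the diff; the helper names bpf_next_key() and walk_map() are hypothetical, and a map fd with u32 keys is assumed:]

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Thin wrapper around the bpf(2) syscall's BPF_MAP_GET_NEXT_KEY command. */
static int bpf_next_key(int map_fd, const void *key, void *next_key)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (__u64)(unsigned long)key;
	attr.next_key = (__u64)(unsigned long)next_key;

	return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
}

static void walk_map(int map_fd)
{
	__u32 cur, next;
	void *key = NULL;	/* NULL key: the kernel now returns the first key */

	while (bpf_next_key(map_fd, key, &next) == 0) {
		printf("key: %u\n", next);
		cur = next;
		key = &cur;	/* subsequent calls pass the previous key */
	}
	/* the loop ends with -1/ENOENT once the last key has been returned */
}
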
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1bdc6f910a1d..03d74868c709 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1020,6 +1020,11 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn)
 			return -EINVAL;
 		}
 
+		if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
+			verbose("BPF_ARSH not supported for 32 bit ALU\n");
+			return -EINVAL;
+		}
+
 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index d659487254d5..d37acf86037a 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -107,14 +107,8 @@ int get_callchain_buffers(void)
 		goto exit;
 	}
 
-	if (count > 1) {
-		/* If the allocation failed, give up */
-		if (!callchain_cpus_entries)
-			err = -ENOMEM;
-		goto exit;
-	}
-
-	err = alloc_callchain_buffers();
+	if (count == 1)
+		err = alloc_callchain_buffers();
 exit:
 	if (err)
 		atomic_dec(&nr_callchain_events);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3ec89d6ecba7..9dc0489d510b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5107,9 +5107,6 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 	__output_copy(handle, values, n * sizeof(u64));
 }
 
-/*
- * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
- */
 static void perf_output_read_group(struct perf_output_handle *handle,
 			    struct perf_event *event,
 			    u64 enabled, u64 running)
@@ -5154,6 +5151,13 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
 				 PERF_FORMAT_TOTAL_TIME_RUNNING)
 
+/*
+ * XXX PERF_SAMPLE_READ vs inherited events seems difficult.
+ *
+ * The problem is that it's both hard and excessively expensive to iterate the
+ * child list, not to mention that it's impossible to IPI the children running
+ * on another CPU, from interrupt/NMI context.
+ */
 static void perf_output_read(struct perf_output_handle *handle,
 			     struct perf_event *event)
 {
@@ -7631,9 +7635,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		local64_set(&hwc->period_left, hwc->sample_period);
 
 	/*
-	 * we currently do not support PERF_FORMAT_GROUP on inherited events
+	 * We currently do not support PERF_SAMPLE_READ on inherited events.
+	 * See perf_output_read().
 	 */
-	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
+	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))
 		goto err_ns;
 
 	if (!has_branch_stack(event))
@@ -7801,9 +7806,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 		 * __u16 sample size limit.
 		 */
 		if (attr->sample_stack_user >= USHRT_MAX)
-			ret = -EINVAL;
+			return -EINVAL;
 		else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64)))
-			ret = -EINVAL;
+			return -EINVAL;
 	}
 
 	if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
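
[Note: with the events/core.c change above, an inheritable event that samples with PERF_SAMPLE_READ is refused up front in perf_event_alloc() instead of producing unusable data. An illustrative userspace probe, not part of the diff, that is expected to fail at creation time on a patched kernel (the exact errno is not asserted here):]

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ;
	attr.inherit = 1;	/* inherit + PERF_SAMPLE_READ: now rejected */

	/* expected: -1, since the perf_event_alloc() check above fires */
	return perf_event_open(&attr, 0, -1, -1, 0) < 0 ? 0 : 1;
}
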
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 92ce5f4ccc26..a27245fdcd81 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -427,16 +427,9 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
  * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
  * @bp: the breakpoint structure to modify
  * @attr: new breakpoint attributes
- * @triggered: callback to trigger when we hit the breakpoint
- * @tsk: pointer to 'task_struct' of the process to which the address belongs
  */
 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
-	u64 old_addr = bp->attr.bp_addr;
-	u64 old_len = bp->attr.bp_len;
-	int old_type = bp->attr.bp_type;
-	int err = 0;
-
 	/*
 	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
 	 * will not be possible to raise IPIs that invoke __perf_event_disable.
@@ -451,27 +444,18 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	bp->attr.bp_addr = attr->bp_addr;
 	bp->attr.bp_type = attr->bp_type;
 	bp->attr.bp_len = attr->bp_len;
+	bp->attr.disabled = 1;
 
-	if (attr->disabled)
-		goto end;
-
-	err = validate_hw_breakpoint(bp);
-	if (!err)
-		perf_event_enable(bp);
+	if (!attr->disabled) {
+		int err = validate_hw_breakpoint(bp);
 
-	if (err) {
-		bp->attr.bp_addr = old_addr;
-		bp->attr.bp_type = old_type;
-		bp->attr.bp_len = old_len;
-		if (!bp->attr.disabled)
-			perf_event_enable(bp);
+		if (err)
+			return err;
 
-		return err;
+		perf_event_enable(bp);
+		bp->attr.disabled = 0;
 	}
 
-end:
-	bp->attr.disabled = attr->disabled;
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
diff --git a/kernel/futex.c b/kernel/futex.c
index d1649fd7404c..5d231140fd49 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -400,6 +400,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
 	struct page *page, *page_head;
+	struct address_space *mapping;
 	int err, ro = 0;
 
 	/*
@@ -478,7 +479,19 @@ again:
 	}
 #endif
 
-	lock_page(page_head);
+	/*
+	 * The treatment of mapping from this point on is critical. The page
+	 * lock protects many things but in this context the page lock
+	 * stabilizes mapping, prevents inode freeing in the shared
+	 * file-backed region case and guards against movement to swap cache.
+	 *
+	 * Strictly speaking the page lock is not needed in all cases being
+	 * considered here and page lock forces unnecessary serialization.
+	 * From this point on, mapping will be re-verified if necessary and
+	 * page lock will be acquired only if it is unavoidable.
+	 */
+
+	mapping = READ_ONCE(page_head->mapping);
 
 	/*
 	 * If page_head->mapping is NULL, then it cannot be a PageAnon
@@ -495,18 +508,31 @@ again:
 	 * shmem_writepage move it from filecache to swapcache beneath us:
 	 * an unlikely race, but we do need to retry for page_head->mapping.
 	 */
-	if (!page_head->mapping) {
-		int shmem_swizzled = PageSwapCache(page_head);
+	if (unlikely(!mapping)) {
+		int shmem_swizzled;
+
+		/*
+		 * Page lock is required to identify which special case above
+		 * applies. If this is really a shmem page then the page lock
+		 * will prevent unexpected transitions.
+		 */
+		lock_page(page);
+		shmem_swizzled = PageSwapCache(page) || page->mapping;
 		unlock_page(page_head);
 		put_page(page_head);
+
 		if (shmem_swizzled)
 			goto again;
+
 		return -EFAULT;
 	}
 
 	/*
 	 * Private mappings are handled in a simple way.
 	 *
+	 * If the futex key is stored on an anonymous page, then the associated
+	 * object is the mm which is implicitly pinned by the calling process.
+	 *
 	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
 	 * it's a read-only handle, it's expected that futexes attach to
 	 * the object not the particular process.
@@ -524,16 +550,74 @@ again:
 		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 		key->private.mm = mm;
 		key->private.address = address;
+
+		get_futex_key_refs(key); /* implies smp_mb(); (B) */
 	} else {
+		struct inode *inode;
+
+		/*
+		 * The associated futex object in this case is the inode and
+		 * the page->mapping must be traversed. Ordinarily this should
+		 * be stabilised under page lock but it's not strictly
+		 * necessary in this case as we just want to pin the inode, not
+		 * update the radix tree or anything like that.
+		 *
+		 * The RCU read lock is taken as the inode is finally freed
+		 * under RCU. If the mapping still matches expectations then the
+		 * mapping->host can be safely accessed as being a valid inode.
+		 */
+		rcu_read_lock();
+
+		if (READ_ONCE(page_head->mapping) != mapping) {
+			rcu_read_unlock();
+			put_page(page_head);
+
+			goto again;
+		}
+
+		inode = READ_ONCE(mapping->host);
+		if (!inode) {
+			rcu_read_unlock();
+			put_page(page_head);
+
+			goto again;
+		}
+
+		/*
+		 * Take a reference unless it is about to be freed. Previously
+		 * this reference was taken by ihold under the page lock
+		 * pinning the inode in place so i_lock was unnecessary. The
+		 * only way for this check to fail is if the inode was
+		 * truncated in parallel so warn for now if this happens.
+		 *
+		 * We are not calling into get_futex_key_refs() in file-backed
+		 * cases, therefore a successful atomic_inc return below will
+		 * guarantee that get_futex_key() will still imply smp_mb(); (B).
+		 */
+		if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
+			rcu_read_unlock();
+			put_page(page_head);
+
+			goto again;
+		}
+
+		/* Should be impossible but let's be paranoid for now */
+		if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+			err = -EFAULT;
+			rcu_read_unlock();
+			iput(inode);
+
+			goto out;
+		}
+
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-		key->shared.inode = page_head->mapping->host;
+		key->shared.inode = inode;
 		key->shared.pgoff = basepage_index(page);
+		rcu_read_unlock();
 	}
 
-	get_futex_key_refs(key); /* implies MB (B) */
-
 out:
-	unlock_page(page_head);
 	put_page(page_head);
 	return err;
 }
@@ -1859,8 +1943,12 @@ static int unqueue_me(struct futex_q *q)
 
 	/* In the common case we don't take the spinlock, which is nice. */
 retry:
-	lock_ptr = q->lock_ptr;
-	barrier();
+	/*
+	 * q->lock_ptr can change between this read and the following spin_lock.
+	 * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
+	 * optimizing lock_ptr out of the logic below.
+	 */
+	lock_ptr = READ_ONCE(q->lock_ptr);
 	if (lock_ptr != NULL) {
 		spin_lock(lock_ptr);
 		/*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1aacd0d58a9e..4b74158e5f9d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1245,8 +1245,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		 * set the trigger type must match. Also all must
 		 * agree on ONESHOT.
 		 */
+		unsigned int oldtype = irqd_get_trigger_type(&desc->irq_data);
+
 		if (!((old->flags & new->flags) & IRQF_SHARED) ||
-		    ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK) ||
+		    (oldtype != (new->flags & IRQF_TRIGGER_MASK)) ||
 		    ((old->flags ^ new->flags) & IRQF_ONESHOT))
 			goto mismatch;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7dd73a3059b8..04486d8e5809 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -125,7 +125,7 @@ static void *alloc_insn_page(void)
 	return module_alloc(PAGE_SIZE);
 }
 
-static void free_insn_page(void *page)
+void __weak free_insn_page(void *page)
 {
 	module_memfree(page);
 }
diff --git a/kernel/pid.c b/kernel/pid.c
index 4fd07d5b7baf..365281244acc 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -322,8 +322,10 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 	}
 
 	if (unlikely(is_child_reaper(pid))) {
-		if (pid_ns_prepare_proc(ns))
+		if (pid_ns_prepare_proc(ns)) {
+			disable_pid_allocation(ns);
 			goto out_free;
+		}
 	}
 
 	get_pid_ns(ns);
diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
index 276762f3a460..e035876af5e2 100644
--- a/kernel/printk/braille.c
+++ b/kernel/printk/braille.c
@@ -2,12 +2,13 @@
 
 #include <linux/kernel.h>
 #include <linux/console.h>
+#include <linux/errno.h>
 #include <linux/string.h>
 
 #include "console_cmdline.h"
 #include "braille.h"
 
-char *_braille_console_setup(char **str, char **brl_options)
+int _braille_console_setup(char **str, char **brl_options)
 {
 	if (!memcmp(*str, "brl,", 4)) {
 		*brl_options = "";
@@ -15,14 +16,14 @@ char *_braille_console_setup(char **str, char **brl_options)
 	} else if (!memcmp(str, "brl=", 4)) {
 		*brl_options = *str + 4;
 		*str = strchr(*brl_options, ',');
-		if (!*str)
+		if (!*str) {
 			pr_err("need port name after brl=\n");
-		else
-			*((*str)++) = 0;
-	} else
-		return NULL;
+			return -EINVAL;
+		}
+		*((*str)++) = 0;
+	}
 
-	return *str;
+	return 0;
 }
 
 int
diff --git a/kernel/printk/braille.h b/kernel/printk/braille.h
index 769d771145c8..749a6756843a 100644
--- a/kernel/printk/braille.h
+++ b/kernel/printk/braille.h
@@ -9,7 +9,14 @@ braille_set_options(struct console_cmdline *c, char *brl_options)
 	c->brl_options = brl_options;
 }
 
-char *
+/*
+ * Setup console according to braille options.
+ * Returns -EINVAL on syntax error, 0 on success (or when no braille option
+ * was actually given).
+ * Modifies str to point to the serial options.
+ * Sets brl_options to the parsed braille options.
+ */
+int
 _braille_console_setup(char **str, char **brl_options);
 
 int
@@ -25,10 +32,10 @@ braille_set_options(struct console_cmdline *c, char *brl_options)
 {
 }
 
-static inline char *
+static inline int
 _braille_console_setup(char **str, char **brl_options)
 {
-	return NULL;
+	return 0;
 }
 
 static inline int
diff --git a/kernel/profile.c b/kernel/profile.c
index 99513e1160e5..9cd8e18e6f18 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -44,7 +44,7 @@ int prof_on __read_mostly;
 EXPORT_SYMBOL_GPL(prof_on);
 
 static cpumask_var_t prof_cpu_mask;
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
 static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
 static DEFINE_PER_CPU(int, cpu_profile_flip);
 static DEFINE_MUTEX(profile_flip_mutex);
@@ -201,7 +201,7 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n)
 }
 EXPORT_SYMBOL_GPL(profile_event_unregister);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
 /*
  * Each cpu has a pair of open-addressed hashtables for pending
 * profile hits. read_profile() IPI's all cpus to request them
diff --git a/kernel/resource.c b/kernel/resource.c
index cbf725c24c3b..39ee5aeaf1e3 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -600,7 +600,8 @@ static int __find_resource(struct resource *root, struct resource *old,
 			alloc.start = constraint->alignf(constraint->alignf_data, &avail,
 					size, constraint->align);
 			alloc.end = alloc.start + size - 1;
-			if (resource_contains(&avail, &alloc)) {
+			if (alloc.start <= alloc.end &&
+			    resource_contains(&avail, &alloc)) {
 				new->start = alloc.start;
 				new->end = alloc.end;
 				return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 29f52489af11..cf5071fbecd9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -661,7 +661,8 @@ void resched_cpu(int cpu)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&rq->lock, flags);
-	resched_curr(rq);
+	if (cpu_online(cpu) || cpu == smp_processor_id())
+		resched_curr(rq);
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 2c6be169bdc7..b0bcc7f89628 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -294,7 +294,7 @@ EXPORT_SYMBOL_GPL(__ktime_divns);
  */
 ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
 {
-	ktime_t res = ktime_add(lhs, rhs);
+	ktime_t res = ktime_add_unsafe(lhs, rhs);
 
 	/*
 	 * We use KTIME_SEC_MAX here, the maximum timeout which we can
@@ -1261,7 +1261,12 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 
 	cpu_base = raw_cpu_ptr(&hrtimer_bases);
 
-	if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
+	/*
+	 * POSIX magic: Relative CLOCK_REALTIME timers are not affected by
+	 * clock modifications, so they need to become CLOCK_MONOTONIC to
+	 * ensure POSIX compliance.
+	 */
+	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
 		clock_id = CLOCK_MONOTONIC;
 
 	base = hrtimer_clockid_to_base(clock_id);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 0f5d7eae61f0..e6799547c354 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -501,18 +501,22 @@ static struct pid *good_sigevent(sigevent_t * event)
 	struct task_struct *rtn = current->group_leader;
 	int sig = event->sigev_signo;
 
-	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
-		(!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
-		 !same_thread_group(rtn, current) ||
-		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
+	switch (event->sigev_notify) {
+	case SIGEV_SIGNAL | SIGEV_THREAD_ID:
+		rtn = find_task_by_vpid(event->sigev_notify_thread_id);
+		if (!rtn || !same_thread_group(rtn, current))
+			return NULL;
+		/* FALLTHRU */
+	case SIGEV_SIGNAL:
+	case SIGEV_THREAD:
+		if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
+			return NULL;
+		/* FALLTHRU */
+	case SIGEV_NONE:
+		return task_pid(rtn);
+	default:
 		return NULL;
-
-	if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
-		(sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) ||
-		 sig_kernel_coredump(sig)))
-		return NULL;
-
-	return task_pid(rtn);
+	}
 }
 
 void posix_timers_register_clock(const clockid_t clock_id,
@@ -740,8 +744,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
 	/* interval timer ? */
 	if (iv.tv64)
 		cur_setting->it_interval = ktime_to_timespec(iv);
-	else if (!hrtimer_active(timer) &&
-		 (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
+	else if (!hrtimer_active(timer) && timr->it_sigev_notify != SIGEV_NONE)
 		return;
 
 	now = timer->base->get_time();
@@ -752,7 +755,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
 	 * expiry is > now.
 	 */
 	if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
-	    (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
+	    timr->it_sigev_notify == SIGEV_NONE))
 		timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
 
 	remaining = ktime_sub(hrtimer_get_expires(timer), now);
@@ -762,7 +765,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
 		 * A single shot SIGEV_NONE timer must return 0, when
 		 * it is expired !
 		 */
-		if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
+		if (timr->it_sigev_notify != SIGEV_NONE)
 			cur_setting->it_value.tv_nsec = 1;
 	} else
 		cur_setting->it_value = ktime_to_timespec(remaining);
@@ -874,7 +877,7 @@ common_timer_set(struct k_itimer *timr, int flags,
 		timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
 
 	/* SIGEV_NONE timers are not queued ! See common_timer_get */
-	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
+	if (timr->it_sigev_notify == SIGEV_NONE) {
 		/* Setup correct expiry time for relative timers */
 		if (mode == HRTIMER_MODE_REL) {
 			hrtimer_add_expires(timer, timer->base->get_time());
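
[Note: the good_sigevent() rewrite above enumerates exactly which sigev_notify values timer_create(2) will accept: SIGEV_SIGNAL | SIGEV_THREAD_ID (with a tid in the caller's thread group), SIGEV_SIGNAL, SIGEV_THREAD and SIGEV_NONE, with the signal number required to be in 1..SIGRTMAX. An illustrative userspace sketch of the thread-directed case, not part of the diff; make_thread_timer() is a hypothetical helper and the _sigev_un._tid spelling assumes glibc's struct sigevent layout:]

#define _GNU_SOURCE
#include <signal.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef sigev_notify_thread_id
#define sigev_notify_thread_id _sigev_un._tid	/* glibc-internal field name */
#endif

/* Create a timer whose expiry signal is delivered to the calling thread. */
static int make_thread_timer(timer_t *id)
{
	struct sigevent sev;

	memset(&sev, 0, sizeof(sev));
	sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
	sev.sigev_signo = SIGRTMIN;		/* must satisfy 0 < signo <= SIGRTMAX */
	sev.sigev_notify_thread_id = syscall(SYS_gettid);

	return timer_create(CLOCK_MONOTONIC, &sev, id);	/* 0 on success */
}
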
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index a26036d37a38..382b159d8592 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -205,6 +205,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 
 	update_clock_read_data(&rd);
 
+	if (sched_clock_timer.function != NULL) {
+		/* update timeout for clock wrap */
+		hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
+	}
+
 	r = rate;
 	if (r >= 4000000) {
 		r /= 1000000;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index e878c2e0ba45..e81d45097d05 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -16,6 +16,7 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
+#include <linux/nmi.h>
 
 #include <asm/uaccess.h>
 
@@ -91,6 +92,9 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
 
 next_one:
 	i = 0;
+
+	touch_nmi_watchdog();
+
 	raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
 
 	curr = timerqueue_getnext(&base->active);
@@ -202,6 +206,8 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 {
 	struct clock_event_device *dev = td->evtdev;
 
+	touch_nmi_watchdog();
+
 	SEQ_printf(m, "Tick Device: mode: %d\n", td->mode);
 	if (cpu < 0)
 		SEQ_printf(m, "Broadcast device\n");
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 483cecfa5c17..1994901ef239 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -57,7 +57,8 @@ static struct tracer_flags blk_tracer_flags = {
 };
 
 /* Global reference count of probes */
-static atomic_t blk_probes_ref = ATOMIC_INIT(0);
+static DEFINE_MUTEX(blk_probe_mutex);
+static int blk_probes_ref;
 
 static void blk_register_tracepoints(void);
 static void blk_unregister_tracepoints(void);
@@ -300,11 +301,26 @@ static void blk_trace_free(struct blk_trace *bt)
 	kfree(bt);
 }
 
+static void get_probe_ref(void)
+{
+	mutex_lock(&blk_probe_mutex);
+	if (++blk_probes_ref == 1)
+		blk_register_tracepoints();
+	mutex_unlock(&blk_probe_mutex);
+}
+
+static void put_probe_ref(void)
+{
+	mutex_lock(&blk_probe_mutex);
+	if (!--blk_probes_ref)
+		blk_unregister_tracepoints();
+	mutex_unlock(&blk_probe_mutex);
+}
+
 static void blk_trace_cleanup(struct blk_trace *bt)
 {
 	blk_trace_free(bt);
-	if (atomic_dec_and_test(&blk_probes_ref))
-		blk_unregister_tracepoints();
+	put_probe_ref();
 }
 
 int blk_trace_remove(struct request_queue *q)
@@ -527,8 +543,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 		goto err;
 	}
 
-	if (atomic_inc_return(&blk_probes_ref) == 1)
-		blk_register_tracepoints();
+	get_probe_ref();
 
 	return 0;
 err:
@@ -1474,9 +1489,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
 	if (bt == NULL)
 		return -EINVAL;
 
-	if (atomic_dec_and_test(&blk_probes_ref))
-		blk_unregister_tracepoints();
-
+	put_probe_ref();
 	blk_trace_free(bt);
 	return 0;
 }
@@ -1510,8 +1523,7 @@ static int blk_trace_setup_queue(struct request_queue *q,
 		goto free_bt;
 	}
 
-	if (atomic_inc_return(&blk_probes_ref) == 1)
-		blk_register_tracepoints();
+	get_probe_ref();
 
 	return 0;
 
 free_bt:
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 75c5271a56c2..40bc77080fad 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -332,6 +332,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 /* Missed count stored at end */
 #define RB_MISSED_STORED	(1 << 30)
 
+#define RB_MISSED_FLAGS		(RB_MISSED_EVENTS|RB_MISSED_STORED)
+
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
 	local_t		 commit;	/* write committed index */
@@ -383,7 +385,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
  */
 size_t ring_buffer_page_len(void *page)
 {
-	return local_read(&((struct buffer_data_page *)page)->commit)
+	struct buffer_data_page *bpage = page;
+
+	return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
 		+ BUF_PAGE_HDR_SIZE;
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a9634042c176..24c0673c274d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5729,7 +5729,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		.spd_release	= buffer_spd_release,
 	};
 	struct buffer_ref *ref;
-	int entries, size, i;
+	int entries, i;
 	ssize_t ret = 0;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -5780,14 +5780,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 			break;
 		}
 
-		/*
-		 * zero out any left over data, this is going to
-		 * user land.
-		 */
-		size = ring_buffer_page_len(ref->page);
-		if (size < PAGE_SIZE)
-			memset(ref->page + size, 0, PAGE_SIZE - size);
-
 		page = virt_to_page(ref->page);
 
 		spd.pages[i] = page;
@@ -6458,6 +6450,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
 	buf->data = alloc_percpu(struct trace_array_cpu);
 	if (!buf->data) {
 		ring_buffer_free(buf->buffer);
+		buf->buffer = NULL;
 		return -ENOMEM;
 	}
 
@@ -6481,7 +6474,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
 				    allocate_snapshot ? size : 1);
 	if (WARN_ON(ret)) {
 		ring_buffer_free(tr->trace_buffer.buffer);
+		tr->trace_buffer.buffer = NULL;
 		free_percpu(tr->trace_buffer.data);
+		tr->trace_buffer.data = NULL;
 		return -ENOMEM;
 	}
 	tr->allocated_snapshot = allocate_snapshot;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 64c1ac17af8f..3deff0445f16 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1821,6 +1821,7 @@ void trace_event_enum_update(struct trace_enum_map **map, int len)
 {
 	struct ftrace_event_call *call, *p;
 	const char *last_system = NULL;
+	bool first = false;
 	int last_i;
 	int i;
 
@@ -1828,15 +1829,28 @@ void trace_event_enum_update(struct trace_enum_map **map, int len)
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
 		/* events are usually grouped together with systems */
 		if (!last_system || call->class->system != last_system) {
+			first = true;
 			last_i = 0;
 			last_system = call->class->system;
 		}
 
+		/*
+		 * Since calls are grouped by systems, the likelihood that the
+		 * next call in the iteration belongs to the same system as the
+		 * previous call is high. As an optimization, we skip searching
+		 * for a map[] that matches the call's system if the last call
+		 * was from the same system. That's what last_i is for. If the
+		 * call has the same system as the previous call, then last_i
+		 * will be the index of the first map[] that has a matching
+		 * system.
+		 */
 		for (i = last_i; i < len; i++) {
 			if (call->class->system == map[i]->system) {
 				/* Save the first system if need be */
-				if (!last_i)
+				if (first) {
 					last_i = i;
+					first = false;
+				}
 				update_event_printk(call, map[i]);
 			}
 		}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9a4aee1d3345..2bdb78ab3bd2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -611,7 +611,7 @@ static int create_trace_kprobe(int argc, char **argv)
 	bool is_return = false, is_delete = false;
 	char *symbol = NULL, *event = NULL, *group = NULL;
 	char *arg;
-	unsigned long offset = 0;
+	long offset = 0;
 	void *addr = NULL;
 	char buf[MAX_EVENT_NAME_LEN];
 
@@ -679,7 +679,7 @@ static int create_trace_kprobe(int argc, char **argv)
 		symbol = argv[1];
 		/* TODO: support .init module functions */
 		ret = traceprobe_split_symbol_offset(symbol, &offset);
-		if (ret) {
+		if (ret || offset < 0 || offset > UINT_MAX) {
 			pr_info("Failed to parse either an address or a symbol.\n");
 			return ret;
 		}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 1769a81da8a7..741c00b90fdc 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -293,7 +293,7 @@ static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
 }
 
 /* Split symbol and offset. */
-int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset)
+int traceprobe_split_symbol_offset(char *symbol, long *offset)
 {
 	char *tmp;
 	int ret;
@@ -301,13 +301,11 @@ int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset)
 	if (!offset)
 		return -EINVAL;
 
-	tmp = strchr(symbol, '+');
+	tmp = strpbrk(symbol, "+-");
 	if (tmp) {
-		/* skip sign because kstrtoul doesn't accept '+' */
-		ret = kstrtoul(tmp + 1, 0, offset);
+		ret = kstrtol(tmp, 0, offset);
 		if (ret)
 			return ret;
-
 		*tmp = '\0';
 	} else
 		*offset = 0;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index ab283e146b70..80c4ff36896c 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -335,7 +335,7 @@ extern int traceprobe_conflict_field_name(const char *name,
 extern void traceprobe_update_arg(struct probe_arg *arg);
 extern void traceprobe_free_probe_arg(struct probe_arg *arg);
 
-extern int traceprobe_split_symbol_offset(char *symbol, unsigned long *offset);
+extern int traceprobe_split_symbol_offset(char *symbol, long *offset);
 
 extern ssize_t traceprobe_probes_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos,
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 6dd022c7b5bc..1b11c3c21a29 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -149,6 +149,8 @@ static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
 		return;
 
 	ret = strncpy_from_user(dst, src, maxlen);
+	if (ret == maxlen)
+		dst[--ret] = '\0';
 
 	if (ret < 0) {	/* Failed to fetch string */
 		((u8 *)get_rloc_data(dest))[0] = '\0';
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 3490407dc7b7..4b12034e15b0 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -185,7 +185,7 @@ static int tracepoint_add_func(struct tracepoint *tp,
 			lockdep_is_held(&tracepoints_mutex));
 	old = func_add(&tp_funcs, func);
 	if (IS_ERR(old)) {
-		WARN_ON_ONCE(1);
+		WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
 		return PTR_ERR(old);
 	}
 
@@ -218,7 +218,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
 			lockdep_is_held(&tracepoints_mutex));
 	old = func_remove(&tp_funcs, func);
 	if (IS_ERR(old)) {
-		WARN_ON_ONCE(1);
+		WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
 		return PTR_ERR(old);
 	}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0d39b3f7b77a..2c4e90f830c1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4098,6 +4098,22 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 EXPORT_SYMBOL_GPL(workqueue_set_max_active);
 
 /**
+ * current_work - retrieve %current task's work struct
+ *
+ * Determine if %current task is a workqueue worker and what it's working on.
+ * Useful to find out the context that the %current task is running in.
+ *
+ * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
+ */
+struct work_struct *current_work(void)
+{
+	struct worker *worker = current_wq_worker();
+
+	return worker ? worker->current_work : NULL;
+}
+EXPORT_SYMBOL(current_work);
+
+/**
  * current_is_workqueue_rescuer - is %current workqueue rescuer?
  *
  * Determine whether %current is a workqueue rescuer. Can be used from