diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/core.c | 4 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 31 | ||||
-rw-r--r-- | kernel/events/core.c | 10 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 2 | ||||
-rw-r--r-- | kernel/power/swap.c | 7 | ||||
-rw-r--r-- | kernel/printk/printk.c | 9 | ||||
-rw-r--r-- | kernel/sched/core.c | 3 | ||||
-rw-r--r-- | kernel/signal.c | 19 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace.c | 62 | ||||
-rw-r--r-- | kernel/trace/trace.h | 64 | ||||
-rw-r--r-- | kernel/trace/trace_functions.c | 2 | ||||
-rw-r--r-- | kernel/trace/tracing_map.c | 40 |
13 files changed, 148 insertions, 109 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index e7211b0fa27c..485e319ba742 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -295,6 +295,7 @@ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; int bpf_jit_kallsyms __read_mostly; long bpf_jit_limit __read_mostly; +long bpf_jit_limit_max __read_mostly; static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, @@ -508,7 +509,8 @@ u64 __weak bpf_jit_alloc_exec_limit(void) static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ - bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, + bpf_jit_limit_max = bpf_jit_alloc_exec_limit(); + bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2, PAGE_SIZE), LONG_MAX); return 0; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 99783685f3d9..d5044ca33bd0 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1599,6 +1599,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; int ssid, i, ret; + u16 dfl_disable_ss_mask = 0; lockdep_assert_held(&cgroup_mutex); @@ -1615,8 +1616,28 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) /* can't move between two non-dummy roots either */ if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root) return -EBUSY; + + /* + * Collect ssid's that need to be disabled from default + * hierarchy. + */ + if (ss->root == &cgrp_dfl_root) + dfl_disable_ss_mask |= 1 << ssid; + } while_each_subsys_mask(); + if (dfl_disable_ss_mask) { + struct cgroup *scgrp = &cgrp_dfl_root.cgrp; + + /* + * Controllers from default hierarchy that need to be rebound + * are all disabled together in one go. + */ + cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask; + WARN_ON(cgroup_apply_control(scgrp)); + cgroup_finalize_control(scgrp, 0); + } + do_each_subsys_mask(ss, ssid, ss_mask) { struct cgroup_root *src_root = ss->root; struct cgroup *scgrp = &src_root->cgrp; @@ -1625,10 +1646,12 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) WARN_ON(!css || cgroup_css(dcgrp, ss)); - /* disable from the source */ - src_root->subsys_mask &= ~(1 << ssid); - WARN_ON(cgroup_apply_control(scgrp)); - cgroup_finalize_control(scgrp, 0); + if (src_root != &cgrp_dfl_root) { + /* disable from the source */ + src_root->subsys_mask &= ~(1 << ssid); + WARN_ON(cgroup_apply_control(scgrp)); + cgroup_finalize_control(scgrp, 0); + } /* rebind */ RCU_INIT_POINTER(scgrp->subsys[ssid], NULL); diff --git a/kernel/events/core.c b/kernel/events/core.c index 236e7900e3fc..0736508d595b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6110,7 +6110,6 @@ void perf_output_sample(struct perf_output_handle *handle, static u64 perf_virt_to_phys(u64 virt) { u64 phys_addr = 0; - struct page *p = NULL; if (!virt) return 0; @@ -6129,14 +6128,15 @@ static u64 perf_virt_to_phys(u64 virt) * If failed, leave phys_addr as 0. */ if (current->mm != NULL) { + struct page *p; + pagefault_disable(); - if (__get_user_pages_fast(virt, 1, 0, &p) == 1) + if (__get_user_pages_fast(virt, 1, 0, &p) == 1) { phys_addr = page_to_phys(p) + virt % PAGE_SIZE; + put_page(p); + } pagefault_enable(); } - - if (p) - put_page(p); } return phys_addr; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 03e3ab61a2ed..ac0725b1ada7 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -713,7 +713,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return NULL; - hlist_for_each_entry_rcu(class, hash_head, hash_entry) { + hlist_for_each_entry_rcu_notrace(class, hash_head, hash_entry) { if (class->key == key) { /* * Huh! same key, different name? Did someone trample diff --git a/kernel/power/swap.c b/kernel/power/swap.c index d7cdc426ee38..8b37085a6690 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -292,7 +292,7 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, return error; } -static blk_status_t hib_wait_io(struct hib_bio_batch *hb) +static int hib_wait_io(struct hib_bio_batch *hb) { wait_event(hb->wait, atomic_read(&hb->count) == 0); return blk_status_to_errno(hb->error); @@ -1526,9 +1526,10 @@ end: int swsusp_check(void) { int error; + void *holder; hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, - FMODE_READ, NULL); + FMODE_READ | FMODE_EXCL, &holder); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); @@ -1550,7 +1551,7 @@ int swsusp_check(void) put: if (error) - blkdev_put(hib_resume_bdev, FMODE_READ); + blkdev_put(hib_resume_bdev, FMODE_READ | FMODE_EXCL); else pr_debug("PM: Image signature found, resuming\n"); } else { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f96b22db5fe7..31b5e7919d62 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2092,8 +2092,15 @@ static int __init console_setup(char *str) char *s, *options, *brl_options = NULL; int idx; - if (str[0] == 0) + /* + * console="" or console=null have been suggested as a way to + * disable console output. Use ttynull that has been created + * for exacly this purpose. + */ + if (str[0] == 0 || strcmp(str, "null") == 0) { + __add_preferred_console("ttynull", 0, NULL, NULL); return 1; + } if (_braille_console_setup(&str, &brl_options)) return 1; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7cedada731c1..544a1cb66d90 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1852,6 +1852,9 @@ out: bool cpus_share_cache(int this_cpu, int that_cpu) { + if (this_cpu == that_cpu) + return true; + return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); } #endif /* CONFIG_SMP */ diff --git a/kernel/signal.c b/kernel/signal.c index deb36b35c30b..3619ab24644f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1839,16 +1839,6 @@ static inline int may_ptrace_stop(void) } /* - * Return non-zero if there is a SIGKILL that should be waking us up. - * Called with the siglock held. - */ -static int sigkill_pending(struct task_struct *tsk) -{ - return sigismember(&tsk->pending.signal, SIGKILL) || - sigismember(&tsk->signal->shared_pending.signal, SIGKILL); -} - -/* * This must be called with current->sighand->siglock held. * * This should be the path for all ptrace stops. @@ -1873,17 +1863,16 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) * calling arch_ptrace_stop, so we must release it now. * To preserve proper semantics, we must do this before * any signal bookkeeping like checking group_stop_count. - * Meanwhile, a SIGKILL could come in before we retake the - * siglock. That must prevent us from sleeping in TASK_TRACED. - * So after regaining the lock, we must check for SIGKILL. */ spin_unlock_irq(¤t->sighand->siglock); arch_ptrace_stop(exit_code, info); spin_lock_irq(¤t->sighand->siglock); - if (sigkill_pending(current)) - return; } + /* + * schedule() will not sleep if there is a pending signal that + * can awaken the task. + */ set_special_state(TASK_TRACED); /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 50c03c430f1a..7d734b4144fd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6113,7 +6113,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(TRACE_LIST_START); if (bit < 0) return; @@ -6188,7 +6188,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, { int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(TRACE_LIST_START); if (bit < 0) return; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 552104837845..cb4f6f9e2705 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1723,8 +1723,15 @@ void tracing_reset_all_online_cpus(void) } } +/* + * The tgid_map array maps from pid to tgid; i.e. the value stored at index i + * is the tgid last observed corresponding to pid=i. + */ static int *tgid_map; +/* The maximum valid index into tgid_map. */ +static size_t tgid_map_max; + #define SAVED_CMDLINES_DEFAULT 128 #define NO_CMDLINE_MAP UINT_MAX static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; @@ -1996,24 +2003,41 @@ void trace_find_cmdline(int pid, char comm[]) preempt_enable(); } +static int *trace_find_tgid_ptr(int pid) +{ + /* + * Pairs with the smp_store_release in set_tracer_flag() to ensure that + * if we observe a non-NULL tgid_map then we also observe the correct + * tgid_map_max. + */ + int *map = smp_load_acquire(&tgid_map); + + if (unlikely(!map || pid > tgid_map_max)) + return NULL; + + return &map[pid]; +} + int trace_find_tgid(int pid) { - if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT)) - return 0; + int *ptr = trace_find_tgid_ptr(pid); - return tgid_map[pid]; + return ptr ? *ptr : 0; } static int trace_save_tgid(struct task_struct *tsk) { + int *ptr; + /* treat recording of idle task as a success */ if (!tsk->pid) return 1; - if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT)) + ptr = trace_find_tgid_ptr(tsk->pid); + if (!ptr) return 0; - tgid_map[tsk->pid] = tsk->tgid; + *ptr = tsk->tgid; return 1; } @@ -4353,6 +4377,8 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) { + int *map; + if ((mask == TRACE_ITER_RECORD_TGID) || (mask == TRACE_ITER_RECORD_CMD)) lockdep_assert_held(&event_mutex); @@ -4375,9 +4401,19 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) trace_event_enable_cmd_record(enabled); if (mask == TRACE_ITER_RECORD_TGID) { - if (!tgid_map) - tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map), - GFP_KERNEL); + if (!tgid_map) { + tgid_map_max = pid_max; + map = kzalloc((tgid_map_max + 1) * sizeof(*tgid_map), + GFP_KERNEL); + + /* + * Pairs with smp_load_acquire() in + * trace_find_tgid_ptr() to ensure that if it observes + * the tgid_map we just allocated then it also observes + * the corresponding tgid_map_max value. + */ + smp_store_release(&tgid_map, map); + } if (!tgid_map) { tr->trace_flags &= ~TRACE_ITER_RECORD_TGID; return -ENOMEM; @@ -4752,18 +4788,14 @@ static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos) { int pid = ++(*pos); - if (pid > PID_MAX_DEFAULT) - return NULL; - - return &tgid_map[pid]; + return trace_find_tgid_ptr(pid); } static void *saved_tgids_start(struct seq_file *m, loff_t *pos) { - if (!tgid_map || *pos > PID_MAX_DEFAULT) - return NULL; + int pid = *pos; - return &tgid_map[*pos]; + return trace_find_tgid_ptr(pid); } static void saved_tgids_stop(struct seq_file *m, void *v) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5097e9406e78..b5a251efd164 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -459,23 +459,8 @@ struct tracer { * When function tracing occurs, the following steps are made: * If arch does not support a ftrace feature: * call internal function (uses INTERNAL bits) which calls... - * If callback is registered to the "global" list, the list - * function is called and recursion checks the GLOBAL bits. - * then this function calls... * The function callback, which can use the FTRACE bits to * check for recursion. - * - * Now if the arch does not suppport a feature, and it calls - * the global list function which calls the ftrace callback - * all three of these steps will do a recursion protection. - * There's no reason to do one if the previous caller already - * did. The recursion that we are protecting against will - * go through the same steps again. - * - * To prevent the multiple recursion checks, if a recursion - * bit is set that is higher than the MAX bit of the current - * check, then we know that the check was made by the previous - * caller, and we can skip the current check. */ enum { TRACE_BUFFER_BIT, @@ -488,12 +473,14 @@ enum { TRACE_FTRACE_NMI_BIT, TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, + TRACE_FTRACE_TRANSITION_BIT, - /* INTERNAL_BITs must be greater than FTRACE_BITs */ + /* Internal use recursion bits */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, + TRACE_INTERNAL_TRANSITION_BIT, TRACE_BRANCH_BIT, /* @@ -526,12 +513,6 @@ enum { TRACE_GRAPH_DEPTH_START_BIT, TRACE_GRAPH_DEPTH_END_BIT, - - /* - * When transitioning between context, the preempt_count() may - * not be correct. Allow for a single recursion to cover this case. - */ - TRACE_TRANSITION_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) @@ -551,12 +532,18 @@ enum { #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT -#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) #define TRACE_LIST_START TRACE_INTERNAL_BIT -#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) -#define TRACE_CONTEXT_MASK TRACE_LIST_MAX +#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) + +enum { + TRACE_CTX_NMI, + TRACE_CTX_IRQ, + TRACE_CTX_SOFTIRQ, + TRACE_CTX_NORMAL, + TRACE_CTX_TRANSITION, +}; static __always_inline int trace_get_context_bit(void) { @@ -564,59 +551,48 @@ static __always_inline int trace_get_context_bit(void) if (in_interrupt()) { if (in_nmi()) - bit = 0; + bit = TRACE_CTX_NMI; else if (in_irq()) - bit = 1; + bit = TRACE_CTX_IRQ; else - bit = 2; + bit = TRACE_CTX_SOFTIRQ; } else - bit = 3; + bit = TRACE_CTX_NORMAL; return bit; } -static __always_inline int trace_test_and_set_recursion(int start, int max) +static __always_inline int trace_test_and_set_recursion(int start) { unsigned int val = current->trace_recursion; int bit; - /* A previous recursion check was made */ - if ((val & TRACE_CONTEXT_MASK) > max) - return 0; - bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* * It could be that preempt_count has not been updated during * a switch between contexts. Allow for a single recursion. */ - bit = TRACE_TRANSITION_BIT; + bit = start + TRACE_CTX_TRANSITION; if (trace_recursion_test(bit)) return -1; trace_recursion_set(bit); barrier(); - return bit + 1; + return bit; } - /* Normal check passed, clear the transition to allow it again */ - trace_recursion_clear(TRACE_TRANSITION_BIT); - val |= 1 << bit; current->trace_recursion = val; barrier(); - return bit + 1; + return bit; } static __always_inline void trace_clear_recursion(int bit) { unsigned int val = current->trace_recursion; - if (!bit) - return; - - bit--; bit = 1 << bit; val &= ~bit; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 27f7ad12c4b1..7a5d3d422215 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -138,7 +138,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, pc = preempt_count(); preempt_disable_notrace(); - bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX); + bit = trace_test_and_set_recursion(TRACE_FTRACE_START); if (bit < 0) goto out; diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 35b2ba07f3c6..379db35838b6 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -703,29 +703,35 @@ int tracing_map_init(struct tracing_map *map) return err; } -static int cmp_entries_dup(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_dup(const void *A, const void *B) { + const struct tracing_map_sort_entry *a, *b; int ret = 0; - if (memcmp((*a)->key, (*b)->key, (*a)->elt->map->key_size)) + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + if (memcmp(a->key, b->key, a->elt->map->key_size)) ret = 1; return ret; } -static int cmp_entries_sum(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_sum(const void *A, const void *B) { const struct tracing_map_elt *elt_a, *elt_b; + const struct tracing_map_sort_entry *a, *b; struct tracing_map_sort_key *sort_key; struct tracing_map_field *field; tracing_map_cmp_fn_t cmp_fn; void *val_a, *val_b; int ret = 0; - elt_a = (*a)->elt; - elt_b = (*b)->elt; + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + elt_a = a->elt; + elt_b = b->elt; sort_key = &elt_a->map->sort_key; @@ -742,18 +748,21 @@ static int cmp_entries_sum(const struct tracing_map_sort_entry **a, return ret; } -static int cmp_entries_key(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_key(const void *A, const void *B) { const struct tracing_map_elt *elt_a, *elt_b; + const struct tracing_map_sort_entry *a, *b; struct tracing_map_sort_key *sort_key; struct tracing_map_field *field; tracing_map_cmp_fn_t cmp_fn; void *val_a, *val_b; int ret = 0; - elt_a = (*a)->elt; - elt_b = (*b)->elt; + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + elt_a = a->elt; + elt_b = b->elt; sort_key = &elt_a->map->sort_key; @@ -926,10 +935,8 @@ static void sort_secondary(struct tracing_map *map, struct tracing_map_sort_key *primary_key, struct tracing_map_sort_key *secondary_key) { - int (*primary_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); - int (*secondary_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); + int (*primary_fn)(const void *, const void *); + int (*secondary_fn)(const void *, const void *); unsigned i, start = 0, n_sub = 1; if (is_key(map, primary_key->field_idx)) @@ -998,8 +1005,7 @@ int tracing_map_sort_entries(struct tracing_map *map, unsigned int n_sort_keys, struct tracing_map_sort_entry ***sort_entries) { - int (*cmp_entries_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); + int (*cmp_entries_fn)(const void *, const void *); struct tracing_map_sort_entry *sort_entry, **entries; int i, n_entries, ret; |