aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/core.c 4
-rw-r--r-- kernel/cgroup/cgroup.c 31
-rw-r--r-- kernel/events/core.c 10
-rw-r--r-- kernel/locking/lockdep.c 2
-rw-r--r-- kernel/power/swap.c 7
-rw-r--r-- kernel/printk/printk.c 9
-rw-r--r-- kernel/sched/core.c 3
-rw-r--r-- kernel/signal.c 19
-rw-r--r-- kernel/trace/ftrace.c 4
-rw-r--r-- kernel/trace/trace.c 62
-rw-r--r-- kernel/trace/trace.h 64
-rw-r--r-- kernel/trace/trace_functions.c 2
-rw-r--r-- kernel/trace/tracing_map.c 40
13 files changed, 148 insertions, 109 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index e7211b0fa27c..485e319ba742 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -295,6 +295,7 @@ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
int bpf_jit_harden __read_mostly;
int bpf_jit_kallsyms __read_mostly;
long bpf_jit_limit __read_mostly;
+long bpf_jit_limit_max __read_mostly;
static __always_inline void
bpf_get_prog_addr_region(const struct bpf_prog *prog,
@@ -508,7 +509,8 @@ u64 __weak bpf_jit_alloc_exec_limit(void)
static int __init bpf_jit_charge_init(void)
{
/* Only used as heuristic here to derive limit. */
- bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2,
+ bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
+ bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2,
PAGE_SIZE), LONG_MAX);
return 0;
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 99783685f3d9..d5044ca33bd0 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1599,6 +1599,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
struct cgroup *dcgrp = &dst_root->cgrp;
struct cgroup_subsys *ss;
int ssid, i, ret;
+ u16 dfl_disable_ss_mask = 0;
lockdep_assert_held(&cgroup_mutex);
@@ -1615,8 +1616,28 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
/* can't move between two non-dummy roots either */
if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
return -EBUSY;
+
+ /*
+ * Collect ssid's that need to be disabled from default
+ * hierarchy.
+ */
+ if (ss->root == &cgrp_dfl_root)
+ dfl_disable_ss_mask |= 1 << ssid;
+
} while_each_subsys_mask();
+ if (dfl_disable_ss_mask) {
+ struct cgroup *scgrp = &cgrp_dfl_root.cgrp;
+
+ /*
+ * Controllers from default hierarchy that need to be rebound
+ * are all disabled together in one go.
+ */
+ cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
+ WARN_ON(cgroup_apply_control(scgrp));
+ cgroup_finalize_control(scgrp, 0);
+ }
+
do_each_subsys_mask(ss, ssid, ss_mask) {
struct cgroup_root *src_root = ss->root;
struct cgroup *scgrp = &src_root->cgrp;
@@ -1625,10 +1646,12 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
WARN_ON(!css || cgroup_css(dcgrp, ss));
- /* disable from the source */
- src_root->subsys_mask &= ~(1 << ssid);
- WARN_ON(cgroup_apply_control(scgrp));
- cgroup_finalize_control(scgrp, 0);
+ if (src_root != &cgrp_dfl_root) {
+ /* disable from the source */
+ src_root->subsys_mask &= ~(1 << ssid);
+ WARN_ON(cgroup_apply_control(scgrp));
+ cgroup_finalize_control(scgrp, 0);
+ }
/* rebind */
RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 236e7900e3fc..0736508d595b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6110,7 +6110,6 @@ void perf_output_sample(struct perf_output_handle *handle,
static u64 perf_virt_to_phys(u64 virt)
{
u64 phys_addr = 0;
- struct page *p = NULL;
if (!virt)
return 0;
@@ -6129,14 +6128,15 @@ static u64 perf_virt_to_phys(u64 virt)
* If failed, leave phys_addr as 0.
*/
if (current->mm != NULL) {
+ struct page *p;
+
pagefault_disable();
- if (__get_user_pages_fast(virt, 1, 0, &p) == 1)
+ if (__get_user_pages_fast(virt, 1, 0, &p) == 1) {
phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
+ put_page(p);
+ }
pagefault_enable();
}
-
- if (p)
- put_page(p);
}
return phys_addr;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 03e3ab61a2ed..ac0725b1ada7 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -713,7 +713,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return NULL;
- hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
+ hlist_for_each_entry_rcu_notrace(class, hash_head, hash_entry) {
if (class->key == key) {
/*
* Huh! same key, different name? Did someone trample
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index d7cdc426ee38..8b37085a6690 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -292,7 +292,7 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
return error;
}
-static blk_status_t hib_wait_io(struct hib_bio_batch *hb)
+static int hib_wait_io(struct hib_bio_batch *hb)
{
wait_event(hb->wait, atomic_read(&hb->count) == 0);
return blk_status_to_errno(hb->error);
@@ -1526,9 +1526,10 @@ end:
int swsusp_check(void)
{
int error;
+ void *holder;
hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
- FMODE_READ, NULL);
+ FMODE_READ | FMODE_EXCL, &holder);
if (!IS_ERR(hib_resume_bdev)) {
set_blocksize(hib_resume_bdev, PAGE_SIZE);
clear_page(swsusp_header);
@@ -1550,7 +1551,7 @@ int swsusp_check(void)
put:
if (error)
- blkdev_put(hib_resume_bdev, FMODE_READ);
+ blkdev_put(hib_resume_bdev, FMODE_READ | FMODE_EXCL);
else
pr_debug("PM: Image signature found, resuming\n");
} else {
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index f96b22db5fe7..31b5e7919d62 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2092,8 +2092,15 @@ static int __init console_setup(char *str)
char *s, *options, *brl_options = NULL;
int idx;
- if (str[0] == 0)
+ /*
+ * console="" or console=null have been suggested as a way to
+ * disable console output. Use ttynull that has been created
+ * for exactly this purpose.
+ */
+ if (str[0] == 0 || strcmp(str, "null") == 0) {
+ __add_preferred_console("ttynull", 0, NULL, NULL);
return 1;
+ }
if (_braille_console_setup(&str, &brl_options))
return 1;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7cedada731c1..544a1cb66d90 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1852,6 +1852,9 @@ out:
bool cpus_share_cache(int this_cpu, int that_cpu)
{
+ if (this_cpu == that_cpu)
+ return true;
+
return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
}
#endif /* CONFIG_SMP */
diff --git a/kernel/signal.c b/kernel/signal.c
index deb36b35c30b..3619ab24644f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1839,16 +1839,6 @@ static inline int may_ptrace_stop(void)
}
/*
- * Return non-zero if there is a SIGKILL that should be waking us up.
- * Called with the siglock held.
- */
-static int sigkill_pending(struct task_struct *tsk)
-{
- return sigismember(&tsk->pending.signal, SIGKILL) ||
- sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
-}
-
-/*
* This must be called with current->sighand->siglock held.
*
* This should be the path for all ptrace stops.
@@ -1873,17 +1863,16 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
* calling arch_ptrace_stop, so we must release it now.
* To preserve proper semantics, we must do this before
* any signal bookkeeping like checking group_stop_count.
- * Meanwhile, a SIGKILL could come in before we retake the
- * siglock. That must prevent us from sleeping in TASK_TRACED.
- * So after regaining the lock, we must check for SIGKILL.
*/
spin_unlock_irq(&current->sighand->siglock);
arch_ptrace_stop(exit_code, info);
spin_lock_irq(&current->sighand->siglock);
- if (sigkill_pending(current))
- return;
}
+ /*
+ * schedule() will not sleep if there is a pending signal that
+ * can awaken the task.
+ */
set_special_state(TASK_TRACED);
/*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 50c03c430f1a..7d734b4144fd 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6113,7 +6113,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op;
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(TRACE_LIST_START);
if (bit < 0)
return;
@@ -6188,7 +6188,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
{
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(TRACE_LIST_START);
if (bit < 0)
return;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 552104837845..cb4f6f9e2705 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1723,8 +1723,15 @@ void tracing_reset_all_online_cpus(void)
}
}
+/*
+ * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
+ * is the tgid last observed corresponding to pid=i.
+ */
static int *tgid_map;
+/* The maximum valid index into tgid_map. */
+static size_t tgid_map_max;
+
#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -1996,24 +2003,41 @@ void trace_find_cmdline(int pid, char comm[])
preempt_enable();
}
+static int *trace_find_tgid_ptr(int pid)
+{
+ /*
+ * Pairs with the smp_store_release in set_tracer_flag() to ensure that
+ * if we observe a non-NULL tgid_map then we also observe the correct
+ * tgid_map_max.
+ */
+ int *map = smp_load_acquire(&tgid_map);
+
+ if (unlikely(!map || pid > tgid_map_max))
+ return NULL;
+
+ return &map[pid];
+}
+
int trace_find_tgid(int pid)
{
- if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
- return 0;
+ int *ptr = trace_find_tgid_ptr(pid);
- return tgid_map[pid];
+ return ptr ? *ptr : 0;
}
static int trace_save_tgid(struct task_struct *tsk)
{
+ int *ptr;
+
/* treat recording of idle task as a success */
if (!tsk->pid)
return 1;
- if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
+ ptr = trace_find_tgid_ptr(tsk->pid);
+ if (!ptr)
return 0;
- tgid_map[tsk->pid] = tsk->tgid;
+ *ptr = tsk->tgid;
return 1;
}
@@ -4353,6 +4377,8 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
+ int *map;
+
if ((mask == TRACE_ITER_RECORD_TGID) ||
(mask == TRACE_ITER_RECORD_CMD))
lockdep_assert_held(&event_mutex);
@@ -4375,9 +4401,19 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
trace_event_enable_cmd_record(enabled);
if (mask == TRACE_ITER_RECORD_TGID) {
- if (!tgid_map)
- tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
- GFP_KERNEL);
+ if (!tgid_map) {
+ tgid_map_max = pid_max;
+ map = kzalloc((tgid_map_max + 1) * sizeof(*tgid_map),
+ GFP_KERNEL);
+
+ /*
+ * Pairs with smp_load_acquire() in
+ * trace_find_tgid_ptr() to ensure that if it observes
+ * the tgid_map we just allocated then it also observes
+ * the corresponding tgid_map_max value.
+ */
+ smp_store_release(&tgid_map, map);
+ }
if (!tgid_map) {
tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
return -ENOMEM;
@@ -4752,18 +4788,14 @@ static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
{
int pid = ++(*pos);
- if (pid > PID_MAX_DEFAULT)
- return NULL;
-
- return &tgid_map[pid];
+ return trace_find_tgid_ptr(pid);
}
static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
{
- if (!tgid_map || *pos > PID_MAX_DEFAULT)
- return NULL;
+ int pid = *pos;
- return &tgid_map[*pos];
+ return trace_find_tgid_ptr(pid);
}
static void saved_tgids_stop(struct seq_file *m, void *v)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5097e9406e78..b5a251efd164 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -459,23 +459,8 @@ struct tracer {
* When function tracing occurs, the following steps are made:
* If arch does not support a ftrace feature:
* call internal function (uses INTERNAL bits) which calls...
- * If callback is registered to the "global" list, the list
- * function is called and recursion checks the GLOBAL bits.
- * then this function calls...
* The function callback, which can use the FTRACE bits to
* check for recursion.
- *
- * Now if the arch does not suppport a feature, and it calls
- * the global list function which calls the ftrace callback
- * all three of these steps will do a recursion protection.
- * There's no reason to do one if the previous caller already
- * did. The recursion that we are protecting against will
- * go through the same steps again.
- *
- * To prevent the multiple recursion checks, if a recursion
- * bit is set that is higher than the MAX bit of the current
- * check, then we know that the check was made by the previous
- * caller, and we can skip the current check.
*/
enum {
TRACE_BUFFER_BIT,
@@ -488,12 +473,14 @@ enum {
TRACE_FTRACE_NMI_BIT,
TRACE_FTRACE_IRQ_BIT,
TRACE_FTRACE_SIRQ_BIT,
+ TRACE_FTRACE_TRANSITION_BIT,
- /* INTERNAL_BITs must be greater than FTRACE_BITs */
+ /* Internal use recursion bits */
TRACE_INTERNAL_BIT,
TRACE_INTERNAL_NMI_BIT,
TRACE_INTERNAL_IRQ_BIT,
TRACE_INTERNAL_SIRQ_BIT,
+ TRACE_INTERNAL_TRANSITION_BIT,
TRACE_BRANCH_BIT,
/*
@@ -526,12 +513,6 @@ enum {
TRACE_GRAPH_DEPTH_START_BIT,
TRACE_GRAPH_DEPTH_END_BIT,
-
- /*
- * When transitioning between context, the preempt_count() may
- * not be correct. Allow for a single recursion to cover this case.
- */
- TRACE_TRANSITION_BIT,
};
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
@@ -551,12 +532,18 @@ enum {
#define TRACE_CONTEXT_BITS 4
#define TRACE_FTRACE_START TRACE_FTRACE_BIT
-#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
#define TRACE_LIST_START TRACE_INTERNAL_BIT
-#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
-#define TRACE_CONTEXT_MASK TRACE_LIST_MAX
+#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
+
+enum {
+ TRACE_CTX_NMI,
+ TRACE_CTX_IRQ,
+ TRACE_CTX_SOFTIRQ,
+ TRACE_CTX_NORMAL,
+ TRACE_CTX_TRANSITION,
+};
static __always_inline int trace_get_context_bit(void)
{
@@ -564,59 +551,48 @@ static __always_inline int trace_get_context_bit(void)
if (in_interrupt()) {
if (in_nmi())
- bit = 0;
+ bit = TRACE_CTX_NMI;
else if (in_irq())
- bit = 1;
+ bit = TRACE_CTX_IRQ;
else
- bit = 2;
+ bit = TRACE_CTX_SOFTIRQ;
} else
- bit = 3;
+ bit = TRACE_CTX_NORMAL;
return bit;
}
-static __always_inline int trace_test_and_set_recursion(int start, int max)
+static __always_inline int trace_test_and_set_recursion(int start)
{
unsigned int val = current->trace_recursion;
int bit;
- /* A previous recursion check was made */
- if ((val & TRACE_CONTEXT_MASK) > max)
- return 0;
-
bit = trace_get_context_bit() + start;
if (unlikely(val & (1 << bit))) {
/*
* It could be that preempt_count has not been updated during
* a switch between contexts. Allow for a single recursion.
*/
- bit = TRACE_TRANSITION_BIT;
+ bit = start + TRACE_CTX_TRANSITION;
if (trace_recursion_test(bit))
return -1;
trace_recursion_set(bit);
barrier();
- return bit + 1;
+ return bit;
}
- /* Normal check passed, clear the transition to allow it again */
- trace_recursion_clear(TRACE_TRANSITION_BIT);
-
val |= 1 << bit;
current->trace_recursion = val;
barrier();
- return bit + 1;
+ return bit;
}
static __always_inline void trace_clear_recursion(int bit)
{
unsigned int val = current->trace_recursion;
- if (!bit)
- return;
-
- bit--;
bit = 1 << bit;
val &= ~bit;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 27f7ad12c4b1..7a5d3d422215 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -138,7 +138,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
pc = preempt_count();
preempt_disable_notrace();
- bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+ bit = trace_test_and_set_recursion(TRACE_FTRACE_START);
if (bit < 0)
goto out;
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 35b2ba07f3c6..379db35838b6 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -703,29 +703,35 @@ int tracing_map_init(struct tracing_map *map)
return err;
}
-static int cmp_entries_dup(const struct tracing_map_sort_entry **a,
- const struct tracing_map_sort_entry **b)
+static int cmp_entries_dup(const void *A, const void *B)
{
+ const struct tracing_map_sort_entry *a, *b;
int ret = 0;
- if (memcmp((*a)->key, (*b)->key, (*a)->elt->map->key_size))
+ a = *(const struct tracing_map_sort_entry **)A;
+ b = *(const struct tracing_map_sort_entry **)B;
+
+ if (memcmp(a->key, b->key, a->elt->map->key_size))
ret = 1;
return ret;
}
-static int cmp_entries_sum(const struct tracing_map_sort_entry **a,
- const struct tracing_map_sort_entry **b)
+static int cmp_entries_sum(const void *A, const void *B)
{
const struct tracing_map_elt *elt_a, *elt_b;
+ const struct tracing_map_sort_entry *a, *b;
struct tracing_map_sort_key *sort_key;
struct tracing_map_field *field;
tracing_map_cmp_fn_t cmp_fn;
void *val_a, *val_b;
int ret = 0;
- elt_a = (*a)->elt;
- elt_b = (*b)->elt;
+ a = *(const struct tracing_map_sort_entry **)A;
+ b = *(const struct tracing_map_sort_entry **)B;
+
+ elt_a = a->elt;
+ elt_b = b->elt;
sort_key = &elt_a->map->sort_key;
@@ -742,18 +748,21 @@ static int cmp_entries_sum(const struct tracing_map_sort_entry **a,
return ret;
}
-static int cmp_entries_key(const struct tracing_map_sort_entry **a,
- const struct tracing_map_sort_entry **b)
+static int cmp_entries_key(const void *A, const void *B)
{
const struct tracing_map_elt *elt_a, *elt_b;
+ const struct tracing_map_sort_entry *a, *b;
struct tracing_map_sort_key *sort_key;
struct tracing_map_field *field;
tracing_map_cmp_fn_t cmp_fn;
void *val_a, *val_b;
int ret = 0;
- elt_a = (*a)->elt;
- elt_b = (*b)->elt;
+ a = *(const struct tracing_map_sort_entry **)A;
+ b = *(const struct tracing_map_sort_entry **)B;
+
+ elt_a = a->elt;
+ elt_b = b->elt;
sort_key = &elt_a->map->sort_key;
@@ -926,10 +935,8 @@ static void sort_secondary(struct tracing_map *map,
struct tracing_map_sort_key *primary_key,
struct tracing_map_sort_key *secondary_key)
{
- int (*primary_fn)(const struct tracing_map_sort_entry **,
- const struct tracing_map_sort_entry **);
- int (*secondary_fn)(const struct tracing_map_sort_entry **,
- const struct tracing_map_sort_entry **);
+ int (*primary_fn)(const void *, const void *);
+ int (*secondary_fn)(const void *, const void *);
unsigned i, start = 0, n_sub = 1;
if (is_key(map, primary_key->field_idx))
@@ -998,8 +1005,7 @@ int tracing_map_sort_entries(struct tracing_map *map,
unsigned int n_sort_keys,
struct tracing_map_sort_entry ***sort_entries)
{
- int (*cmp_entries_fn)(const struct tracing_map_sort_entry **,
- const struct tracing_map_sort_entry **);
+ int (*cmp_entries_fn)(const void *, const void *);
struct tracing_map_sort_entry *sort_entry, **entries;
int i, n_entries, ret;