aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c31
-rw-r--r--kernel/bpf/arraymap.c12
-rw-r--r--kernel/bpf/cpumap.c3
-rw-r--r--kernel/bpf/hashtab.c20
-rw-r--r--kernel/bpf/helpers.c18
-rw-r--r--kernel/bpf/map_in_map.c2
-rw-r--r--kernel/bpf/map_in_map.h2
-rw-r--r--kernel/bpf/stackmap.c9
-rw-r--r--kernel/events/core.c38
-rw-r--r--kernel/power/swap.c38
-rw-r--r--kernel/sched/membarrier.c9
-rw-r--r--kernel/sched/rt.c10
-rw-r--r--kernel/sys.c91
-rw-r--r--kernel/sysctl.c4
-rw-r--r--kernel/time/hrtimer.c3
-rw-r--r--kernel/time/tick-sched.c5
-rw-r--r--kernel/time/timekeeping.c24
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c78
-rw-r--r--kernel/trace/trace_events_trigger.c6
-rw-r--r--kernel/trace/tracing_map.c7
21 files changed, 254 insertions, 158 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 805b0c9972d3..39e84d65d253 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -484,15 +484,19 @@ static void auditd_conn_free(struct rcu_head *rcu)
* @pid: auditd PID
* @portid: auditd netlink portid
* @net: auditd network namespace pointer
+ * @skb: the netlink command from the audit daemon
+ * @ack: netlink ack flag, cleared if ack'd here
*
* Description:
* This function will obtain and drop network namespace references as
* necessary. Returns zero on success, negative values on failure.
*/
-static int auditd_set(struct pid *pid, u32 portid, struct net *net)
+static int auditd_set(struct pid *pid, u32 portid, struct net *net,
+ struct sk_buff *skb, bool *ack)
{
unsigned long flags;
struct auditd_connection *ac_old, *ac_new;
+ struct nlmsghdr *nlh;
if (!pid || !net)
return -EINVAL;
@@ -504,6 +508,13 @@ static int auditd_set(struct pid *pid, u32 portid, struct net *net)
ac_new->portid = portid;
ac_new->net = get_net(net);
+ /* send the ack now to avoid a race with the queue backlog */
+ if (*ack) {
+ nlh = nlmsg_hdr(skb);
+ netlink_ack(skb, nlh, 0, NULL);
+ *ack = false;
+ }
+
spin_lock_irqsave(&auditd_conn_lock, flags);
ac_old = rcu_dereference_protected(auditd_conn,
lockdep_is_held(&auditd_conn_lock));
@@ -1198,7 +1209,8 @@ static int audit_replace(struct pid *pid)
return auditd_send_unicast_skb(skb);
}
-static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ bool *ack)
{
u32 seq;
void *data;
@@ -1290,7 +1302,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
/* register a new auditd connection */
err = auditd_set(req_pid,
NETLINK_CB(skb).portid,
- sock_net(NETLINK_CB(skb).sk));
+ sock_net(NETLINK_CB(skb).sk),
+ skb, ack);
if (audit_enabled != AUDIT_OFF)
audit_log_config_change("audit_pid",
new_pid,
@@ -1529,9 +1542,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
* Parse the provided skb and deal with any messages that may be present,
* malformed skbs are discarded.
*/
-static void audit_receive(struct sk_buff *skb)
+static void audit_receive(struct sk_buff *skb)
{
struct nlmsghdr *nlh;
+ bool ack;
/*
* len MUST be signed for nlmsg_next to be able to dec it below 0
* if the nlmsg_len was not aligned
@@ -1544,9 +1558,12 @@ static void audit_receive(struct sk_buff *skb)
audit_ctl_lock();
while (nlmsg_ok(nlh, len)) {
- err = audit_receive_msg(skb, nlh);
- /* if err or if this message says it wants a response */
- if (err || (nlh->nlmsg_flags & NLM_F_ACK))
+ ack = nlh->nlmsg_flags & NLM_F_ACK;
+ err = audit_receive_msg(skb, nlh, &ack);
+
+ /* send an ack if the user asked for one and audit_receive_msg
+ * didn't already do it, or if there was an error. */
+ if (ack || err)
netlink_ack(skb, nlh, err, NULL);
nlh = nlmsg_next(nlh, &len);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1c65ce0098a9..81ed9b79f401 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -542,7 +542,7 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
old_ptr = xchg(array->ptrs + index, new_ptr);
if (old_ptr)
- map->ops->map_fd_put_ptr(old_ptr);
+ map->ops->map_fd_put_ptr(map, old_ptr, true);
return 0;
}
@@ -558,7 +558,7 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
old_ptr = xchg(array->ptrs + index, NULL);
if (old_ptr) {
- map->ops->map_fd_put_ptr(old_ptr);
+ map->ops->map_fd_put_ptr(map, old_ptr, true);
return 0;
} else {
return -ENOENT;
@@ -582,8 +582,9 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
return prog;
}
-static void prog_fd_array_put_ptr(void *ptr)
+static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
+ /* bpf_prog is freed after one RCU or tasks trace grace period */
bpf_prog_put(ptr);
}
@@ -694,8 +695,9 @@ err_out:
return ee;
}
-static void perf_event_fd_array_put_ptr(void *ptr)
+static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
+ /* bpf_perf_event is freed after one RCU grace period */
bpf_event_entry_free_rcu(ptr);
}
@@ -736,7 +738,7 @@ static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
return cgroup_get_from_fd(fd);
}
-static void cgroup_fd_array_put_ptr(void *ptr)
+static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* cgroup_put free cgrp after a rcu grace period */
cgroup_put(ptr);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index a367fc850393..19be747f4e5a 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -249,6 +249,7 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
static int cpu_map_kthread_run(void *data)
{
struct bpf_cpu_map_entry *rcpu = data;
+ unsigned long last_qs = jiffies;
set_current_state(TASK_INTERRUPTIBLE);
@@ -271,10 +272,12 @@ static int cpu_map_kthread_run(void *data)
if (__ptr_ring_empty(rcpu->queue)) {
schedule();
sched = 1;
+ last_qs = jiffies;
} else {
__set_current_state(TASK_RUNNING);
}
} else {
+ rcu_softirq_qs_periodic(last_qs);
sched = cond_resched();
}
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 03a67583f6fb..34c4f709b1ed 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -327,7 +327,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
num_possible_cpus());
}
- /* hash table size must be power of 2 */
+ /* hash table size must be power of 2; roundup_pow_of_two() can overflow
+ * into UB on 32-bit arches, so check that first
+ */
+ err = -E2BIG;
+ if (htab->map.max_entries > 1UL << 31)
+ goto free_htab;
+
htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
htab->elem_size = sizeof(struct htab_elem) +
@@ -337,10 +343,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
else
htab->elem_size += round_up(htab->map.value_size, 8);
- err = -E2BIG;
- /* prevent zero size kmalloc and check for u32 overflow */
- if (htab->n_buckets == 0 ||
- htab->n_buckets > U32_MAX / sizeof(struct bucket))
+ /* check for u32 overflow */
+ if (htab->n_buckets > U32_MAX / sizeof(struct bucket))
goto free_htab;
cost = (u64) htab->n_buckets * sizeof(struct bucket) +
@@ -674,7 +678,7 @@ static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
if (map->ops->map_fd_put_ptr) {
ptr = fd_htab_map_get_ptr(map, l);
- map->ops->map_fd_put_ptr(ptr);
+ map->ops->map_fd_put_ptr(map, ptr, true);
}
}
@@ -1426,7 +1430,7 @@ static void fd_htab_map_free(struct bpf_map *map)
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
void *ptr = fd_htab_map_get_ptr(map, l);
- map->ops->map_fd_put_ptr(ptr);
+ map->ops->map_fd_put_ptr(map, ptr, false);
}
}
@@ -1467,7 +1471,7 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
ret = htab_map_update_elem(map, key, &ptr, map_flags);
if (ret)
- map->ops->map_fd_put_ptr(ptr);
+ map->ops->map_fd_put_ptr(map, ptr, false);
return ret;
}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a77d2814cac5..9bfb4685d068 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -262,13 +262,18 @@ static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
static DEFINE_PER_CPU(unsigned long, irqsave_flags);
-notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
{
unsigned long flags;
local_irq_save(flags);
__bpf_spin_lock(lock);
__this_cpu_write(irqsave_flags, flags);
+}
+
+NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+{
+ __bpf_spin_lock_irqsave(lock);
return 0;
}
@@ -279,13 +284,18 @@ const struct bpf_func_proto bpf_spin_lock_proto = {
.arg1_type = ARG_PTR_TO_SPIN_LOCK,
};
-notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
{
unsigned long flags;
flags = __this_cpu_read(irqsave_flags);
__bpf_spin_unlock(lock);
local_irq_restore(flags);
+}
+
+NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+{
+ __bpf_spin_unlock_irqrestore(lock);
return 0;
}
@@ -306,9 +316,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
else
lock = dst + map->spin_lock_off;
preempt_disable();
- ____bpf_spin_lock(lock);
+ __bpf_spin_lock_irqsave(lock);
copy_map_value(map, dst, src);
- ____bpf_spin_unlock(lock);
+ __bpf_spin_unlock_irqrestore(lock);
preempt_enable();
}
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index fab4fb134547..7fe5a73aff07 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -106,7 +106,7 @@ void *bpf_map_fd_get_ptr(struct bpf_map *map,
return inner_map;
}
-void bpf_map_fd_put_ptr(void *ptr)
+void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* ptr->ops->map_free() has to go through one
* rcu grace period by itself.
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h
index a507bf6ef8b9..d296890813cc 100644
--- a/kernel/bpf/map_in_map.h
+++ b/kernel/bpf/map_in_map.h
@@ -15,7 +15,7 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
const struct bpf_map *meta1);
void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
int ufd);
-void bpf_map_fd_put_ptr(void *ptr);
+void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer);
u32 bpf_map_fd_sys_lookup_elem(void *ptr);
#endif
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 811071c227f1..bd8516d96745 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -111,11 +111,14 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
} else if (value_size / 8 > sysctl_perf_event_max_stack)
return ERR_PTR(-EINVAL);
- /* hash table size must be power of 2 */
- n_buckets = roundup_pow_of_two(attr->max_entries);
- if (!n_buckets)
+ /* hash table size must be power of 2; roundup_pow_of_two() can overflow
+ * into UB on 32-bit arches, so check that first
+ */
+ if (attr->max_entries > 1UL << 31)
return ERR_PTR(-E2BIG);
+ n_buckets = roundup_pow_of_two(attr->max_entries);
+
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
err = bpf_map_charge_init(&mem, cost + attr->max_entries *
(sizeof(struct stack_map_bucket) + (u64)value_size));
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1e62a567b0d7..f18a5bbc66ef 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10080,9 +10080,30 @@ static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
static struct attribute *pmu_dev_attrs[] = {
&dev_attr_type.attr,
&dev_attr_perf_event_mux_interval_ms.attr,
+ &dev_attr_nr_addr_filters.attr,
+ NULL,
+};
+
+static umode_t pmu_dev_is_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct pmu *pmu = dev_get_drvdata(dev);
+
+ if (n == 2 && !pmu->nr_addr_filters)
+ return 0;
+
+ return a->mode;
+}
+
+static struct attribute_group pmu_dev_attr_group = {
+ .is_visible = pmu_dev_is_visible,
+ .attrs = pmu_dev_attrs,
+};
+
+static const struct attribute_group *pmu_dev_groups[] = {
+ &pmu_dev_attr_group,
NULL,
};
-ATTRIBUTE_GROUPS(pmu_dev);
static int pmu_bus_running;
static struct bus_type pmu_bus = {
@@ -10118,18 +10139,11 @@ static int pmu_dev_alloc(struct pmu *pmu)
if (ret)
goto free_dev;
- /* For PMUs with address filters, throw in an extra attribute: */
- if (pmu->nr_addr_filters)
- ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
-
- if (ret)
- goto del_dev;
-
- if (pmu->attr_update)
+ if (pmu->attr_update) {
ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update);
-
- if (ret)
- goto del_dev;
+ if (ret)
+ goto del_dev;
+ }
out:
return ret;
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index bcc9769e8a3b..85949b86f097 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -594,11 +594,11 @@ static int crc32_threadfn(void *data)
unsigned i;
while (1) {
- wait_event(d->go, atomic_read(&d->ready) ||
+ wait_event(d->go, atomic_read_acquire(&d->ready) ||
kthread_should_stop());
if (kthread_should_stop()) {
d->thr = NULL;
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
break;
}
@@ -607,7 +607,7 @@ static int crc32_threadfn(void *data)
for (i = 0; i < d->run_threads; i++)
*d->crc32 = crc32_le(*d->crc32,
d->unc[i], *d->unc_len[i]);
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
}
return 0;
@@ -637,12 +637,12 @@ static int lzo_compress_threadfn(void *data)
struct cmp_data *d = data;
while (1) {
- wait_event(d->go, atomic_read(&d->ready) ||
+ wait_event(d->go, atomic_read_acquire(&d->ready) ||
kthread_should_stop());
if (kthread_should_stop()) {
d->thr = NULL;
d->ret = -1;
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
break;
}
@@ -651,7 +651,7 @@ static int lzo_compress_threadfn(void *data)
d->ret = lzo1x_1_compress(d->unc, d->unc_len,
d->cmp + LZO_HEADER, &d->cmp_len,
d->wrk);
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
}
return 0;
@@ -789,7 +789,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
data[thr].unc_len = off;
- atomic_set(&data[thr].ready, 1);
+ atomic_set_release(&data[thr].ready, 1);
wake_up(&data[thr].go);
}
@@ -797,12 +797,12 @@ static int save_image_lzo(struct swap_map_handle *handle,
break;
crc->run_threads = thr;
- atomic_set(&crc->ready, 1);
+ atomic_set_release(&crc->ready, 1);
wake_up(&crc->go);
for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
wait_event(data[thr].done,
- atomic_read(&data[thr].stop));
+ atomic_read_acquire(&data[thr].stop));
atomic_set(&data[thr].stop, 0);
ret = data[thr].ret;
@@ -841,7 +841,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
}
}
- wait_event(crc->done, atomic_read(&crc->stop));
+ wait_event(crc->done, atomic_read_acquire(&crc->stop));
atomic_set(&crc->stop, 0);
}
@@ -1121,12 +1121,12 @@ static int lzo_decompress_threadfn(void *data)
struct dec_data *d = data;
while (1) {
- wait_event(d->go, atomic_read(&d->ready) ||
+ wait_event(d->go, atomic_read_acquire(&d->ready) ||
kthread_should_stop());
if (kthread_should_stop()) {
d->thr = NULL;
d->ret = -1;
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
break;
}
@@ -1139,7 +1139,7 @@ static int lzo_decompress_threadfn(void *data)
flush_icache_range((unsigned long)d->unc,
(unsigned long)d->unc + d->unc_len);
- atomic_set(&d->stop, 1);
+ atomic_set_release(&d->stop, 1);
wake_up(&d->done);
}
return 0;
@@ -1327,7 +1327,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
}
if (crc->run_threads) {
- wait_event(crc->done, atomic_read(&crc->stop));
+ wait_event(crc->done, atomic_read_acquire(&crc->stop));
atomic_set(&crc->stop, 0);
crc->run_threads = 0;
}
@@ -1363,7 +1363,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
pg = 0;
}
- atomic_set(&data[thr].ready, 1);
+ atomic_set_release(&data[thr].ready, 1);
wake_up(&data[thr].go);
}
@@ -1382,7 +1382,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
wait_event(data[thr].done,
- atomic_read(&data[thr].stop));
+ atomic_read_acquire(&data[thr].stop));
atomic_set(&data[thr].stop, 0);
ret = data[thr].ret;
@@ -1413,7 +1413,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
ret = snapshot_write_next(snapshot);
if (ret <= 0) {
crc->run_threads = thr + 1;
- atomic_set(&crc->ready, 1);
+ atomic_set_release(&crc->ready, 1);
wake_up(&crc->go);
goto out_finish;
}
@@ -1421,13 +1421,13 @@ static int load_image_lzo(struct swap_map_handle *handle,
}
crc->run_threads = thr;
- atomic_set(&crc->ready, 1);
+ atomic_set_release(&crc->ready, 1);
wake_up(&crc->go);
}
out_finish:
if (crc->run_threads) {
- wait_event(crc->done, atomic_read(&crc->stop));
+ wait_event(crc->done, atomic_read_acquire(&crc->stop));
atomic_set(&crc->stop, 0);
}
stop = ktime_get();
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 46c142b69598..ca52041d5899 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -25,6 +25,8 @@
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
+static DEFINE_MUTEX(membarrier_ipi_mutex);
+
static void ipi_mb(void *info)
{
smp_mb(); /* IPIs should be serializing but paranoid. */
@@ -97,6 +99,7 @@ static int membarrier_global_expedited(void)
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;
+ mutex_lock(&membarrier_ipi_mutex);
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
@@ -143,6 +146,8 @@ static int membarrier_global_expedited(void)
* rq->curr modification in scheduler.
*/
smp_mb(); /* exit from system call is not a mb */
+ mutex_unlock(&membarrier_ipi_mutex);
+
return 0;
}
@@ -178,6 +183,7 @@ static int membarrier_private_expedited(int flags)
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
return -ENOMEM;
+ mutex_lock(&membarrier_ipi_mutex);
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
@@ -212,6 +218,7 @@ static int membarrier_private_expedited(int flags)
* rq->curr modification in scheduler.
*/
smp_mb(); /* exit from system call is not a mb */
+ mutex_unlock(&membarrier_ipi_mutex);
return 0;
}
@@ -253,6 +260,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
* between threads which are users of @mm has its membarrier state
* updated.
*/
+ mutex_lock(&membarrier_ipi_mutex);
cpus_read_lock();
rcu_read_lock();
for_each_online_cpu(cpu) {
@@ -269,6 +277,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
free_cpumask_var(tmpmask);
cpus_read_unlock();
+ mutex_unlock(&membarrier_ipi_mutex);
return 0;
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 186e7d78ded5..873f364cb3c6 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -8,7 +8,7 @@
#include "pelt.h"
int sched_rr_timeslice = RR_TIMESLICE;
-int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
+int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
/* More than 4 hours if BW_SHIFT equals 20. */
static const u64 max_rt_runtime = MAX_BW;
@@ -2659,9 +2659,6 @@ static int sched_rt_global_constraints(void)
static int sched_rt_global_validate(void)
{
- if (sysctl_sched_rt_period <= 0)
- return -EINVAL;
-
if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
((u64)sysctl_sched_rt_runtime *
@@ -2693,7 +2690,7 @@ int sched_rt_handler(struct ctl_table *table, int write,
old_period = sysctl_sched_rt_period;
old_runtime = sysctl_sched_rt_runtime;
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write) {
ret = sched_rt_global_validate();
@@ -2738,6 +2735,9 @@ int sched_rr_handler(struct ctl_table *table, int write,
sched_rr_timeslice =
sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
msecs_to_jiffies(sysctl_sched_rr_timeslice);
+
+ if (sysctl_sched_rr_timeslice <= 0)
+ sysctl_sched_rr_timeslice = jiffies_to_msecs(RR_TIMESLICE);
}
mutex_unlock(&mutex);
diff --git a/kernel/sys.c b/kernel/sys.c
index f6d6ce8da3e4..23e88587df87 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1711,74 +1711,87 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
struct task_struct *t;
unsigned long flags;
u64 tgutime, tgstime, utime, stime;
- unsigned long maxrss = 0;
+ unsigned long maxrss;
+ struct mm_struct *mm;
+ struct signal_struct *sig = p->signal;
+ unsigned int seq = 0;
- memset((char *)r, 0, sizeof (*r));
+retry:
+ memset(r, 0, sizeof(*r));
utime = stime = 0;
+ maxrss = 0;
if (who == RUSAGE_THREAD) {
task_cputime_adjusted(current, &utime, &stime);
accumulate_thread_rusage(p, r);
- maxrss = p->signal->maxrss;
- goto out;
+ maxrss = sig->maxrss;
+ goto out_thread;
}
- if (!lock_task_sighand(p, &flags))
- return;
+ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
switch (who) {
case RUSAGE_BOTH:
case RUSAGE_CHILDREN:
- utime = p->signal->cutime;
- stime = p->signal->cstime;
- r->ru_nvcsw = p->signal->cnvcsw;
- r->ru_nivcsw = p->signal->cnivcsw;
- r->ru_minflt = p->signal->cmin_flt;
- r->ru_majflt = p->signal->cmaj_flt;
- r->ru_inblock = p->signal->cinblock;
- r->ru_oublock = p->signal->coublock;
- maxrss = p->signal->cmaxrss;
+ utime = sig->cutime;
+ stime = sig->cstime;
+ r->ru_nvcsw = sig->cnvcsw;
+ r->ru_nivcsw = sig->cnivcsw;
+ r->ru_minflt = sig->cmin_flt;
+ r->ru_majflt = sig->cmaj_flt;
+ r->ru_inblock = sig->cinblock;
+ r->ru_oublock = sig->coublock;
+ maxrss = sig->cmaxrss;
if (who == RUSAGE_CHILDREN)
break;
/* fall through */
case RUSAGE_SELF:
- thread_group_cputime_adjusted(p, &tgutime, &tgstime);
- utime += tgutime;
- stime += tgstime;
- r->ru_nvcsw += p->signal->nvcsw;
- r->ru_nivcsw += p->signal->nivcsw;
- r->ru_minflt += p->signal->min_flt;
- r->ru_majflt += p->signal->maj_flt;
- r->ru_inblock += p->signal->inblock;
- r->ru_oublock += p->signal->oublock;
- if (maxrss < p->signal->maxrss)
- maxrss = p->signal->maxrss;
- t = p;
- do {
+ r->ru_nvcsw += sig->nvcsw;
+ r->ru_nivcsw += sig->nivcsw;
+ r->ru_minflt += sig->min_flt;
+ r->ru_majflt += sig->maj_flt;
+ r->ru_inblock += sig->inblock;
+ r->ru_oublock += sig->oublock;
+ if (maxrss < sig->maxrss)
+ maxrss = sig->maxrss;
+
+ rcu_read_lock();
+ __for_each_thread(sig, t)
accumulate_thread_rusage(t, r);
- } while_each_thread(p, t);
+ rcu_read_unlock();
+
break;
default:
BUG();
}
- unlock_task_sighand(p, &flags);
-out:
- r->ru_utime = ns_to_timeval(utime);
- r->ru_stime = ns_to_timeval(stime);
+ if (need_seqretry(&sig->stats_lock, seq)) {
+ seq = 1;
+ goto retry;
+ }
+ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
- if (who != RUSAGE_CHILDREN) {
- struct mm_struct *mm = get_task_mm(p);
+ if (who == RUSAGE_CHILDREN)
+ goto out_children;
- if (mm) {
- setmax_mm_hiwater_rss(&maxrss, mm);
- mmput(mm);
- }
+ thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+ utime += tgutime;
+ stime += tgstime;
+
+out_thread:
+ mm = get_task_mm(p);
+ if (mm) {
+ setmax_mm_hiwater_rss(&maxrss, mm);
+ mmput(mm);
}
+
+out_children:
r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
+ r->ru_utime = ns_to_kernel_old_timeval(utime);
+ r->ru_stime = ns_to_kernel_old_timeval(stime);
}
SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4f85f7ed42fc..865e539c7354 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -465,6 +465,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_rt_handler,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "sched_rt_runtime_us",
@@ -472,6 +474,8 @@ static struct ctl_table kern_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = sched_rt_handler,
+ .extra1 = &neg_one,
+ .extra2 = SYSCTL_INT_MAX,
},
{
.procname = "sched_rr_timeslice_ms",
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e2a055e46255..1b301dd1692b 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -981,6 +981,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
enum hrtimer_mode mode)
{
debug_activate(timer, mode);
+ WARN_ON_ONCE(!base->cpu_base->online);
base->cpu_base->active_bases |= 1 << base->index;
@@ -2069,6 +2070,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
cpu_base->softirq_next_timer = NULL;
cpu_base->expires_next = KTIME_MAX;
cpu_base->softirq_expires_next = KTIME_MAX;
+ cpu_base->online = 1;
hrtimer_cpu_base_init_expiry_lock(cpu_base);
return 0;
}
@@ -2136,6 +2138,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
raw_spin_unlock(&new_base->lock);
+ old_base->online = 0;
raw_spin_unlock(&old_base->lock);
return 0;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5f718a17a3e4..c7b9b1e4af7d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1398,6 +1398,7 @@ void tick_cancel_sched_timer(int cpu)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t idle_sleeptime, iowait_sleeptime;
+ unsigned long idle_calls, idle_sleeps;
# ifdef CONFIG_HIGH_RES_TIMERS
if (ts->sched_timer.base)
@@ -1406,9 +1407,13 @@ void tick_cancel_sched_timer(int cpu)
idle_sleeptime = ts->idle_sleeptime;
iowait_sleeptime = ts->iowait_sleeptime;
+ idle_calls = ts->idle_calls;
+ idle_sleeps = ts->idle_sleeps;
memset(ts, 0, sizeof(*ts));
ts->idle_sleeptime = idle_sleeptime;
ts->iowait_sleeptime = iowait_sleeptime;
+ ts->idle_calls = idle_calls;
+ ts->idle_sleeps = idle_sleeps;
}
#endif
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 105dd0b66329..c202dbd87860 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1093,13 +1093,15 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history,
}
/*
- * cycle_between - true if test occurs chronologically between before and after
+ * timestamp_in_interval - true if ts is chronologically in [start, end]
+ *
+ * True if ts occurs chronologically at or after start, and before or at end.
*/
-static bool cycle_between(u64 before, u64 test, u64 after)
+static bool timestamp_in_interval(u64 start, u64 end, u64 ts)
{
- if (test > before && test < after)
+ if (ts >= start && ts <= end)
return true;
- if (test < before && before > after)
+ if (start > end && (ts >= start || ts <= end))
return true;
return false;
}
@@ -1159,7 +1161,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
*/
now = tk_clock_read(&tk->tkr_mono);
interval_start = tk->tkr_mono.cycle_last;
- if (!cycle_between(interval_start, cycles, now)) {
+ if (!timestamp_in_interval(interval_start, now, cycles)) {
clock_was_set_seq = tk->clock_was_set_seq;
cs_was_changed_seq = tk->cs_was_changed_seq;
cycles = interval_start;
@@ -1172,10 +1174,8 @@ int get_device_system_crosststamp(int (*get_time_fn)
tk_core.timekeeper.offs_real);
base_raw = tk->tkr_raw.base;
- nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono,
- system_counterval.cycles);
- nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw,
- system_counterval.cycles);
+ nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles);
+ nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles);
} while (read_seqcount_retry(&tk_core.seq, seq));
xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
@@ -1190,13 +1190,13 @@ int get_device_system_crosststamp(int (*get_time_fn)
bool discontinuity;
/*
- * Check that the counter value occurs after the provided
+ * Check that the counter value is not before the provided
* history reference and that the history doesn't cross a
* clocksource change
*/
if (!history_begin ||
- !cycle_between(history_begin->cycles,
- system_counterval.cycles, cycles) ||
+ !timestamp_in_interval(history_begin->cycles,
+ cycles, system_counterval.cycles) ||
history_begin->cs_was_changed_seq != cs_was_changed_seq)
return -EINVAL;
partial_history_cycles = cycles - system_counterval.cycles;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0c0ca21d807d..ed505c6de7ca 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -738,7 +738,7 @@ __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
- return -EINVAL;
+ return EPOLLERR;
cpu_buffer = buffer->buffers[cpu];
work = &cpu_buffer->irq_work;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 35c150085556..6fd7dca57dd9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -39,6 +39,7 @@
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
+#include <linux/kmemleak.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
@@ -1977,7 +1978,7 @@ struct saved_cmdlines_buffer {
unsigned *map_cmdline_to_pid;
unsigned cmdline_num;
int cmdline_idx;
- char *saved_cmdlines;
+ char saved_cmdlines[];
};
static struct saved_cmdlines_buffer *savedcmd;
@@ -1991,47 +1992,60 @@ static inline void set_cmdline(int idx, const char *cmdline)
strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}
-static int allocate_cmdlines_buffer(unsigned int val,
- struct saved_cmdlines_buffer *s)
+static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
+ int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
+
+ kfree(s->map_cmdline_to_pid);
+ kmemleak_free(s);
+ free_pages((unsigned long)s, order);
+}
+
+static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
+{
+ struct saved_cmdlines_buffer *s;
+ struct page *page;
+ int orig_size, size;
+ int order;
+
+ /* Figure out how much is needed to hold the given number of cmdlines */
+ orig_size = sizeof(*s) + val * TASK_COMM_LEN;
+ order = get_order(orig_size);
+ size = 1 << (order + PAGE_SHIFT);
+ page = alloc_pages(GFP_KERNEL, order);
+ if (!page)
+ return NULL;
+
+ s = page_address(page);
+ kmemleak_alloc(s, size, 1, GFP_KERNEL);
+ memset(s, 0, sizeof(*s));
+
+ /* Round up to actual allocation */
+ val = (size - sizeof(*s)) / TASK_COMM_LEN;
+ s->cmdline_num = val;
+
s->map_cmdline_to_pid = kmalloc_array(val,
sizeof(*s->map_cmdline_to_pid),
GFP_KERNEL);
- if (!s->map_cmdline_to_pid)
- return -ENOMEM;
-
- s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
- if (!s->saved_cmdlines) {
- kfree(s->map_cmdline_to_pid);
- return -ENOMEM;
+ if (!s->map_cmdline_to_pid) {
+ free_saved_cmdlines_buffer(s);
+ return NULL;
}
s->cmdline_idx = 0;
- s->cmdline_num = val;
memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
sizeof(s->map_pid_to_cmdline));
memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
val * sizeof(*s->map_cmdline_to_pid));
- return 0;
+ return s;
}
static int trace_create_savedcmd(void)
{
- int ret;
-
- savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
- if (!savedcmd)
- return -ENOMEM;
+ savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
- ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
- if (ret < 0) {
- kfree(savedcmd);
- savedcmd = NULL;
- return -ENOMEM;
- }
-
- return 0;
+ return savedcmd ? 0 : -ENOMEM;
}
int is_tracing_stopped(void)
@@ -5283,26 +5297,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
-{
- kfree(s->saved_cmdlines);
- kfree(s->map_cmdline_to_pid);
- kfree(s);
-}
-
static int tracing_resize_saved_cmdlines(unsigned int val)
{
struct saved_cmdlines_buffer *s, *savedcmd_temp;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = allocate_cmdlines_buffer(val);
if (!s)
return -ENOMEM;
- if (allocate_cmdlines_buffer(val, s) < 0) {
- kfree(s);
- return -ENOMEM;
- }
-
preempt_disable();
arch_spin_lock(&trace_cmdline_lock);
savedcmd_temp = savedcmd;
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 82580f7ffad9..634d120eab2b 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1140,8 +1140,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file)
{
- if (tracing_alloc_snapshot_instance(file->tr) != 0)
- return 0;
+ int ret = tracing_alloc_snapshot_instance(file->tr);
+
+ if (ret < 0)
+ return ret;
return register_trigger(glob, ops, data, file);
}
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 83c2a0598c64..33c463967bb3 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
}
memcpy(elt->key, key, map->key_size);
- entry->val = elt;
+ /*
+ * Ensure the initialization is visible and
+ * publish the elt.
+ */
+ smp_wmb();
+ WRITE_ONCE(entry->val, elt);
atomic64_inc(&map->hits);
return entry->val;