aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/events/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--kernel/events/core.c321
1 files changed, 226 insertions, 95 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a35d742b0ba8..1f215aa8f30d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -798,7 +798,7 @@ static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
*/
static void perf_cgroup_switch(struct task_struct *task, int mode)
{
- struct perf_cpu_context *cpuctx;
+ struct perf_cpu_context *cpuctx, *tmp;
struct list_head *list;
unsigned long flags;
@@ -809,7 +809,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
local_irq_save(flags);
list = this_cpu_ptr(&cgrp_cpuctx_list);
- list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) {
+ list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) {
WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
@@ -1133,6 +1133,11 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
return 0;
}
+static int perf_mux_hrtimer_restart_ipi(void *arg)
+{
+ return perf_mux_hrtimer_restart(arg);
+}
+
void perf_pmu_disable(struct pmu *pmu)
{
int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -1705,28 +1710,34 @@ static inline void perf_event__state_init(struct perf_event *event)
PERF_EVENT_STATE_INACTIVE;
}
-static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
+static int __perf_event_read_size(u64 read_format, int nr_siblings)
{
int entry = sizeof(u64); /* value */
int size = 0;
int nr = 1;
- if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
size += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
size += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_ID)
+ if (read_format & PERF_FORMAT_ID)
+ entry += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_LOST)
entry += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_GROUP) {
+ if (read_format & PERF_FORMAT_GROUP) {
nr += nr_siblings;
size += sizeof(u64);
}
- size += entry * nr;
- event->read_size = size;
+ /*
+ * Since perf_event_validate_size() limits this to 16k and inhibits
+ * adding more siblings, this will never overflow.
+ */
+ return size + nr * entry;
}
static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
@@ -1767,8 +1778,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
*/
static void perf_event__header_size(struct perf_event *event)
{
- __perf_event_read_size(event,
- event->group_leader->nr_siblings);
+ event->read_size =
+ __perf_event_read_size(event->attr.read_format,
+ event->group_leader->nr_siblings);
__perf_event_header_size(event, event->attr.sample_type);
}
@@ -1799,23 +1811,44 @@ static void perf_event__id_header_size(struct perf_event *event)
event->id_header_size = size;
}
+/*
+ * Check that adding an event to the group does not result in anybody
+ * overflowing the 64k event limit imposed by the output buffer.
+ *
+ * Specifically, check that the read_size for the event does not exceed 16k,
+ * read_size being the one term that grows with groups size. Since read_size
+ * depends on per-event read_format, also (re)check the existing events.
+ *
+ * This leaves 48k for the constant size fields and things like callchains,
+ * branch stacks and register sets.
+ */
static bool perf_event_validate_size(struct perf_event *event)
{
- /*
- * The values computed here will be over-written when we actually
- * attach the event.
- */
- __perf_event_read_size(event, event->group_leader->nr_siblings + 1);
- __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
- perf_event__id_header_size(event);
+ struct perf_event *sibling, *group_leader = event->group_leader;
+
+ if (__perf_event_read_size(event->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
+
+ if (__perf_event_read_size(group_leader->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
/*
- * Sum the lot; should not exceed the 64k limit we have on records.
- * Conservative limit to allow for callchains and other variable fields.
+ * When creating a new group leader, group_leader->ctx is initialized
+ * after the size has been validated, but we cannot safely use
+ * for_each_sibling_event() until group_leader->ctx is set. A new group
+ * leader cannot have any siblings yet, so we can safely skip checking
+ * the non-existent siblings.
*/
- if (event->read_size + event->header_size +
- event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
- return false;
+ if (event == group_leader)
+ return true;
+
+ for_each_sibling_event(sibling, group_leader) {
+ if (__perf_event_read_size(sibling->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
+ }
return true;
}
@@ -1843,6 +1876,7 @@ static void perf_group_attach(struct perf_event *event)
list_add_tail(&event->sibling_list, &group_leader->sibling_list);
group_leader->nr_siblings++;
+ group_leader->group_generation++;
perf_event__header_size(group_leader);
@@ -1913,6 +1947,7 @@ static void perf_group_detach(struct perf_event *event)
if (event->group_leader != event) {
list_del_init(&event->sibling_list);
event->group_leader->nr_siblings--;
+ event->group_leader->group_generation++;
goto out;
}
@@ -2086,6 +2121,7 @@ __perf_remove_from_context(struct perf_event *event,
if (!ctx->nr_events && ctx->is_active) {
ctx->is_active = 0;
+ ctx->rotate_necessary = 0;
if (ctx->task) {
WARN_ON_ONCE(cpuctx->task_ctx != ctx);
cpuctx->task_ctx = NULL;
@@ -2961,6 +2997,13 @@ static void ctx_sched_out(struct perf_event_context *ctx,
if (is_active & EVENT_FLEXIBLE) {
list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list)
group_sched_out(event, cpuctx, ctx);
+
+ /*
+ * Since we cleared EVENT_FLEXIBLE, also clear
+ * rotate_necessary, is will be reset by
+ * ctx_flexible_sched_in() when needed.
+ */
+ ctx->rotate_necessary = 0;
}
perf_pmu_enable(ctx->pmu);
}
@@ -3319,10 +3362,13 @@ static int flexible_sched_in(struct perf_event *event, void *data)
return 0;
if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) {
- if (!group_sched_in(event, sid->cpuctx, sid->ctx))
- list_add_tail(&event->active_list, &sid->ctx->flexible_active);
- else
+ int ret = group_sched_in(event, sid->cpuctx, sid->ctx);
+ if (ret) {
sid->can_add_hw = 0;
+ sid->ctx->rotate_necessary = 1;
+ return 0;
+ }
+ list_add_tail(&event->active_list, &sid->ctx->flexible_active);
}
return 0;
@@ -3680,34 +3726,45 @@ static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
perf_event_groups_insert(&ctx->flexible_groups, event);
}
+/* pick an event from the flexible_groups to rotate */
static inline struct perf_event *
-ctx_first_active(struct perf_event_context *ctx)
+ctx_event_to_rotate(struct perf_event_context *ctx)
{
- return list_first_entry_or_null(&ctx->flexible_active,
- struct perf_event, active_list);
+ struct perf_event *event;
+
+ /* pick the first active flexible event */
+ event = list_first_entry_or_null(&ctx->flexible_active,
+ struct perf_event, active_list);
+
+ /* if no active flexible event, pick the first event */
+ if (!event) {
+ event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
+ typeof(*event), group_node);
+ }
+
+ /*
+ * Unconditionally clear rotate_necessary; if ctx_flexible_sched_in()
+ * finds there are unschedulable events, it will set it again.
+ */
+ ctx->rotate_necessary = 0;
+
+ return event;
}
static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
{
struct perf_event *cpu_event = NULL, *task_event = NULL;
- bool cpu_rotate = false, task_rotate = false;
- struct perf_event_context *ctx = NULL;
+ struct perf_event_context *task_ctx = NULL;
+ int cpu_rotate, task_rotate;
/*
* Since we run this from IRQ context, nobody can install new
* events, thus the event count values are stable.
*/
- if (cpuctx->ctx.nr_events) {
- if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
- cpu_rotate = true;
- }
-
- ctx = cpuctx->task_ctx;
- if (ctx && ctx->nr_events) {
- if (ctx->nr_events != ctx->nr_active)
- task_rotate = true;
- }
+ cpu_rotate = cpuctx->ctx.rotate_necessary;
+ task_ctx = cpuctx->task_ctx;
+ task_rotate = task_ctx ? task_ctx->rotate_necessary : 0;
if (!(cpu_rotate || task_rotate))
return false;
@@ -3716,25 +3773,25 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
perf_pmu_disable(cpuctx->ctx.pmu);
if (task_rotate)
- task_event = ctx_first_active(ctx);
+ task_event = ctx_event_to_rotate(task_ctx);
if (cpu_rotate)
- cpu_event = ctx_first_active(&cpuctx->ctx);
+ cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
/*
* As per the order given at ctx_resched() first 'pop' task flexible
* and then, if needed CPU flexible.
*/
- if (task_event || (ctx && cpu_event))
- ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+ if (task_event || (task_ctx && cpu_event))
+ ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE);
if (cpu_event)
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
if (task_event)
- rotate_ctx(ctx, task_event);
+ rotate_ctx(task_ctx, task_event);
if (cpu_event)
rotate_ctx(&cpuctx->ctx, cpu_event);
- perf_event_sched_in(cpuctx, ctx, current);
+ perf_event_sched_in(cpuctx, task_ctx, current);
perf_pmu_enable(cpuctx->ctx.pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
@@ -4133,7 +4190,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);
+ raw_spin_lock_irqsave(&ctx->lock, flags);
++ctx->pin_count;
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
return ctx;
}
@@ -4726,7 +4785,7 @@ static int __perf_read_group_add(struct perf_event *leader,
u64 read_format, u64 *values)
{
struct perf_event_context *ctx = leader->ctx;
- struct perf_event *sub;
+ struct perf_event *sub, *parent;
unsigned long flags;
int n = 1; /* skip @nr */
int ret;
@@ -4736,6 +4795,33 @@ static int __perf_read_group_add(struct perf_event *leader,
return ret;
raw_spin_lock_irqsave(&ctx->lock, flags);
+ /*
+ * Verify the grouping between the parent and child (inherited)
+ * events is still in tact.
+ *
+ * Specifically:
+ * - leader->ctx->lock pins leader->sibling_list
+ * - parent->child_mutex pins parent->child_list
+ * - parent->ctx->mutex pins parent->sibling_list
+ *
+ * Because parent->ctx != leader->ctx (and child_list nests inside
+ * ctx->mutex), group destruction is not atomic between children, also
+ * see perf_event_release_kernel(). Additionally, parent can grow the
+ * group.
+ *
+ * Therefore it is possible to have parent and child groups in a
+ * different configuration and summing over such a beast makes no sense
+ * what so ever.
+ *
+ * Reject this.
+ */
+ parent = leader->parent;
+ if (parent &&
+ (parent->group_generation != leader->group_generation ||
+ parent->nr_siblings != leader->nr_siblings)) {
+ ret = -ECHILD;
+ goto unlock;
+ }
/*
* Since we co-schedule groups, {enabled,running} times of siblings
@@ -4758,15 +4844,20 @@ static int __perf_read_group_add(struct perf_event *leader,
values[n++] += perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&leader->lost_samples);
for_each_sibling_event(sub, leader) {
values[n++] += perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&sub->lost_samples);
}
+unlock:
raw_spin_unlock_irqrestore(&ctx->lock, flags);
- return 0;
+ return ret;
}
static int perf_read_group(struct perf_event *event,
@@ -4785,10 +4876,6 @@ static int perf_read_group(struct perf_event *event,
values[0] = 1 + leader->nr_siblings;
- /*
- * By locking the child_mutex of the leader we effectively
- * lock the child list of all siblings.. XXX explain how.
- */
mutex_lock(&leader->child_mutex);
ret = __perf_read_group_add(leader, read_format, values);
@@ -4819,7 +4906,7 @@ static int perf_read_one(struct perf_event *event,
u64 read_format, char __user *buf)
{
u64 enabled, running;
- u64 values[4];
+ u64 values[5];
int n = 0;
values[n++] = __perf_event_read_value(event, &enabled, &running);
@@ -4829,6 +4916,8 @@ static int perf_read_one(struct perf_event *event,
values[n++] = running;
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(event);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&event->lost_samples);
if (copy_to_user(buf, values, n * sizeof(u64)))
return -EFAULT;
@@ -5475,11 +5564,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
-
struct ring_buffer *rb = ring_buffer_get(event);
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);
+ bool detach_rest = false;
if (event->pmu->event_unmapped)
event->pmu->event_unmapped(event, vma->vm_mm);
@@ -5510,7 +5599,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
}
- atomic_dec(&rb->mmap_count);
+ if (atomic_dec_and_test(&rb->mmap_count))
+ detach_rest = true;
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
goto out_put;
@@ -5519,7 +5609,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
/* If there's still other mmap()s of this buffer, we're done. */
- if (atomic_read(&rb->mmap_count))
+ if (!detach_rest)
goto out_put;
/*
@@ -5694,10 +5784,10 @@ again:
if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
/*
- * Raced against perf_mmap_close() through
- * perf_event_set_output(). Try again, hope for better
- * luck.
+ * Raced against perf_mmap_close(); remove the
+ * event and try again.
*/
+ ring_buffer_attach(event, NULL);
mutex_unlock(&event->mmap_mutex);
goto again;
}
@@ -6137,7 +6227,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
u64 enabled, u64 running)
{
u64 read_format = event->attr.read_format;
- u64 values[4];
+ u64 values[5];
int n = 0;
values[n++] = perf_event_count(event);
@@ -6151,6 +6241,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
}
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(event);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&event->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
}
@@ -6161,7 +6253,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
{
struct perf_event *leader = event->group_leader, *sub;
u64 read_format = event->attr.read_format;
- u64 values[5];
+ u64 values[6];
int n = 0;
values[n++] = 1 + leader->nr_siblings;
@@ -6179,6 +6271,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
values[n++] = perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&leader->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
@@ -6192,6 +6286,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
values[n++] = perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
+ if (read_format & PERF_FORMAT_LOST)
+ values[n++] = atomic64_read(&sub->lost_samples);
__output_copy(handle, values, n * sizeof(u64));
}
@@ -6399,7 +6495,6 @@ void perf_output_sample(struct perf_output_handle *handle,
static u64 perf_virt_to_phys(u64 virt)
{
u64 phys_addr = 0;
- struct page *p = NULL;
if (!virt)
return 0;
@@ -6418,14 +6513,15 @@ static u64 perf_virt_to_phys(u64 virt)
* If failed, leave phys_addr as 0.
*/
if (current->mm != NULL) {
+ struct page *p;
+
pagefault_disable();
- if (__get_user_pages_fast(virt, 1, 0, &p) == 1)
+ if (__get_user_pages_fast(virt, 1, 0, &p) == 1) {
phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
+ put_page(p);
+ }
pagefault_enable();
}
-
- if (p)
- put_page(p);
}
return phys_addr;
@@ -7818,8 +7914,8 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle)
hwc->interrupts = 1;
} else {
hwc->interrupts++;
- if (unlikely(throttle
- && hwc->interrupts >= max_samples_per_tick)) {
+ if (unlikely(throttle &&
+ hwc->interrupts > max_samples_per_tick)) {
__this_cpu_inc(perf_throttled_count);
tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
hwc->interrupts = MAX_INTERRUPTS;
@@ -8889,7 +8985,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
return;
if (ifh->nr_file_filters) {
- mm = get_task_mm(event->ctx->task);
+ mm = get_task_mm(task);
if (!mm)
goto restart;
@@ -9047,6 +9143,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
int fpos = token == IF_SRC_FILE ? 2 : 1;
+ kfree(filename);
filename = match_strdup(&args[fpos]);
if (!filename) {
ret = -ENOMEM;
@@ -9093,16 +9190,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
*/
ret = -EOPNOTSUPP;
if (!event->ctx->task)
- goto fail_free_name;
+ goto fail;
/* look up the path and grab its inode */
ret = kern_path(filename, LOOKUP_FOLLOW,
&filter->path);
if (ret)
- goto fail_free_name;
-
- kfree(filename);
- filename = NULL;
+ goto fail;
ret = -EINVAL;
if (!filter->path.dentry ||
@@ -9114,21 +9208,24 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
}
/* ready to consume more filters */
+ kfree(filename);
+ filename = NULL;
state = IF_STATE_ACTION;
filter = NULL;
+ kernel = 0;
}
}
if (state != IF_STATE_ACTION)
goto fail;
+ kfree(filename);
kfree(orig);
return 0;
-fail_free_name:
- kfree(filename);
fail:
+ kfree(filename);
free_filters_list(filters);
kfree(orig);
@@ -9618,8 +9715,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
- cpu_function_call(cpu,
- (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
+ cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpuctx);
}
cpus_read_unlock();
mutex_unlock(&mux_interval_mutex);
@@ -9656,13 +9752,15 @@ static int pmu_dev_alloc(struct pmu *pmu)
pmu->dev->groups = pmu->attr_groups;
device_initialize(pmu->dev);
- ret = dev_set_name(pmu->dev, "%s", pmu->name);
- if (ret)
- goto free_dev;
dev_set_drvdata(pmu->dev, pmu);
pmu->dev->bus = &pmu_bus;
pmu->dev->release = pmu_dev_release;
+
+ ret = dev_set_name(pmu->dev, "%s", pmu->name);
+ if (ret)
+ goto free_dev;
+
ret = device_add(pmu->dev);
if (ret)
goto free_dev;
@@ -10370,14 +10468,25 @@ err_size:
goto out;
}
+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+ if (b < a)
+ swap(a, b);
+
+ mutex_lock(a);
+ mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
static int
perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
{
struct ring_buffer *rb = NULL;
int ret = -EINVAL;
- if (!output_event)
+ if (!output_event) {
+ mutex_lock(&event->mmap_mutex);
goto set;
+ }
/* don't allow circular references */
if (event == output_event)
@@ -10392,7 +10501,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
/*
* If its not a per-cpu rb, it must be the same task.
*/
- if (output_event->cpu == -1 && output_event->ctx != event->ctx)
+ if (output_event->cpu == -1 && output_event->hw.target != event->hw.target)
goto out;
/*
@@ -10415,8 +10524,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
event->pmu != output_event->pmu)
goto out;
+ /*
+ * Hold both mmap_mutex to serialize against perf_mmap_close(). Since
+ * output_event is already on rb->event_list, and the list iteration
+ * restarts after every removal, it is guaranteed this new event is
+ * observed *OR* if output_event is already removed, it's guaranteed we
+ * observe !rb->mmap_count.
+ */
+ mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex);
set:
- mutex_lock(&event->mmap_mutex);
/* Can't redirect output if we've got an active mmap() */
if (atomic_read(&event->mmap_count))
goto unlock;
@@ -10426,6 +10542,12 @@ set:
rb = ring_buffer_get(output_event);
if (!rb)
goto unlock;
+
+ /* did we race against perf_mmap_close() */
+ if (!atomic_read(&rb->mmap_count)) {
+ ring_buffer_put(rb);
+ goto unlock;
+ }
}
ring_buffer_attach(event, rb);
@@ -10433,20 +10555,13 @@ set:
ret = 0;
unlock:
mutex_unlock(&event->mmap_mutex);
+ if (output_event)
+ mutex_unlock(&output_event->mmap_mutex);
out:
return ret;
}
-static void mutex_lock_double(struct mutex *a, struct mutex *b)
-{
- if (b < a)
- swap(a, b);
-
- mutex_lock(a);
- mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
-}
-
static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
{
bool nmi_safe = false;
@@ -10530,7 +10645,7 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event *group_leader = NULL, *output_event = NULL;
struct perf_event *event, *sibling;
struct perf_event_attr attr;
- struct perf_event_context *ctx, *uninitialized_var(gctx);
+ struct perf_event_context *ctx, *gctx;
struct file *event_file = NULL;
struct fd group = {NULL, 0};
struct task_struct *task = NULL;
@@ -10732,6 +10847,9 @@ SYSCALL_DEFINE5(perf_event_open,
* Do not allow to attach to a group in a different task
* or CPU context. If we're moving SW events, we'll fix
* this up later, so allow that.
+ *
+ * Racy, not holding group_leader->ctx->mutex, see comment with
+ * perf_event_ctx_lock().
*/
if (!move_group && group_leader->ctx != ctx)
goto err_context;
@@ -10781,6 +10899,7 @@ SYSCALL_DEFINE5(perf_event_open,
} else {
perf_event_ctx_unlock(group_leader, gctx);
move_group = 0;
+ goto not_move_group;
}
}
@@ -10797,7 +10916,17 @@ SYSCALL_DEFINE5(perf_event_open,
}
} else {
mutex_lock(&ctx->mutex);
+
+ /*
+ * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx,
+ * see the group_leader && !move_group test earlier.
+ */
+ if (group_leader && group_leader->ctx != ctx) {
+ err = -EINVAL;
+ goto err_locked;
+ }
}
+not_move_group:
if (ctx->task == TASK_TOMBSTONE) {
err = -ESRCH;
@@ -11540,6 +11669,8 @@ static int inherit_group(struct perf_event *parent_event,
if (IS_ERR(child_ctr))
return PTR_ERR(child_ctr);
}
+ if (leader)
+ leader->group_generation = parent_event->group_generation;
return 0;
}