diff options
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 94 |
1 files changed, 76 insertions, 18 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8123a8b53c54..359f44cee08a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -416,14 +416,16 @@ struct rb_event_info { /* * Used for which event context the event is in. - * NMI = 0 - * IRQ = 1 - * SOFTIRQ = 2 - * NORMAL = 3 + * TRANSITION = 0 + * NMI = 1 + * IRQ = 2 + * SOFTIRQ = 3 + * NORMAL = 4 * * See trace_recursive_lock() comment below for more details. */ enum { + RB_CTX_TRANSITION, RB_CTX_NMI, RB_CTX_IRQ, RB_CTX_SOFTIRQ, @@ -1625,18 +1627,18 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, { struct ring_buffer_per_cpu *cpu_buffer; unsigned long nr_pages; - int cpu, err = 0; + int cpu, err; /* * Always succeed at resizing a non-existent buffer: */ if (!buffer) - return size; + return 0; /* Make sure the requested buffer exists */ if (cpu_id != RING_BUFFER_ALL_CPUS && !cpumask_test_cpu(cpu_id, buffer->cpumask)) - return size; + return 0; nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); @@ -1776,7 +1778,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, } mutex_unlock(&buffer->mutex); - return size; + return 0; out_err: for_each_buffer_cpu(buffer, cpu) { @@ -2553,10 +2555,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * a bit of overhead in something as critical as function tracing, * we use a bitmask trick. * - * bit 0 = NMI context - * bit 1 = IRQ context - * bit 2 = SoftIRQ context - * bit 3 = normal context. + * bit 1 = NMI context + * bit 2 = IRQ context + * bit 3 = SoftIRQ context + * bit 4 = normal context. * * This works because this is the order of contexts that can * preempt other contexts. A SoftIRQ never preempts an IRQ @@ -2579,6 +2581,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * The least significant bit can be cleared this way, and it * just so happens that it is the same bit corresponding to * the current context. + * + * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit + * is set when a recursion is detected at the current context, and if + * the TRANSITION bit is already set, it will fail the recursion. + * This is needed because there's a lag between the changing of + * interrupt context and updating the preempt count. In this case, + * a false positive will be found. To handle this, one extra recursion + * is allowed, and this is done by the TRANSITION bit. If the TRANSITION + * bit is already set, then it is considered a recursion and the function + * ends. Otherwise, the TRANSITION bit is set, and that bit is returned. + * + * On the trace_recursive_unlock(), the TRANSITION bit will be the first + * to be cleared. Even if it wasn't the context that set it. That is, + * if an interrupt comes in while NORMAL bit is set and the ring buffer + * is called before preempt_count() is updated, since the check will + * be on the NORMAL bit, the TRANSITION bit will then be set. If an + * NMI then comes in, it will set the NMI bit, but when the NMI code + * does the trace_recursive_unlock() it will clear the TRANSTION bit + * and leave the NMI bit set. But this is fine, because the interrupt + * code that set the TRANSITION bit will then clear the NMI bit when it + * calls trace_recursive_unlock(). If another NMI comes in, it will + * set the TRANSITION bit and continue. + * + * Note: The TRANSITION bit only handles a single transition between context. */ static __always_inline int @@ -2597,8 +2623,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) } else bit = RB_CTX_NORMAL; - if (unlikely(val & (1 << bit))) - return 1; + if (unlikely(val & (1 << bit))) { + /* + * It is possible that this was called by transitioning + * between interrupt context, and preempt_count() has not + * been updated yet. In this case, use the TRANSITION bit. + */ + bit = RB_CTX_TRANSITION; + if (val & (1 << bit)) + return 1; + } val |= (1 << bit); cpu_buffer->current_context = val; @@ -3020,10 +3054,30 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) if (unlikely(!head)) return true; - return reader->read == rb_page_commit(reader) && - (commit == reader || - (commit == head && - head->read == rb_page_commit(commit))); + /* Reader should exhaust content in reader page */ + if (reader->read != rb_page_commit(reader)) + return false; + + /* + * If writers are committing on the reader page, knowing all + * committed content has been read, the ring buffer is empty. + */ + if (commit == reader) + return true; + + /* + * If writers are committing on a page other than reader page + * and head page, there should always be content to read. + */ + if (commit != head) + return false; + + /* + * Writers are committing on the head page, we just need + * to care about there're committed data, and the reader will + * swap reader page with head page when it is to read data. + */ + return rb_page_commit(commit) == 0; } /** @@ -4228,6 +4282,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) if (!cpumask_test_cpu(cpu, buffer->cpumask)) return; + /* prevent another thread from changing buffer sizes */ + mutex_lock(&buffer->mutex); atomic_inc(&buffer->resize_disabled); atomic_inc(&cpu_buffer->record_disabled); @@ -4251,6 +4307,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) atomic_dec(&cpu_buffer->record_disabled); atomic_dec(&buffer->resize_disabled); + + mutex_unlock(&buffer->mutex); } EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); |