diff options
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 130 |
1 files changed, 105 insertions, 25 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 987d3447bf2a..d2903d8834fe 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -493,6 +493,8 @@ struct ring_buffer_per_cpu { unsigned long read_bytes; u64 write_stamp; u64 read_stamp; + /* pages removed since last reset */ + unsigned long pages_removed; /* ring buffer pages to update, > 0 to add, < 0 to remove */ long nr_pages_to_update; struct list_head new_pages; /* new pages to add */ @@ -528,6 +530,7 @@ struct ring_buffer_iter { struct buffer_page *head_page; struct buffer_page *cache_reader_page; unsigned long cache_read; + unsigned long cache_pages_removed; u64 read_stamp; }; @@ -542,8 +545,9 @@ static void rb_wake_up_waiters(struct irq_work *work) struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); wake_up_all(&rbwork->waiters); - if (rbwork->wakeup_full) { + if (rbwork->full_waiters_pending || rbwork->wakeup_full) { rbwork->wakeup_full = false; + rbwork->full_waiters_pending = false; wake_up_all(&rbwork->full_waiters); } } @@ -560,7 +564,7 @@ static void rb_wake_up_waiters(struct irq_work *work) */ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) { - struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer); + struct ring_buffer_per_cpu *cpu_buffer; DEFINE_WAIT(wait); struct rb_irq_work *work; int ret = 0; @@ -1325,11 +1329,13 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) struct list_head *head = cpu_buffer->pages; struct buffer_page *bpage, *tmp; - free_buffer_page(cpu_buffer->reader_page); + irq_work_sync(&cpu_buffer->irq_work.work); - rb_head_page_deactivate(cpu_buffer); + free_buffer_page(cpu_buffer->reader_page); if (head) { + rb_head_page_deactivate(cpu_buffer); + list_for_each_entry_safe(bpage, tmp, head, list) { list_del_init(&bpage->list); free_buffer_page(bpage); @@ -1338,6 +1344,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) free_buffer_page(bpage); } + free_page((unsigned long)cpu_buffer->free_page); + kfree(cpu_buffer); } @@ -1430,6 +1438,8 @@ ring_buffer_free(struct ring_buffer *buffer) cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); + irq_work_sync(&buffer->irq_work.work); + for_each_buffer_cpu(buffer, cpu) rb_free_cpu_buffer(buffer->buffers[cpu]); @@ -1509,6 +1519,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) to_remove = rb_list_head(to_remove)->next; head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD; } + /* Read iterators need to reset themselves when some pages removed */ + cpu_buffer->pages_removed += nr_removed; next_page = rb_list_head(to_remove)->next; @@ -1530,12 +1542,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) cpu_buffer->head_page = list_entry(next_page, struct buffer_page, list); - /* - * change read pointer to make sure any read iterators reset - * themselves - */ - cpu_buffer->read = 0; - /* pages are removed, resume tracing and then free the pages */ atomic_dec(&cpu_buffer->record_disabled); raw_spin_unlock_irq(&cpu_buffer->reader_lock); @@ -1749,6 +1755,8 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, err = -ENOMEM; goto out_err; } + + cond_resched(); } get_online_cpus(); @@ -2156,6 +2164,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* Mark the rest of the page with padding */ rb_event_set_padding(event); + /* Make sure the padding is visible before the write update */ + smp_wmb(); + /* Set the write back to the previous setting */ local_sub(length, &tail_page->write); return; @@ -2167,6 +2178,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* Make sure the padding is visible before the tail_page->write update */ + smp_wmb(); + /* Set write to end of buffer */ length = (tail + length) - BUF_PAGE_SIZE; local_sub(length, &tail_page->write); @@ -2456,6 +2470,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) if (RB_WARN_ON(cpu_buffer, rb_is_reader_page(cpu_buffer->tail_page))) return; + /* + * No need for a memory barrier here, as the update + * of the tail_page did it for this page. + */ local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); @@ -2469,6 +2487,8 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) while (rb_commit_index(cpu_buffer) != rb_page_write(cpu_buffer->commit_page)) { + /* Make sure the readers see the content of what is committed. */ + smp_wmb(); local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); RB_WARN_ON(cpu_buffer, @@ -2871,6 +2891,12 @@ rb_reserve_next_event(struct ring_buffer *buffer, int nr_loops = 0; u64 diff; + /* ring buffer does cmpxchg, make sure it is safe in NMI context */ + if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && + (unlikely(in_nmi()))) { + return NULL; + } + rb_start_commit(cpu_buffer); #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP @@ -3565,6 +3591,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) iter->cache_reader_page = iter->head_page; iter->cache_read = cpu_buffer->read; + iter->cache_pages_removed = cpu_buffer->pages_removed; if (iter->head) iter->read_stamp = cpu_buffer->read_stamp; @@ -3812,6 +3839,38 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); + /* + * The writer has preempt disable, wait for it. But not forever + * Although, 1 second is pretty much "forever" + */ +#define USECS_WAIT 1000000 + for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) { + /* If the write is past the end of page, a writer is still updating it */ + if (likely(!reader || rb_page_write(reader) <= BUF_PAGE_SIZE)) + break; + + udelay(1); + + /* Get the latest version of the reader write value */ + smp_rmb(); + } + + /* The writer is not moving forward? Something is wrong */ + if (RB_WARN_ON(cpu_buffer, nr_loops == USECS_WAIT)) + reader = NULL; + + /* + * Make sure we see any padding after the write update + * (see rb_reset_tail()). + * + * In addition, a writer may be writing on the reader page + * if the page has not been fully filled, so the read barrier + * is also needed to make sure we see the content of what is + * committed by the writer (see rb_set_commit_to_write()). + */ + smp_rmb(); + + return reader; } @@ -3973,12 +4032,13 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) buffer = cpu_buffer->buffer; /* - * Check if someone performed a consuming read to - * the buffer. A consuming read invalidates the iterator - * and we need to reset the iterator in this case. + * Check if someone performed a consuming read to the buffer + * or removed some pages from the buffer. In these cases, + * iterator was invalidated and we need to reset it. */ if (unlikely(iter->cache_read != cpu_buffer->read || - iter->cache_reader_page != cpu_buffer->reader_page)) + iter->cache_reader_page != cpu_buffer->reader_page || + iter->cache_pages_removed != cpu_buffer->pages_removed)) rb_iter_reset(iter); again: @@ -4359,28 +4419,34 @@ unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_size); +static void rb_clear_buffer_page(struct buffer_page *page) +{ + local_set(&page->write, 0); + local_set(&page->entries, 0); + rb_init_page(page->page); + page->read = 0; +} + static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) { + struct buffer_page *page; + rb_head_page_deactivate(cpu_buffer); cpu_buffer->head_page = list_entry(cpu_buffer->pages, struct buffer_page, list); - local_set(&cpu_buffer->head_page->write, 0); - local_set(&cpu_buffer->head_page->entries, 0); - local_set(&cpu_buffer->head_page->page->commit, 0); - - cpu_buffer->head_page->read = 0; + rb_clear_buffer_page(cpu_buffer->head_page); + list_for_each_entry(page, cpu_buffer->pages, list) { + rb_clear_buffer_page(page); + } cpu_buffer->tail_page = cpu_buffer->head_page; cpu_buffer->commit_page = cpu_buffer->head_page; INIT_LIST_HEAD(&cpu_buffer->reader_page->list); INIT_LIST_HEAD(&cpu_buffer->new_pages); - local_set(&cpu_buffer->reader_page->write, 0); - local_set(&cpu_buffer->reader_page->entries, 0); - local_set(&cpu_buffer->reader_page->page->commit, 0); - cpu_buffer->reader_page->read = 0; + rb_clear_buffer_page(cpu_buffer->reader_page); local_set(&cpu_buffer->entries_bytes, 0); local_set(&cpu_buffer->overrun, 0); @@ -4399,6 +4465,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->last_overrun = 0; rb_head_page_activate(cpu_buffer); + cpu_buffer->pages_removed = 0; } /** @@ -4651,11 +4718,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); */ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) { - struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct ring_buffer_per_cpu *cpu_buffer; struct buffer_data_page *bpage = data; struct page *page = virt_to_page(bpage); unsigned long flags; + if (!buffer || !buffer->buffers || !buffer->buffers[cpu]) + return; + + cpu_buffer = buffer->buffers[cpu]; + /* If the page is still in use someplace else, we can't reuse it */ if (page_ref_count(page) > 1) goto out; @@ -4770,7 +4842,15 @@ int ring_buffer_read_page(struct ring_buffer *buffer, unsigned int pos = 0; unsigned int size; - if (full) + /* + * If a full page is expected, this can still be returned + * if there's been a previous partial read and the + * rest of the page can be read and the commit page is off + * the reader page. + */ + if (full && + (!read || (len < (commit - read)) || + cpu_buffer->reader_page == cpu_buffer->commit_page)) goto out_unlock; if (len > (commit - read)) |