path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0060-perf-x86-amd-Add-AMD-Fam19h-Branch-Sampling-support.patch
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0060-perf-x86-amd-Add-AMD-Fam19h-Branch-Sampling-support.patch')
-rw-r--r--  meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0060-perf-x86-amd-Add-AMD-Fam19h-Branch-Sampling-support.patch  940
1 file changed, 940 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0060-perf-x86-amd-Add-AMD-Fam19h-Branch-Sampling-support.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0060-perf-x86-amd-Add-AMD-Fam19h-Branch-Sampling-support.patch
new file mode 100644
index 00000000..742373d6
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0060-perf-x86-amd-Add-AMD-Fam19h-Branch-Sampling-support.patch
@@ -0,0 +1,940 @@
+From cb555e5e85438938a91440bd9594ca145505039c Mon Sep 17 00:00:00 2001
+From: Stephane Eranian <eranian@google.com>
+Date: Tue, 22 Mar 2022 15:15:07 -0700
+Subject: [PATCH 60/86] perf/x86/amd: Add AMD Fam19h Branch Sampling support
+
+commit ada543459cab7f653dcacdaba4011a8bb19c627c upstream
+
+Add support for the AMD Fam19h 16-deep branch sampling feature as
+described in the AMD PPR Fam19h Model 01h Revision B1. This is a model
+specific extension. It is not an architected AMD feature.
+
+The Branch Sampling (BRS) operates with a 16-deep saturating buffer in MSR
+registers. There is no branch type filtering. All control flow changes are
+captured. BRS relies on specific programming of the core PMU of Fam19h. In
+particular, the following requirements must be met:
+ - the sampling period must be greater than 16 (the BRS depth)
+ - the sampling must use a fixed period, not frequency mode
+
+BRS interacts with the NMI interrupt as well. Because enabling BRS is
+expensive, it is only activated after P event occurrences, where P is the
+desired sampling period. At P occurrences of the event, the counter
+overflows, the CPU catches the interrupt, activates BRS for 16 branches until
+it saturates, and then delivers the NMI to the kernel. Between the overflow
+and the time BRS activates, more branches may be executed, skewing the period.
+All along, the sampling event keeps counting. The skid may be attenuated by
+reducing the sampling period by 16 (subsequent patch).
+
+BRS is integrated into perf_events seamlessly via the same
+PERF_SAMPLE_BRANCH_STACK sample type. BRS generates perf_branch_entry
+records in the sampling buffer. No prediction information is supported. The
+branches are stored in reverse order of execution. The most recent branch is
+the first entry in each record.
+
+No modification to the perf tool is necessary.
+
+BRS can be used with any sampling event. However, it is recommended to use
+the RETIRED_BRANCH_INSTRUCTIONS event because it matches what the BRS
+captures.
+
+$ perf record -b -c 1000037 -e cpu/event=0xc2,name=ret_br_instructions/ test
+
+$ perf report -D
+56531696056126 0x193c000 [0x1a8]: PERF_RECORD_SAMPLE(IP, 0x2): 18122/18230: 0x401d24 period: 1000037 addr: 0
+... branch stack: nr:16
+..... 0: 0000000000401d24 -> 0000000000401d5a 0 cycles 0
+..... 1: 0000000000401d5c -> 0000000000401d24 0 cycles 0
+..... 2: 0000000000401d22 -> 0000000000401d5c 0 cycles 0
+..... 3: 0000000000401d5e -> 0000000000401d22 0 cycles 0
+..... 4: 0000000000401d20 -> 0000000000401d5e 0 cycles 0
+..... 5: 0000000000401d3e -> 0000000000401d20 0 cycles 0
+..... 6: 0000000000401d42 -> 0000000000401d3e 0 cycles 0
+..... 7: 0000000000401d3c -> 0000000000401d42 0 cycles 0
+..... 8: 0000000000401d44 -> 0000000000401d3c 0 cycles 0
+..... 9: 0000000000401d3a -> 0000000000401d44 0 cycles 0
+..... 10: 0000000000401d46 -> 0000000000401d3a 0 cycles 0
+..... 11: 0000000000401d38 -> 0000000000401d46 0 cycles 0
+..... 12: 0000000000401d48 -> 0000000000401d38 0 cycles 0
+..... 13: 0000000000401d36 -> 0000000000401d48 0 cycles 0
+..... 14: 0000000000401d4a -> 0000000000401d36 0 cycles 0
+..... 15: 0000000000401d34 -> 0000000000401d4a 0 cycles 0
+ ... thread: test:18230
+ ...... dso: test
+
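+For programmatic use without the perf tool, an equivalent event can be opened
+directly through perf_event_open(). The following is only an illustrative
+sketch (hypothetical open_brs_event() helper; error handling and ring-buffer
+parsing omitted). The raw event code 0xc4 is the Fam19h retired taken branch
+event the kernel-side check expects, and the attribute settings mirror the
+constraints described above:
+
+  #include <linux/perf_event.h>
+  #include <string.h>
+  #include <sys/syscall.h>
+  #include <sys/types.h>
+  #include <unistd.h>
+
+  static int open_brs_event(pid_t pid)
+  {
+      struct perf_event_attr attr;
+
+      memset(&attr, 0, sizeof(attr));
+      attr.size = sizeof(attr);
+      attr.type = PERF_TYPE_RAW;
+      attr.config = 0xc4;           /* retired taken branch instructions */
+      attr.sample_period = 1000037; /* fixed period, must be > 16 (BRS depth) */
+      attr.freq = 0;                /* frequency mode is rejected for BRS */
+      attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
+      /* no filtering: all branch types at all privilege levels */
+      attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
+                                PERF_SAMPLE_BRANCH_USER |
+                                PERF_SAMPLE_BRANCH_KERNEL |
+                                PERF_SAMPLE_BRANCH_HV;
+
+      /* monitor task 'pid' on any CPU */
+      return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
+  }
+
+The returned file descriptor can then be mmap()ed and its PERF_RECORD_SAMPLE
+records parsed the same way the perf tool produces the report shown above.
+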
+Signed-off-by: Stephane Eranian <eranian@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20220322221517.2510440-4-eranian@google.com
+Signed-off-by: Zhaolong Zhang <zhaolong.zhang@windriver.com>
+---
+ arch/x86/events/amd/Makefile | 2 +-
+ arch/x86/events/amd/brs.c | 317 +++++++++++++++++++++++++++++++
+ arch/x86/events/amd/core.c | 233 ++++++++++++++++++++++-
+ arch/x86/events/core.c | 10 +-
+ arch/x86/events/perf_event.h | 101 ++++++++--
+ arch/x86/include/asm/msr-index.h | 4 +
+ 6 files changed, 645 insertions(+), 22 deletions(-)
+ create mode 100644 arch/x86/events/amd/brs.c
+
+diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
+index 6cbe38d5fd9d..cf323ffab5cd 100644
+--- a/arch/x86/events/amd/Makefile
++++ b/arch/x86/events/amd/Makefile
+@@ -1,5 +1,5 @@
+ # SPDX-License-Identifier: GPL-2.0
+-obj-$(CONFIG_CPU_SUP_AMD) += core.o
++obj-$(CONFIG_CPU_SUP_AMD) += core.o brs.o
+ obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o
+ obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o
+ obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE) += amd-uncore.o
+diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
+new file mode 100644
+index 000000000000..3c13c484c637
+--- /dev/null
++++ b/arch/x86/events/amd/brs.c
+@@ -0,0 +1,317 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Implement support for AMD Fam19h Branch Sampling feature
++ * Based on specifications published in AMD PPR Fam19 Model 01
++ *
++ * Copyright 2021 Google LLC
++ * Contributed by Stephane Eranian <eranian@google.com>
++ */
++#include <linux/kernel.h>
++#include <asm/msr.h>
++#include <asm/cpufeature.h>
++
++#include "../perf_event.h"
++
++#define BRS_POISON 0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */
++
++/* Debug Extension Configuration register layout */
++union amd_debug_extn_cfg {
++ __u64 val;
++ struct {
++ __u64 rsvd0:2, /* reserved */
++ brsmen:1, /* branch sample enable */
++ rsvd4_3:2,/* reserved - must be 0x3 */
++ vb:1, /* valid branches recorded */
++ rsvd2:10, /* reserved */
++ msroff:4, /* index of next entry to write */
++ rsvd3:4, /* reserved */
++ pmc:3, /* #PMC holding the sampling event */
++ rsvd4:37; /* reserved */
++ };
++};
++
++static inline unsigned int brs_from(int idx)
++{
++ return MSR_AMD_SAMP_BR_FROM + 2 * idx;
++}
++
++static inline unsigned int brs_to(int idx)
++{
++ return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
++}
++
++static inline void set_debug_extn_cfg(u64 val)
++{
++ /* bits[4:3] must always be set to 11b */
++ wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
++}
++
++static inline u64 get_debug_extn_cfg(void)
++{
++ u64 val;
++
++ rdmsrl(MSR_AMD_DBG_EXTN_CFG, val);
++ return val;
++}
++
++static bool __init amd_brs_detect(void)
++{
++ if (!boot_cpu_has(X86_FEATURE_BRS))
++ return false;
++
++ switch (boot_cpu_data.x86) {
++ case 0x19: /* AMD Fam19h (Zen3) */
++ x86_pmu.lbr_nr = 16;
++
++ /* No hardware filtering supported */
++ x86_pmu.lbr_sel_map = NULL;
++ x86_pmu.lbr_sel_mask = 0;
++ break;
++ default:
++ return false;
++ }
++
++ return true;
++}
++
++/*
++ * Current BRS implementation does not support branch type or privilege level
++ * filtering. Therefore, this function simply enforces these limitations.
++ * No need for a br_sel_map. Software filtering is not supported because it
++ * would not correlate well with a sampling period.
++ */
++int amd_brs_setup_filter(struct perf_event *event)
++{
++ u64 type = event->attr.branch_sample_type;
++
++ /* No BRS support */
++ if (!x86_pmu.lbr_nr)
++ return -EOPNOTSUPP;
++
++ /* Can only capture all branches, i.e., no filtering */
++ if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
++ return -EINVAL;
++
++ /* can only capture at all priv levels due to the way BRS works */
++ if ((type & PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_PLM_ALL)
++ return -EINVAL;
++
++ return 0;
++}
++
++/* tos = top of stack, i.e., last valid entry written */
++static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
++{
++ /*
++ * msroff: index of next entry to write so top-of-stack is one off
++ * if BRS is full then msroff is set back to 0.
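++	 * e.g. with lbr_nr = 16: msroff = 5 -> tos = 4, msroff = 0 -> tos = 15.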
++ */
++ return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
++}
++
++/*
++ * make sure we have a sane BRS offset to begin with
++ * especially with kexec
++ */
++void amd_brs_reset(void)
++{
++ /*
++ * Reset config
++ */
++ set_debug_extn_cfg(0);
++
++ /*
++ * Mark first entry as poisoned
++ */
++ wrmsrl(brs_to(0), BRS_POISON);
++}
++
++int __init amd_brs_init(void)
++{
++ if (!amd_brs_detect())
++ return -EOPNOTSUPP;
++
++ pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);
++
++ return 0;
++}
++
++void amd_brs_enable(void)
++{
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++ union amd_debug_extn_cfg cfg;
++
++ /* Activate only on first user */
++ if (++cpuc->brs_active > 1)
++ return;
++
++ cfg.val = 0; /* reset all fields */
++ cfg.brsmen = 1; /* enable branch sampling */
++
++ /* Set enable bit */
++ set_debug_extn_cfg(cfg.val);
++}
++
++void amd_brs_enable_all(void)
++{
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++ if (cpuc->lbr_users)
++ amd_brs_enable();
++}
++
++void amd_brs_disable(void)
++{
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++ union amd_debug_extn_cfg cfg;
++
++ /* Check if active (could be disabled via x86_pmu_disable_all()) */
++ if (!cpuc->brs_active)
++ return;
++
++ /* Only disable for last user */
++ if (--cpuc->brs_active)
++ return;
++
++ /*
++ * Clear the brsmen bit but preserve the others as they contain
++ * useful state such as vb and msroff
++ */
++ cfg.val = get_debug_extn_cfg();
++
++ /*
++ * When coming in on interrupt and BRS is full, then hw will have
++ * already stopped BRS, no need to issue wrmsr again
++ */
++ if (cfg.brsmen) {
++ cfg.brsmen = 0;
++ set_debug_extn_cfg(cfg.val);
++ }
++}
++
++void amd_brs_disable_all(void)
++{
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++ if (cpuc->lbr_users)
++ amd_brs_disable();
++}
++
++/*
++ * Caller must ensure BRS is in use (cpuc->lbr_users) before calling.
++ */
++void amd_brs_drain(void)
++{
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++ struct perf_event *event = cpuc->events[0];
++ struct perf_branch_entry *br = cpuc->lbr_entries;
++ union amd_debug_extn_cfg cfg;
++ u32 i, nr = 0, num, tos, start;
++ u32 shift = 64 - boot_cpu_data.x86_virt_bits;
++
++ /*
++ * BRS event forced on PMC0,
++ * so check if there is an event.
++ * It is possible to have lbr_users > 0 but the event
++ * not yet scheduled due to long latency PMU irq
++ */
++ if (!event)
++ goto empty;
++
++ cfg.val = get_debug_extn_cfg();
++
++ /* Sanity check [0-x86_pmu.lbr_nr] */
++ if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
++ goto empty;
++
++ /* No valid branch */
++ if (cfg.vb == 0)
++ goto empty;
++
++ /*
++ * msr.off points to next entry to be written
++ * tos = most recent entry index = msr.off - 1
++ * BRS register buffer saturates, so we know we have
++	 * start <= tos and that we have to read from start to tos
++ */
++ start = 0;
++ tos = amd_brs_get_tos(&cfg);
++
++ num = tos - start + 1;
++
++ /*
++ * BRS is only one pass (saturation) from MSROFF to depth-1
++ * MSROFF wraps to zero when buffer is full
++ */
++ for (i = 0; i < num; i++) {
++ u32 brs_idx = tos - i;
++ u64 from, to;
++
++ rdmsrl(brs_to(brs_idx), to);
++
++ /* Entry does not belong to us (as marked by kernel) */
++ if (to == BRS_POISON)
++ break;
++
++ rdmsrl(brs_from(brs_idx), from);
++
++ /*
++ * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
++ * Necessary to generate proper virtual addresses suitable for
++ * symbolization
++ */
++ to = (u64)(((s64)to << shift) >> shift);
++
++ perf_clear_branch_entry_bitfields(br+nr);
++
++ br[nr].from = from;
++ br[nr].to = to;
++
++ nr++;
++ }
++empty:
++ /* Record number of sampled branches */
++ cpuc->lbr_stack.nr = nr;
++}
++
++/*
++ * Poison most recent entry to prevent reuse by next task
++ * required because BRS entries are not tagged by PID
++ */
++static void amd_brs_poison_buffer(void)
++{
++ union amd_debug_extn_cfg cfg;
++ unsigned int idx;
++
++ /* Get current state */
++ cfg.val = get_debug_extn_cfg();
++
++ /* idx is most recently written entry */
++ idx = amd_brs_get_tos(&cfg);
++
++ /* Poison target of entry */
++ wrmsrl(brs_to(idx), BRS_POISON);
++}
++
++/*
++ * On context switch in, we need to make sure no samples from previous user
++ * are left in the BRS.
++ *
++ * On ctxswin, sched_in = true, called after the PMU has started
++ * On ctxswout, sched_in = false, called before the PMU is stopped
++ */
++void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
++{
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++
++ /* no active users */
++ if (!cpuc->lbr_users)
++ return;
++
++ /*
++ * On context switch in, we need to ensure we do not use entries
++ * from previous BRS user on that CPU, so we poison the buffer as
++ * a faster way compared to resetting all entries.
++ */
++ if (sched_in)
++ amd_brs_poison_buffer();
++}
+diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
+index 9687a8aef01c..c7ac70d8ed9a 100644
+--- a/arch/x86/events/amd/core.c
++++ b/arch/x86/events/amd/core.c
+@@ -325,8 +325,16 @@ static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
+ }
+ }
+
++#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
++static inline int amd_is_brs_event(struct perf_event *e)
++{
++ return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
++}
++
+ static int amd_core_hw_config(struct perf_event *event)
+ {
++ int ret = 0;
++
+ if (event->attr.exclude_host && event->attr.exclude_guest)
+ /*
+ * When HO == GO == 1 the hardware treats that as GO == HO == 0
+@@ -343,7 +351,66 @@ static int amd_core_hw_config(struct perf_event *event)
+ if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
+ event->hw.flags |= PERF_X86_EVENT_PAIR;
+
+- return 0;
++ /*
++ * if branch stack is requested
++ */
++ if (has_branch_stack(event)) {
++ /*
++		 * Because BRS holds the NMI until the buffer fills, it
++		 * cannot be used in counting mode.
++ */
++ if (!is_sampling_event(event))
++ return -EINVAL;
++
++ /*
++ * Due to the way BRS operates by holding the interrupt until
++ * lbr_nr entries have been captured, it does not make sense
++ * to allow sampling on BRS with an event that does not match
++ * what BRS is capturing, i.e., retired taken branches.
++ * Otherwise the correlation with the event's period is even
++ * more loose:
++ *
++ * With retired taken branch:
++ * Effective P = P + 16 + X
++ * With any other event:
++ * Effective P = P + Y + X
++ *
++ * Where X is the number of taken branches due to interrupt
++ * skid. Skid is large.
++ *
++		 * Where Y is the occurrences of the event while BRS is
++ * capturing the lbr_nr entries.
++ *
++ * By using retired taken branches, we limit the impact on the
++ * Y variable. We know it cannot be more than the depth of
++ * BRS.
++ */
++ if (!amd_is_brs_event(event))
++ return -EINVAL;
++
++ /*
++ * BRS implementation does not work with frequency mode
++ * reprogramming of the period.
++ */
++ if (event->attr.freq)
++ return -EINVAL;
++ /*
++ * The kernel subtracts BRS depth from period, so it must
++ * be big enough.
++ */
++ if (event->attr.sample_period <= x86_pmu.lbr_nr)
++ return -EINVAL;
++
++ /*
++ * Check if we can allow PERF_SAMPLE_BRANCH_STACK
++ */
++ ret = amd_brs_setup_filter(event);
++
++ /* only set in case of success */
++ if (!ret)
++ event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
++ }
++ return ret;
+ }
+
+ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
+@@ -366,7 +433,7 @@ static int amd_pmu_hw_config(struct perf_event *event)
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+- if (has_branch_stack(event))
++ if (has_branch_stack(event) && !x86_pmu.lbr_nr)
+ return -EOPNOTSUPP;
+
+ ret = x86_pmu_hw_config(event);
+@@ -555,6 +622,8 @@ static void amd_pmu_cpu_starting(int cpu)
+
+ cpuc->amd_nb->nb_id = nb_id;
+ cpuc->amd_nb->refcnt++;
++
++ amd_brs_reset();
+ }
+
+ static void amd_pmu_cpu_dead(int cpu)
+@@ -610,6 +679,8 @@ static void amd_pmu_disable_all(void)
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int idx;
+
++ amd_brs_disable_all();
++
+ x86_pmu_disable_all();
+
+ /*
+@@ -634,6 +705,30 @@ static void amd_pmu_disable_all(void)
+ }
+ }
+
++static void amd_pmu_enable_event(struct perf_event *event)
++{
++ x86_pmu_enable_event(event);
++}
++
++static void amd_pmu_enable_all(int added)
++{
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++ struct hw_perf_event *hwc;
++ int idx;
++
++ amd_brs_enable_all();
++
++ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
++ hwc = &cpuc->events[idx]->hw;
++
++ /* only activate events which are marked as active */
++ if (!test_bit(idx, cpuc->active_mask))
++ continue;
++
++ amd_pmu_enable_event(cpuc->events[idx]);
++ }
++}
++
+ static void amd_pmu_disable_event(struct perf_event *event)
+ {
+ x86_pmu_disable_event(event);
+@@ -651,6 +746,18 @@ static void amd_pmu_disable_event(struct perf_event *event)
+ amd_pmu_wait_on_overflow(event->hw.idx);
+ }
+
++static void amd_pmu_add_event(struct perf_event *event)
++{
++ if (needs_branch_stack(event))
++ amd_pmu_brs_add(event);
++}
++
++static void amd_pmu_del_event(struct perf_event *event)
++{
++ if (needs_branch_stack(event))
++ amd_pmu_brs_del(event);
++}
++
+ /*
+ * Because of NMI latency, if multiple PMC counters are active or other sources
+ * of NMIs are received, the perf NMI handler can handle one or more overflowed
+@@ -671,11 +778,31 @@ static void amd_pmu_disable_event(struct perf_event *event)
+ */
+ static int amd_pmu_handle_irq(struct pt_regs *regs)
+ {
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int handled;
++ int pmu_enabled;
++
++ /*
++ * Save the PMU state.
++ * It needs to be restored when leaving the handler.
++ */
++ pmu_enabled = cpuc->enabled;
++ cpuc->enabled = 0;
++
++ /* stop everything (includes BRS) */
++ amd_pmu_disable_all();
++
++	/* Drain BRS if in use (could be inactive) */
++ if (cpuc->lbr_users)
++ amd_brs_drain();
+
+ /* Process any counter overflows */
+ handled = x86_pmu_handle_irq(regs);
+
++ cpuc->enabled = pmu_enabled;
++ if (pmu_enabled)
++ amd_pmu_enable_all(0);
++
+ /*
+ * If a counter was handled, record a timestamp such that un-handled
+ * NMIs will be claimed if arriving within that window.
+@@ -897,6 +1024,51 @@ static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
+ --cpuc->n_pair;
+ }
+
++/*
++ * Because of the way BRS operates with an inactive and active phases, and
++ * the link to one counter, it is not possible to have two events using BRS
++ * scheduled at the same time. There would be an issue with enforcing the
++ * period of each one and given that the BRS saturates, it would not be possible
++ * to guarantee correlated content for all events. Therefore, in situations
++ * where multiple events want to use BRS, the kernel enforces mutual exclusion.
++ * Exclusion is enforced by choosing only one counter for events using BRS.
++ * The event scheduling logic will then automatically multiplex the
++ * events and ensure that at most one event is actively using BRS.
++ *
++ * The BRS counter could be any counter, but there is no constraint on Fam19h,
++ * therefore all counters are equal and thus we pick the first one: PMC0
++ */
++static struct event_constraint amd_fam19h_brs_cntr0_constraint =
++ EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK);
++
++static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =
++ __EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR);
++
++static struct event_constraint *
++amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx,
++ struct perf_event *event)
++{
++ struct hw_perf_event *hwc = &event->hw;
++ bool has_brs = has_amd_brs(hwc);
++
++ /*
++ * In case BRS is used with an event requiring a counter pair,
++	 * the kernel allows it, but only on counters 0 & 1, to enforce
++	 * the multiplexing required to protect BRS in case of multiple
++	 * BRS users.
++ */
++ if (amd_is_pair_event_code(hwc)) {
++ return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint
++ : &pair_constraint;
++ }
++
++ if (has_brs)
++ return &amd_fam19h_brs_cntr0_constraint;
++
++ return &unconstrained;
++}
++
++
+ static ssize_t amd_event_sysfs_show(char *page, u64 config)
+ {
+ u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
+@@ -905,12 +1077,19 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config)
+ return x86_event_sysfs_show(page, config, event);
+ }
+
++static void amd_pmu_sched_task(struct perf_event_context *ctx,
++ bool sched_in)
++{
++ if (sched_in && x86_pmu.lbr_nr)
++ amd_pmu_brs_sched_task(ctx, sched_in);
++}
++
+ static __initconst const struct x86_pmu amd_pmu = {
+ .name = "AMD",
+ .handle_irq = amd_pmu_handle_irq,
+ .disable_all = amd_pmu_disable_all,
+- .enable_all = x86_pmu_enable_all,
+- .enable = x86_pmu_enable_event,
++ .enable_all = amd_pmu_enable_all,
++ .enable = amd_pmu_enable_event,
+ .disable = amd_pmu_disable_event,
+ .hw_config = amd_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+@@ -920,6 +1099,8 @@ static __initconst const struct x86_pmu amd_pmu = {
+ .event_map = amd_pmu_event_map,
+ .max_events = ARRAY_SIZE(amd_perfmon_event_map),
+ .num_counters = AMD64_NUM_COUNTERS,
++ .add = amd_pmu_add_event,
++ .del = amd_pmu_del_event,
+ .cntval_bits = 48,
+ .cntval_mask = (1ULL << 48) - 1,
+ .apic = 1,
+@@ -938,6 +1119,37 @@ static __initconst const struct x86_pmu amd_pmu = {
+ .amd_nb_constraints = 1,
+ };
+
++static ssize_t branches_show(struct device *cdev,
++ struct device_attribute *attr,
++ char *buf)
++{
++ return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
++}
++
++static DEVICE_ATTR_RO(branches);
++
++static struct attribute *amd_pmu_brs_attrs[] = {
++ &dev_attr_branches.attr,
++ NULL,
++};
++
++static umode_t
++amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
++{
++ return x86_pmu.lbr_nr ? attr->mode : 0;
++}
++
++static struct attribute_group group_caps_amd_brs = {
++ .name = "caps",
++ .attrs = amd_pmu_brs_attrs,
++ .is_visible = amd_brs_is_visible,
++};
++
++static const struct attribute_group *amd_attr_update[] = {
++ &group_caps_amd_brs,
++ NULL,
++};
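++
++/*
++ * With this group registered, the BRS depth should be readable from
++ * /sys/bus/event_source/devices/cpu/caps/branches (16 on Fam19h).
++ */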
++
+ static int __init amd_core_pmu_init(void)
+ {
+ u64 even_ctr_mask = 0ULL;
+@@ -989,6 +1201,19 @@ static int __init amd_core_pmu_init(void)
+ x86_pmu.flags |= PMU_FL_PAIR;
+ }
+
++ /*
++ * BRS requires special event constraints and flushing on ctxsw.
++ */
++ if (boot_cpu_data.x86 >= 0x19 && !amd_brs_init()) {
++ x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
++ x86_pmu.sched_task = amd_pmu_sched_task;
++ /*
++ * put_event_constraints callback same as Fam17h, set above
++ */
++ }
++
++ x86_pmu.attr_update = amd_attr_update;
++
+ pr_cont("core perfctr, ");
+ return 0;
+ }
+diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
+index 4147c9b94929..ddbfbf304b2d 100644
+--- a/arch/x86/events/core.c
++++ b/arch/x86/events/core.c
+@@ -1334,6 +1334,10 @@ static void x86_pmu_enable(struct pmu *pmu)
+ if (hwc->state & PERF_HES_ARCH)
+ continue;
+
++ /*
++ * if cpuc->enabled = 0, then no wrmsr as
++ * per x86_pmu_enable_event()
++ */
+ x86_pmu_start(event, PERF_EF_RELOAD);
+ }
+ cpuc->n_added = 0;
+@@ -1700,11 +1704,15 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
+ * event overflow
+ */
+ handled++;
+- perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (!x86_perf_event_set_period(event))
+ continue;
+
++ perf_sample_data_init(&data, 0, event->hw.last_period);
++
++ if (has_branch_stack(event))
++ data.br_stack = &cpuc->lbr_stack;
++
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
+index 84d9bef41159..ec7eb3fedbce 100644
+--- a/arch/x86/events/perf_event.h
++++ b/arch/x86/events/perf_event.h
+@@ -66,22 +66,23 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
+ /*
+ * struct hw_perf_event.flags flags
+ */
+-#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
+-#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
+-#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
+-#define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */
+-#define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown */
+-#define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */
+-#define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */
+-
+-#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */
+-#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */
+-#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
+-#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
+-#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
+-#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
+-#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
+-#define PERF_X86_EVENT_PEBS_STLAT 0x8000 /* st+stlat data address sampling */
++#define PERF_X86_EVENT_PEBS_LDLAT 0x00001 /* ld+ldlat data address sampling */
++#define PERF_X86_EVENT_PEBS_ST 0x00002 /* st data address sampling */
++#define PERF_X86_EVENT_PEBS_ST_HSW 0x00004 /* haswell style datala, store */
++#define PERF_X86_EVENT_PEBS_LD_HSW 0x00008 /* haswell style datala, load */
++#define PERF_X86_EVENT_PEBS_NA_HSW 0x00010 /* haswell style datala, unknown */
++#define PERF_X86_EVENT_EXCL 0x00020 /* HT exclusivity on counter */
++#define PERF_X86_EVENT_DYNAMIC 0x00040 /* dynamic alloc'd constraint */
++
++#define PERF_X86_EVENT_EXCL_ACCT 0x00100 /* accounted EXCL event */
++#define PERF_X86_EVENT_AUTO_RELOAD 0x00200 /* use PEBS auto-reload */
++#define PERF_X86_EVENT_LARGE_PEBS 0x00400 /* use large PEBS */
++#define PERF_X86_EVENT_PEBS_VIA_PT 0x00800 /* use PT buffer for PEBS */
++#define PERF_X86_EVENT_PAIR 0x01000 /* Large Increment per Cycle */
++#define PERF_X86_EVENT_LBR_SELECT 0x02000 /* Save/Restore MSR_LBR_SELECT */
++#define PERF_X86_EVENT_TOPDOWN 0x04000 /* Count Topdown slots/metrics events */
++#define PERF_X86_EVENT_PEBS_STLAT 0x08000 /* st+stlat data address sampling */
++#define PERF_X86_EVENT_AMD_BRS 0x10000 /* AMD Branch Sampling */
+
+ static inline bool is_topdown_count(struct perf_event *event)
+ {
+@@ -324,6 +325,8 @@ struct cpu_hw_events {
+ * AMD specific bits
+ */
+ struct amd_nb *amd_nb;
++ int brs_active; /* BRS is enabled */
++
+ /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
+ u64 perf_ctr_virt_mask;
+ int n_pair; /* Large increment events */
+@@ -1103,6 +1106,11 @@ int x86_pmu_hw_config(struct perf_event *event);
+
+ void x86_pmu_disable_all(void);
+
++static inline bool has_amd_brs(struct hw_perf_event *hwc)
++{
++ return hwc->flags & PERF_X86_EVENT_AMD_BRS;
++}
++
+ static inline bool is_counter_pair(struct hw_perf_event *hwc)
+ {
+ return hwc->flags & PERF_X86_EVENT_PAIR;
+@@ -1208,6 +1216,50 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
+ #ifdef CONFIG_CPU_SUP_AMD
+
+ int amd_pmu_init(void);
++int amd_brs_init(void);
++void amd_brs_disable(void);
++void amd_brs_enable(void);
++void amd_brs_enable_all(void);
++void amd_brs_disable_all(void);
++void amd_brs_drain(void);
++int amd_brs_setup_filter(struct perf_event *event);
++void amd_brs_reset(void);
++
++static inline void amd_pmu_brs_add(struct perf_event *event)
++{
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++
++ perf_sched_cb_inc(event->ctx->pmu);
++ cpuc->lbr_users++;
++ /*
++ * No need to reset BRS because it is reset
++ * on brs_enable() and it is saturating
++ */
++}
++
++static inline void amd_pmu_brs_del(struct perf_event *event)
++{
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++
++ cpuc->lbr_users--;
++ WARN_ON_ONCE(cpuc->lbr_users < 0);
++
++ perf_sched_cb_dec(event->ctx->pmu);
++}
++
++void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
++
++/*
++ * check if BRS is activated on the CPU
++ * active defined as it has non-zero users and DBG_EXT_CFG.BRSEN=1
++ */
++static inline bool amd_brs_active(void)
++{
++ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
++
++ return cpuc->brs_active;
++}
+
+ #else /* CONFIG_CPU_SUP_AMD */
+
+@@ -1216,6 +1268,23 @@ static inline int amd_pmu_init(void)
+ return 0;
+ }
+
++static inline int amd_brs_init(void)
++{
++ return -EOPNOTSUPP;
++}
++
++static inline void amd_brs_drain(void)
++{
++}
++
++static inline void amd_brs_enable_all(void)
++{
++}
++
++static inline void amd_brs_disable_all(void)
++{
++}
++
+ #endif /* CONFIG_CPU_SUP_AMD */
+
+ static inline int is_pebs_pt(struct perf_event *event)
+diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
+index 23efce987acf..480e4870aa42 100644
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -707,6 +707,10 @@
+ #define MSR_IA32_PERF_CTL 0x00000199
+ #define INTEL_PERF_CTL_MASK 0xffff
+
++/* AMD Branch Sampling configuration */
++#define MSR_AMD_DBG_EXTN_CFG 0xc000010f
++#define MSR_AMD_SAMP_BR_FROM 0xc0010300
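++/* FROM/TO pairs are interleaved: FROM[i] = base + 2*i, TO[i] = FROM[i] + 1 */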
++
+ #define MSR_IA32_MPERF 0x000000e7
+ #define MSR_IA32_APERF 0x000000e8
+
+--
+2.37.3
+