aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch | 296
1 files changed, 296 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch
new file mode 100644
index 00000000..2d9ca24a
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch
@@ -0,0 +1,296 @@
+From 6577a635e80832fdae2b2c4b7a68639203b77f7a Mon Sep 17 00:00:00 2001
+From: "Peter Zijlstra (Intel)" <peterz@infradead.org>
+Date: Tue, 4 Jan 2022 08:51:16 -0800
+Subject: [PATCH 57/86] perf/x86/intel/lbr: Support LBR format V7
+
+commit 1ac7fd8159a842b3aa51f0b46a351fa3eeb8fbf3 upstream
+
+The Goldmont plus and Tremont have LBR format V7. The V7 has LBR_INFO,
+which is the same as LBR format V5. But V7 doesn't support TSX.
+
+Without the patch, the associated misprediction and cycles information
+in the LBR_INFO may be lost on a Goldmont plus platform.
+For Tremont, the patch only impacts the non-PEBS events. Because of the
+adaptive PEBS, the LBR_INFO is always processed for a PEBS event.
+
+Currently, two different ways are used to check the LBR capabilities,
+which makes the code complex and confusing.
+For the LBR format V4 and earlier, the global static lbr_desc array is
+used to store the flags for the LBR capabilities in each LBR format.
+For LBR format V5 and V6, the current code checks the version number
+for the LBR capabilities.
+
+There are common LBR capabilities among LBR format versions. Several
+flags for the LBR capabilities are introduced into the struct x86_pmu.
+The flags, which can be shared among LBR formats, are used to check
+the LBR capabilities. Add intel_pmu_lbr_init() to set the flags
+accordingly at boot time.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Kan Liang <kan.liang@linux.intel.com>
+Link: https://lkml.kernel.org/r/1641315077-96661-1-git-send-email-peterz@infradead.org
+Signed-off-by: Zhaolong Zhang <zhaolong.zhang@windriver.com>
+---
+ arch/x86/events/intel/core.c | 2 +
+ arch/x86/events/intel/lbr.c | 114 ++++++++++++++++++++---------------
+ arch/x86/events/perf_event.h | 10 ++-
+ 3 files changed, 75 insertions(+), 51 deletions(-)
+
+diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
+index 9918f0b08552..9089b13d1a9b 100644
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -6364,6 +6364,8 @@ __init int intel_pmu_init(void)
+ }
+
+ if (x86_pmu.lbr_nr) {
++ intel_pmu_lbr_init();
++
+ pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
+
+ /* only support branch_stack snapshot for perfmon >= v2 */
+diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
+index 513bf1f30c2a..f8e7dc02846d 100644
+--- a/arch/x86/events/intel/lbr.c
++++ b/arch/x86/events/intel/lbr.c
+@@ -8,14 +8,6 @@
+
+ #include "../perf_event.h"
+
+-static const enum {
+- LBR_EIP_FLAGS = 1,
+- LBR_TSX = 2,
+-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+- [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
+- [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
+-};
+-
+ /*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ wrmsrl(x86_pmu.lbr_from + i, 0);
+ wrmsrl(x86_pmu.lbr_to + i, 0);
+- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
++ if (x86_pmu.lbr_has_info)
+ wrmsrl(x86_pmu.lbr_info + i, 0);
+ }
+ }
+@@ -303,11 +295,10 @@ enum {
+ */
+ static inline bool lbr_from_signext_quirk_needed(void)
+ {
+- int lbr_format = x86_pmu.intel_cap.lbr_format;
+ bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
+ boot_cpu_has(X86_FEATURE_RTM);
+
+- return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
++ return !tsx_support && x86_pmu.lbr_has_tsx;
+ }
+
+ static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
+@@ -425,12 +416,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
+
+ void intel_pmu_lbr_restore(void *ctx)
+ {
+- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct x86_perf_task_context *task_ctx = ctx;
+- int i;
+- unsigned lbr_idx, mask;
++ bool need_info = x86_pmu.lbr_has_info;
+ u64 tos = task_ctx->tos;
++ unsigned lbr_idx, mask;
++ int i;
+
+ mask = x86_pmu.lbr_nr - 1;
+ for (i = 0; i < task_ctx->valid_lbrs; i++) {
+@@ -442,7 +433,7 @@ void intel_pmu_lbr_restore(void *ctx)
+ lbr_idx = (tos - i) & mask;
+ wrlbr_from(lbr_idx, 0);
+ wrlbr_to(lbr_idx, 0);
+- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
++ if (need_info)
+ wrlbr_info(lbr_idx, 0);
+ }
+
+@@ -517,9 +508,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
+
+ void intel_pmu_lbr_save(void *ctx)
+ {
+- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct x86_perf_task_context *task_ctx = ctx;
++ bool need_info = x86_pmu.lbr_has_info;
+ unsigned lbr_idx, mask;
+ u64 tos;
+ int i;
+@@ -814,7 +805,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+ {
+ bool need_info = false, call_stack = false;
+ unsigned long mask = x86_pmu.lbr_nr - 1;
+- int lbr_format = x86_pmu.intel_cap.lbr_format;
+ u64 tos = intel_pmu_lbr_tos();
+ int i;
+ int out = 0;
+@@ -829,9 +819,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+ for (i = 0; i < num; i++) {
+ unsigned long lbr_idx = (tos - i) & mask;
+ u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
+- int skip = 0;
+ u16 cycles = 0;
+- int lbr_flags = lbr_desc[lbr_format];
+
+ from = rdlbr_from(lbr_idx, NULL);
+ to = rdlbr_to(lbr_idx, NULL);
+@@ -843,37 +831,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+ if (call_stack && !from)
+ break;
+
+- if (lbr_format == LBR_FORMAT_INFO && need_info) {
+- u64 info;
+-
+- info = rdlbr_info(lbr_idx, NULL);
+- mis = !!(info & LBR_INFO_MISPRED);
+- pred = !mis;
+- in_tx = !!(info & LBR_INFO_IN_TX);
+- abort = !!(info & LBR_INFO_ABORT);
+- cycles = (info & LBR_INFO_CYCLES);
+- }
+-
+- if (lbr_format == LBR_FORMAT_TIME) {
+- mis = !!(from & LBR_FROM_FLAG_MISPRED);
+- pred = !mis;
+- skip = 1;
+- cycles = ((to >> 48) & LBR_INFO_CYCLES);
+-
+- to = (u64)((((s64)to) << 16) >> 16);
+- }
+-
+- if (lbr_flags & LBR_EIP_FLAGS) {
+- mis = !!(from & LBR_FROM_FLAG_MISPRED);
+- pred = !mis;
+- skip = 1;
+- }
+- if (lbr_flags & LBR_TSX) {
+- in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+- abort = !!(from & LBR_FROM_FLAG_ABORT);
+- skip = 3;
++ if (x86_pmu.lbr_has_info) {
++ if (need_info) {
++ u64 info;
++
++ info = rdlbr_info(lbr_idx, NULL);
++ mis = !!(info & LBR_INFO_MISPRED);
++ pred = !mis;
++ cycles = (info & LBR_INFO_CYCLES);
++ if (x86_pmu.lbr_has_tsx) {
++ in_tx = !!(info & LBR_INFO_IN_TX);
++ abort = !!(info & LBR_INFO_ABORT);
++ }
++ }
++ } else {
++ int skip = 0;
++
++ if (x86_pmu.lbr_from_flags) {
++ mis = !!(from & LBR_FROM_FLAG_MISPRED);
++ pred = !mis;
++ skip = 1;
++ }
++ if (x86_pmu.lbr_has_tsx) {
++ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
++ abort = !!(from & LBR_FROM_FLAG_ABORT);
++ skip = 3;
++ }
++ from = (u64)((((s64)from) << skip) >> skip);
++
++ if (x86_pmu.lbr_to_cycles) {
++ cycles = ((to >> 48) & LBR_INFO_CYCLES);
++ to = (u64)((((s64)to) << 16) >> 16);
++ }
+ }
+- from = (u64)((((s64)from) << skip) >> skip);
+
+ /*
+ * Some CPUs report duplicated abort records,
+@@ -1126,7 +1116,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+
+ if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
+ (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
+- (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
++ x86_pmu.lbr_has_info)
+ reg->config |= LBR_NO_INFO;
+
+ return 0;
+@@ -1712,6 +1702,30 @@ void intel_pmu_lbr_init_knl(void)
+ x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
+ }
+
++void intel_pmu_lbr_init(void)
++{
++ switch (x86_pmu.intel_cap.lbr_format) {
++ case LBR_FORMAT_EIP_FLAGS2:
++ x86_pmu.lbr_has_tsx = 1;
++ fallthrough;
++ case LBR_FORMAT_EIP_FLAGS:
++ x86_pmu.lbr_from_flags = 1;
++ break;
++
++ case LBR_FORMAT_INFO:
++ x86_pmu.lbr_has_tsx = 1;
++ fallthrough;
++ case LBR_FORMAT_INFO2:
++ x86_pmu.lbr_has_info = 1;
++ break;
++
++ case LBR_FORMAT_TIME:
++ x86_pmu.lbr_from_flags = 1;
++ x86_pmu.lbr_to_cycles = 1;
++ break;
++ }
++}
++
+ /*
+ * LBR state size is variable based on the max number of registers.
+ * This calculates the expected state size, which should match
+diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
+index b48f3ab9125f..84d9bef41159 100644
+--- a/arch/x86/events/perf_event.h
++++ b/arch/x86/events/perf_event.h
+@@ -214,7 +214,8 @@ enum {
+ LBR_FORMAT_EIP_FLAGS2 = 0x04,
+ LBR_FORMAT_INFO = 0x05,
+ LBR_FORMAT_TIME = 0x06,
+- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
++ LBR_FORMAT_INFO2 = 0x07,
++ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2,
+ };
+
+ enum {
+@@ -838,6 +839,11 @@ struct x86_pmu {
+ bool lbr_double_abort; /* duplicated lbr aborts */
+ bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
+
++ unsigned int lbr_has_info:1;
++ unsigned int lbr_has_tsx:1;
++ unsigned int lbr_from_flags:1;
++ unsigned int lbr_to_cycles:1;
++
+ /*
+ * Intel Architectural LBR CPUID Enumeration
+ */
+@@ -1390,6 +1396,8 @@ void intel_pmu_lbr_init_skl(void);
+
+ void intel_pmu_lbr_init_knl(void);
+
++void intel_pmu_lbr_init(void);
++
+ void intel_pmu_arch_lbr_init(void);
+
+ void intel_pmu_pebs_data_source_nhm(void);
+--
+2.37.3
+