diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch new file mode 100644 index 00000000..2d9ca24a --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0057-perf-x86-intel-lbr-Support-LBR-format-V7.patch @@ -0,0 +1,296 @@ +From 6577a635e80832fdae2b2c4b7a68639203b77f7a Mon Sep 17 00:00:00 2001 +From: "Peter Zijlstra (Intel)" <peterz@infradead.org> +Date: Tue, 4 Jan 2022 08:51:16 -0800 +Subject: [PATCH 57/86] perf/x86/intel/lbr: Support LBR format V7 + +commit 1ac7fd8159a842b3aa51f0b46a351fa3eeb8fbf3 upstream + +The Goldmont plus and Tremont have LBR format V7. The V7 has LBR_INFO, +which is the same as LBR format V5. But V7 doesn't support TSX. + +Without the patch, the associated misprediction and cycles information +in the LBR_INFO may be lost on a Goldmont plus platform. +For Tremont, the patch only impacts the non-PEBS events. Because of the +adaptive PEBS, the LBR_INFO is always processed for a PEBS event. + +Currently, two different ways are used to check the LBR capabilities, +which make the codes complex and confusing. +For the LBR format V4 and earlier, the global static lbr_desc array is +used to store the flags for the LBR capabilities in each LBR format. +For LBR format V5 and V6, the current code checks the version number +for the LBR capabilities. + +There are common LBR capabilities among LBR format versions. Several +flags for the LBR capabilities are introduced into the struct x86_pmu. +The flags, which can be shared among LBR formats, are used to check +the LBR capabilities. Add intel_pmu_lbr_init() to set the flags +accordingly at boot time. + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Kan Liang <kan.liang@linux.intel.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Tested-by: Kan Liang <kan.liang@linux.intel.com> +Link: https://lkml.kernel.org/r/1641315077-96661-1-git-send-email-peterz@infradead.org +Signed-off-by: Zhaolong Zhang <zhaolong.zhang@windriver.com> +--- + arch/x86/events/intel/core.c | 2 + + arch/x86/events/intel/lbr.c | 114 ++++++++++++++++++++--------------- + arch/x86/events/perf_event.h | 10 ++- + 3 files changed, 75 insertions(+), 51 deletions(-) + +diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c +index 9918f0b08552..9089b13d1a9b 100644 +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -6364,6 +6364,8 @@ __init int intel_pmu_init(void) + } + + if (x86_pmu.lbr_nr) { ++ intel_pmu_lbr_init(); ++ + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); + + /* only support branch_stack snapshot for perfmon >= v2 */ +diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c +index 513bf1f30c2a..f8e7dc02846d 100644 +--- a/arch/x86/events/intel/lbr.c ++++ b/arch/x86/events/intel/lbr.c +@@ -8,14 +8,6 @@ + + #include "../perf_event.h" + +-static const enum { +- LBR_EIP_FLAGS = 1, +- LBR_TSX = 2, +-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = { +- [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS, +- [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX, +-}; +- + /* + * Intel LBR_SELECT bits + * Intel Vol3a, April 2011, Section 16.7 Table 16-10 +@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void) + for (i = 0; i < x86_pmu.lbr_nr; i++) { + wrmsrl(x86_pmu.lbr_from + i, 0); + wrmsrl(x86_pmu.lbr_to + i, 0); +- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) ++ if (x86_pmu.lbr_has_info) + wrmsrl(x86_pmu.lbr_info + i, 0); + } + } +@@ -303,11 +295,10 @@ enum { + */ + static inline bool lbr_from_signext_quirk_needed(void) + { +- int lbr_format = x86_pmu.intel_cap.lbr_format; + bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || + boot_cpu_has(X86_FEATURE_RTM); + +- return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); ++ return !tsx_support && x86_pmu.lbr_has_tsx; + } + + static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); +@@ -425,12 +416,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) + + void intel_pmu_lbr_restore(void *ctx) + { +- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx = ctx; +- int i; +- unsigned lbr_idx, mask; ++ bool need_info = x86_pmu.lbr_has_info; + u64 tos = task_ctx->tos; ++ unsigned lbr_idx, mask; ++ int i; + + mask = x86_pmu.lbr_nr - 1; + for (i = 0; i < task_ctx->valid_lbrs; i++) { +@@ -442,7 +433,7 @@ void intel_pmu_lbr_restore(void *ctx) + lbr_idx = (tos - i) & mask; + wrlbr_from(lbr_idx, 0); + wrlbr_to(lbr_idx, 0); +- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) ++ if (need_info) + wrlbr_info(lbr_idx, 0); + } + +@@ -517,9 +508,9 @@ static void __intel_pmu_lbr_restore(void *ctx) + + void intel_pmu_lbr_save(void *ctx) + { +- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx = ctx; ++ bool need_info = x86_pmu.lbr_has_info; + unsigned lbr_idx, mask; + u64 tos; + int i; +@@ -814,7 +805,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) + { + bool need_info = false, call_stack = false; + unsigned long mask = x86_pmu.lbr_nr - 1; +- int lbr_format = x86_pmu.intel_cap.lbr_format; + u64 tos = intel_pmu_lbr_tos(); + int i; + int out = 0; +@@ -829,9 +819,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) + for (i = 0; i < num; i++) { + unsigned long lbr_idx = (tos - i) & mask; + u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; +- int skip = 0; + u16 cycles = 0; +- int lbr_flags = lbr_desc[lbr_format]; + + from = rdlbr_from(lbr_idx, NULL); + to = rdlbr_to(lbr_idx, NULL); +@@ -843,37 +831,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) + if (call_stack && !from) + break; + +- if (lbr_format == LBR_FORMAT_INFO && need_info) { +- u64 info; +- +- info = rdlbr_info(lbr_idx, NULL); +- mis = !!(info & LBR_INFO_MISPRED); +- pred = !mis; +- in_tx = !!(info & LBR_INFO_IN_TX); +- abort = !!(info & LBR_INFO_ABORT); +- cycles = (info & LBR_INFO_CYCLES); +- } +- +- if (lbr_format == LBR_FORMAT_TIME) { +- mis = !!(from & LBR_FROM_FLAG_MISPRED); +- pred = !mis; +- skip = 1; +- cycles = ((to >> 48) & LBR_INFO_CYCLES); +- +- to = (u64)((((s64)to) << 16) >> 16); +- } +- +- if (lbr_flags & LBR_EIP_FLAGS) { +- mis = !!(from & LBR_FROM_FLAG_MISPRED); +- pred = !mis; +- skip = 1; +- } +- if (lbr_flags & LBR_TSX) { +- in_tx = !!(from & LBR_FROM_FLAG_IN_TX); +- abort = !!(from & LBR_FROM_FLAG_ABORT); +- skip = 3; ++ if (x86_pmu.lbr_has_info) { ++ if (need_info) { ++ u64 info; ++ ++ info = rdlbr_info(lbr_idx, NULL); ++ mis = !!(info & LBR_INFO_MISPRED); ++ pred = !mis; ++ cycles = (info & LBR_INFO_CYCLES); ++ if (x86_pmu.lbr_has_tsx) { ++ in_tx = !!(info & LBR_INFO_IN_TX); ++ abort = !!(info & LBR_INFO_ABORT); ++ } ++ } ++ } else { ++ int skip = 0; ++ ++ if (x86_pmu.lbr_from_flags) { ++ mis = !!(from & LBR_FROM_FLAG_MISPRED); ++ pred = !mis; ++ skip = 1; ++ } ++ if (x86_pmu.lbr_has_tsx) { ++ in_tx = !!(from & LBR_FROM_FLAG_IN_TX); ++ abort = !!(from & LBR_FROM_FLAG_ABORT); ++ skip = 3; ++ } ++ from = (u64)((((s64)from) << skip) >> skip); ++ ++ if (x86_pmu.lbr_to_cycles) { ++ cycles = ((to >> 48) & LBR_INFO_CYCLES); ++ to = (u64)((((s64)to) << 16) >> 16); ++ } + } +- from = (u64)((((s64)from) << skip) >> skip); + + /* + * Some CPUs report duplicated abort records, +@@ -1126,7 +1116,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) + + if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && + (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && +- (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)) ++ x86_pmu.lbr_has_info) + reg->config |= LBR_NO_INFO; + + return 0; +@@ -1712,6 +1702,30 @@ void intel_pmu_lbr_init_knl(void) + x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS; + } + ++void intel_pmu_lbr_init(void) ++{ ++ switch (x86_pmu.intel_cap.lbr_format) { ++ case LBR_FORMAT_EIP_FLAGS2: ++ x86_pmu.lbr_has_tsx = 1; ++ fallthrough; ++ case LBR_FORMAT_EIP_FLAGS: ++ x86_pmu.lbr_from_flags = 1; ++ break; ++ ++ case LBR_FORMAT_INFO: ++ x86_pmu.lbr_has_tsx = 1; ++ fallthrough; ++ case LBR_FORMAT_INFO2: ++ x86_pmu.lbr_has_info = 1; ++ break; ++ ++ case LBR_FORMAT_TIME: ++ x86_pmu.lbr_from_flags = 1; ++ x86_pmu.lbr_to_cycles = 1; ++ break; ++ } ++} ++ + /* + * LBR state size is variable based on the max number of registers. + * This calculates the expected state size, which should match +diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h +index b48f3ab9125f..84d9bef41159 100644 +--- a/arch/x86/events/perf_event.h ++++ b/arch/x86/events/perf_event.h +@@ -214,7 +214,8 @@ enum { + LBR_FORMAT_EIP_FLAGS2 = 0x04, + LBR_FORMAT_INFO = 0x05, + LBR_FORMAT_TIME = 0x06, +- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME, ++ LBR_FORMAT_INFO2 = 0x07, ++ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2, + }; + + enum { +@@ -838,6 +839,11 @@ struct x86_pmu { + bool lbr_double_abort; /* duplicated lbr aborts */ + bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ + ++ unsigned int lbr_has_info:1; ++ unsigned int lbr_has_tsx:1; ++ unsigned int lbr_from_flags:1; ++ unsigned int lbr_to_cycles:1; ++ + /* + * Intel Architectural LBR CPUID Enumeration + */ +@@ -1390,6 +1396,8 @@ void intel_pmu_lbr_init_skl(void); + + void intel_pmu_lbr_init_knl(void); + ++void intel_pmu_lbr_init(void); ++ + void intel_pmu_arch_lbr_init(void); + + void intel_pmu_pebs_data_source_nhm(void); +-- +2.37.3 + |