aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0076-perf-amd-ibs-Add-support-for-L3-miss-filtering.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0076-perf-amd-ibs-Add-support-for-L3-miss-filtering.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0076-perf-amd-ibs-Add-support-for-L3-miss-filtering.patch197
1 files changed, 197 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0076-perf-amd-ibs-Add-support-for-L3-miss-filtering.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0076-perf-amd-ibs-Add-support-for-L3-miss-filtering.patch
new file mode 100644
index 00000000..8dfcef2b
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-5.15/0076-perf-amd-ibs-Add-support-for-L3-miss-filtering.patch
@@ -0,0 +1,197 @@
+From 9f9d4ab0ec570a5ca9d1057127ad26fdbd6cbf15 Mon Sep 17 00:00:00 2001
+From: Ravi Bangoria <ravi.bangoria@amd.com>
+Date: Mon, 9 May 2022 10:19:09 +0530
+Subject: [PATCH 76/86] perf/amd/ibs: Add support for L3 miss filtering
+
+commit ba5d35b442c65f32d38ef61f732218274c6dcf4c upstream
+
+IBS L3 miss filtering works by tagging an instruction on IBS counter
+overflow and generating an NMI if the tagged instruction causes an L3
+miss. Samples without an L3 miss are discarded and counter is reset
+with random value (between 1-15 for fetch pmu and 1-127 for op pmu).
+This helps in reducing sampling overhead when user is interested only
+in such samples. One of the use case of such filtered samples is to
+feed data to page-migration daemon in tiered memory systems.
+
+Add support for L3 miss filtering in IBS driver via new pmu attribute
+"l3missonly". Example usage:
+
+ # perf record -a -e ibs_op/l3missonly=1/ --raw-samples sleep 5
+
+Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20220509044914.1473-4-ravi.bangoria@amd.com
+Signed-off-by: Zhaolong Zhang <zhaolong.zhang@windriver.com>
+---
+ arch/x86/events/amd/ibs.c | 67 +++++++++++++++++++++++++++----
+ arch/x86/include/asm/perf_event.h | 3 ++
+ 2 files changed, 63 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
+index ece4f6a7d24b..2dc8b7ec030a 100644
+--- a/arch/x86/events/amd/ibs.c
++++ b/arch/x86/events/amd/ibs.c
+@@ -544,22 +544,46 @@ static const struct attribute_group *empty_attr_groups[] = {
+
+ PMU_FORMAT_ATTR(rand_en, "config:57");
+ PMU_FORMAT_ATTR(cnt_ctl, "config:19");
++PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
++PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
++
++static umode_t
++zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i)
++{
++ return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;
++}
+
+ static struct attribute *rand_en_attrs[] = {
+ &format_attr_rand_en.attr,
+ NULL,
+ };
+
++static struct attribute *fetch_l3missonly_attrs[] = {
++ &fetch_l3missonly.attr.attr,
++ NULL,
++};
++
+ static struct attribute_group group_rand_en = {
+ .name = "format",
+ .attrs = rand_en_attrs,
+ };
+
++static struct attribute_group group_fetch_l3missonly = {
++ .name = "format",
++ .attrs = fetch_l3missonly_attrs,
++ .is_visible = zen4_ibs_extensions_is_visible,
++};
++
+ static const struct attribute_group *fetch_attr_groups[] = {
+ &group_rand_en,
+ NULL,
+ };
+
++static const struct attribute_group *fetch_attr_update[] = {
++ &group_fetch_l3missonly,
++ NULL,
++};
++
+ static umode_t
+ cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+ {
+@@ -571,14 +595,26 @@ static struct attribute *cnt_ctl_attrs[] = {
+ NULL,
+ };
+
++static struct attribute *op_l3missonly_attrs[] = {
++ &op_l3missonly.attr.attr,
++ NULL,
++};
++
+ static struct attribute_group group_cnt_ctl = {
+ .name = "format",
+ .attrs = cnt_ctl_attrs,
+ .is_visible = cnt_ctl_is_visible,
+ };
+
++static struct attribute_group group_op_l3missonly = {
++ .name = "format",
++ .attrs = op_l3missonly_attrs,
++ .is_visible = zen4_ibs_extensions_is_visible,
++};
++
+ static const struct attribute_group *op_attr_update[] = {
+ &group_cnt_ctl,
++ &group_op_l3missonly,
+ NULL,
+ };
+
+@@ -805,10 +841,8 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+ return ret;
+ }
+
+-static __init int perf_event_ibs_init(void)
++static __init int perf_ibs_fetch_init(void)
+ {
+- int ret;
+-
+ /*
+ * Some chips fail to reset the fetch count when it is written; instead
+ * they need a 0-1 transition of IbsFetchEn.
+@@ -819,12 +853,17 @@ static __init int perf_event_ibs_init(void)
+ if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
+ perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;
+
++ if (ibs_caps & IBS_CAPS_ZEN4)
++ perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY;
++
+ perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups;
++ perf_ibs_fetch.pmu.attr_update = fetch_attr_update;
+
+- ret = perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+- if (ret)
+- return ret;
++ return perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
++}
+
++static __init int perf_ibs_op_init(void)
++{
+ if (ibs_caps & IBS_CAPS_OPCNT)
+ perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+
+@@ -834,10 +873,24 @@ static __init int perf_event_ibs_init(void)
+ perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ }
+
++ if (ibs_caps & IBS_CAPS_ZEN4)
++ perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;
++
+ perf_ibs_op.pmu.attr_groups = empty_attr_groups;
+ perf_ibs_op.pmu.attr_update = op_attr_update;
+
+- ret = perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
++ return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
++}
++
++static __init int perf_event_ibs_init(void)
++{
++ int ret;
++
++ ret = perf_ibs_fetch_init();
++ if (ret)
++ return ret;
++
++ ret = perf_ibs_op_init();
+ if (ret)
+ goto err_op;
+
+diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
+index 92d72a4db57b..686189d5f307 100644
+--- a/arch/x86/include/asm/perf_event.h
++++ b/arch/x86/include/asm/perf_event.h
+@@ -410,6 +410,7 @@ struct pebs_xmm {
+ #define IBS_CAPS_OPBRNFUSE (1U<<8)
+ #define IBS_CAPS_FETCHCTLEXTD (1U<<9)
+ #define IBS_CAPS_OPDATA4 (1U<<10)
++#define IBS_CAPS_ZEN4 (1U<<11)
+
+ #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
+ | IBS_CAPS_FETCHSAM \
+@@ -423,6 +424,7 @@ struct pebs_xmm {
+ #define IBSCTL_LVT_OFFSET_MASK 0x0F
+
+ /* IBS fetch bits/masks */
++#define IBS_FETCH_L3MISSONLY (1ULL<<59)
+ #define IBS_FETCH_RAND_EN (1ULL<<57)
+ #define IBS_FETCH_VAL (1ULL<<49)
+ #define IBS_FETCH_ENABLE (1ULL<<48)
+@@ -439,6 +441,7 @@ struct pebs_xmm {
+ #define IBS_OP_CNT_CTL (1ULL<<19)
+ #define IBS_OP_VAL (1ULL<<18)
+ #define IBS_OP_ENABLE (1ULL<<17)
++#define IBS_OP_L3MISSONLY (1ULL<<16)
+ #define IBS_OP_MAX_CNT 0x0000FFFFULL
+ #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
+ #define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */
+--
+2.37.3
+