aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3121-drm-amdgpu-add-perfmon-and-fica-atomics-for-df.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3121-drm-amdgpu-add-perfmon-and-fica-atomics-for-df.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3121-drm-amdgpu-add-perfmon-and-fica-atomics-for-df.patch338
1 files changed, 338 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3121-drm-amdgpu-add-perfmon-and-fica-atomics-for-df.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3121-drm-amdgpu-add-perfmon-and-fica-atomics-for-df.patch
new file mode 100644
index 00000000..880c1321
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3121-drm-amdgpu-add-perfmon-and-fica-atomics-for-df.patch
@@ -0,0 +1,338 @@
+From b5c4e0f89339081b9b6e8008b2652f90e6bef53d Mon Sep 17 00:00:00 2001
+From: Jonathan Kim <jonathan.kim@amd.com>
+Date: Tue, 9 Jul 2019 15:47:57 -0400
+Subject: [PATCH 3121/4256] drm/amdgpu: add perfmon and fica atomics for df
+
+adding perfmon and fica atomic operations to adhere to data fabrics finite
+state machine requirements for indirect register access.
+
+Change-Id: I36c8fbe8b2df2ee848ac5b3bb931557c5488cd13
+Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
+Reviewed-by: Kent Russell <Kent.Russell@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +
+ drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 202 +++++++++++++++++----------
+ 2 files changed, 128 insertions(+), 77 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index c3885b95727a..fcb92ec6b73d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -727,6 +727,9 @@ struct amdgpu_df_funcs {
+ int is_disable);
+ void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
+ uint64_t *count);
++ uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
++ void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
++ uint32_t ficadl_val, uint32_t ficadh_val);
+ };
+ /* Define the HW IP blocks will be used in driver , add more if necessary */
+ enum amd_hw_ip_block_type {
+diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+index 7e0e8cda31b7..47ba0b31a8a4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
++++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+@@ -93,6 +93,96 @@ const struct attribute_group *df_v3_6_attr_groups[] = {
+ NULL
+ };
+
++static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
++ uint32_t ficaa_val)
++{
++ unsigned long flags, address, data;
++ uint32_t ficadl_val, ficadh_val;
++
++ address = adev->nbio_funcs->get_pcie_index_offset(adev);
++ data = adev->nbio_funcs->get_pcie_data_offset(adev);
++
++ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
++ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
++ WREG32(data, ficaa_val);
++
++ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
++ ficadl_val = RREG32(data);
++
++ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
++ ficadh_val = RREG32(data);
++
++ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
++
++ return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);
++}
++
++static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
++ uint32_t ficadl_val, uint32_t ficadh_val)
++{
++ unsigned long flags, address, data;
++
++ address = adev->nbio_funcs->get_pcie_index_offset(adev);
++ data = adev->nbio_funcs->get_pcie_data_offset(adev);
++
++ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
++ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
++ WREG32(data, ficaa_val);
++
++ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
++ WREG32(data, ficadl_val);
++
++ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
++ WREG32(data, ficadh_val);
++
++ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
++}
++
++/*
++ * df_v3_6_perfmon_rreg - read perfmon lo and hi
++ *
++ * required to be atomic. no mmio method provided so subsequent reads for lo
++ * and hi require to preserve df finite state machine
++ */
++static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
++ uint32_t lo_addr, uint32_t *lo_val,
++ uint32_t hi_addr, uint32_t *hi_val)
++{
++ unsigned long flags, address, data;
++
++ address = adev->nbio_funcs->get_pcie_index_offset(adev);
++ data = adev->nbio_funcs->get_pcie_data_offset(adev);
++
++ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
++ WREG32(address, lo_addr);
++ *lo_val = RREG32(data);
++ WREG32(address, hi_addr);
++ *hi_val = RREG32(data);
++ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
++}
++
++/*
++ * df_v3_6_perfmon_wreg - write to perfmon lo and hi
++ *
++ * required to be atomic. no mmio method provided so subsequent reads after
++ * data writes cannot occur to preserve data fabrics finite state machine.
++ */
++static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
++ uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val)
++{
++ unsigned long flags, address, data;
++
++ address = adev->nbio_funcs->get_pcie_index_offset(adev);
++ data = adev->nbio_funcs->get_pcie_data_offset(adev);
++
++ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
++ WREG32(address, lo_addr);
++ WREG32(data, lo_val);
++ WREG32(address, hi_addr);
++ WREG32(data, hi_val);
++ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
++}
++
+ /* get the number of df counters available */
+ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
+ struct device_attribute *attr,
+@@ -268,6 +358,10 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
+ uint32_t *lo_val,
+ uint32_t *hi_val)
+ {
++
++ uint32_t eventsel, instance, unitmask;
++ uint32_t instance_10, instance_5432, instance_76;
++
+ df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);
+
+ if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
+@@ -276,40 +370,33 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
+ return -ENXIO;
+ }
+
+- if (lo_val && hi_val) {
+- uint32_t eventsel, instance, unitmask;
+- uint32_t instance_10, instance_5432, instance_76;
++ eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
++ unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
++ instance = DF_V3_6_GET_INSTANCE(config);
+
+- eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
+- unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
+- instance = DF_V3_6_GET_INSTANCE(config);
++ instance_10 = instance & 0x3;
++ instance_5432 = (instance >> 2) & 0xf;
++ instance_76 = (instance >> 6) & 0x3;
+
+- instance_10 = instance & 0x3;
+- instance_5432 = (instance >> 2) & 0xf;
+- instance_76 = (instance >> 6) & 0x3;
++ *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22);
++ *hi_val = (instance_76 << 29) | instance_5432;
+
+- *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
+- *hi_val = (instance_76 << 29) | instance_5432;
+- }
++ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
++ config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);
+
+ return 0;
+ }
+
+-/* assign df performance counters for read */
+-static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
+- uint64_t config,
+- int *is_assigned)
++/* add df performance counters for read */
++static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
++ uint64_t config)
+ {
+ int i, target_cntr;
+
+- *is_assigned = 0;
+-
+ target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
+
+- if (target_cntr >= 0) {
+- *is_assigned = 1;
++ if (target_cntr >= 0)
+ return 0;
+- }
+
+ for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
+ if (adev->df_perfmon_config_assign_mask[i] == 0U) {
+@@ -344,45 +431,13 @@ static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return;
+
+- WREG32_PCIE(lo_base_addr, 0UL);
+- WREG32_PCIE(hi_base_addr, 0UL);
+-}
+-
+-
+-static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev,
+- uint64_t config)
+-{
+- uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+- int ret, is_assigned;
+-
+- ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);
+-
+- if (ret || is_assigned)
+- return ret;
+-
+- ret = df_v3_6_pmc_get_ctrl_settings(adev,
+- config,
+- &lo_base_addr,
+- &hi_base_addr,
+- &lo_val,
+- &hi_val);
+-
+- if (ret)
+- return ret;
+-
+- DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
+- config, lo_base_addr, hi_base_addr, lo_val, hi_val);
+-
+- WREG32_PCIE(lo_base_addr, lo_val);
+- WREG32_PCIE(hi_base_addr, hi_val);
+-
+- return ret;
++ df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
+ }
+
+ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
+ int is_enable)
+ {
+- uint32_t lo_base_addr, hi_base_addr, lo_val;
++ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ int ret = 0;
+
+ switch (adev->asic_type) {
+@@ -391,24 +446,20 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
+ df_v3_6_reset_perfmon_cntr(adev, config);
+
+ if (is_enable) {
+- ret = df_v3_6_add_perfmon_cntr(adev, config);
++ ret = df_v3_6_pmc_add_cntr(adev, config);
+ } else {
+ ret = df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ &lo_base_addr,
+ &hi_base_addr,
+- NULL,
+- NULL);
++ &lo_val,
++ &hi_val);
+
+ if (ret)
+ return ret;
+
+- lo_val = RREG32_PCIE(lo_base_addr);
+-
+- DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
+- config, lo_base_addr, hi_base_addr, lo_val);
+-
+- WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
++ df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val,
++ hi_base_addr, hi_val);
+ }
+
+ break;
+@@ -422,7 +473,7 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
+ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
+ int is_disable)
+ {
+- uint32_t lo_base_addr, hi_base_addr, lo_val;
++ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ int ret = 0;
+
+ switch (adev->asic_type) {
+@@ -432,18 +483,13 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
+ config,
+ &lo_base_addr,
+ &hi_base_addr,
+- NULL,
+- NULL);
++ &lo_val,
++ &hi_val);
+
+ if (ret)
+ return ret;
+
+- lo_val = RREG32_PCIE(lo_base_addr);
+-
+- DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
+- config, lo_base_addr, hi_base_addr, lo_val);
+-
+- WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
++ df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
+
+ if (is_disable)
+ df_v3_6_pmc_release_cntr(adev, config);
+@@ -472,8 +518,8 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return;
+
+- lo_val = RREG32_PCIE(lo_base_addr);
+- hi_val = RREG32_PCIE(hi_base_addr);
++ df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
++ hi_base_addr, &hi_val);
+
+ *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
+
+@@ -481,7 +527,7 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
+ *count = 0;
+
+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
+- config, lo_base_addr, hi_base_addr, lo_val, hi_val);
++ config, lo_base_addr, hi_base_addr, lo_val, hi_val);
+
+ break;
+
+@@ -500,5 +546,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
+ .get_clockgating_state = df_v3_6_get_clockgating_state,
+ .pmc_start = df_v3_6_pmc_start,
+ .pmc_stop = df_v3_6_pmc_stop,
+- .pmc_get_count = df_v3_6_pmc_get_count
++ .pmc_get_count = df_v3_6_pmc_get_count,
++ .get_fica = df_v3_6_get_fica,
++ .set_fica = df_v3_6_set_fica
+ };
+--
+2.17.1
+