From 186c46dfbae2e7bc411fbb5de80322440472fd3c Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 19 Jun 2019 23:37:59 -0400 Subject: [PATCH 2119/2940] drm/amdgpu: update df_v3_6 for xgmi perfmons add pmu attribute groups and structures for perf events. add sysfs to track available df perfmon counters fix overflow handling in perfmon counter reads. Change-Id: Ifaade7846ac08d6be0276e066d95731f63c63945 Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 17 ++ drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 436 ++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/df_v3_6.h | 19 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 4 + 4 files changed, 220 insertions(+), 256 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 92c0f91e262a..6666c88cd3e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -763,6 +763,7 @@ struct amd_powerplay { }; #define AMDGPU_RESET_MAGIC_NUM 64 +#define AMDGPU_MAX_DF_PERFMONS 16 struct amdgpu_device { struct device *dev; struct drm_device *ddev; @@ -998,6 +999,7 @@ struct amdgpu_device { long compute_timeout; uint64_t unique_id; + uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1237,4 +1239,19 @@ static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return #endif #include "amdgpu_object.h" + +/* used by df_v3_6.c and amdgpu_pmu.c */ +#define AMDGPU_PMU_ATTR(_name, _object) \ +static ssize_t \ +_name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_object) >= PAGE_SIZE - 1); \ + return sprintf(page, _object "\n"); \ +} \ + \ +static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name) + #endif + diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index dd2fb0a941a5..08780fa10f32 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -30,8 +30,104 @@ static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 32, 0, 0, 0, 2, 4, 8}; +/* init df format attrs */ +AMDGPU_PMU_ATTR(event, "config:0-7"); +AMDGPU_PMU_ATTR(instance, "config:8-15"); +AMDGPU_PMU_ATTR(umask, "config:16-23"); + +/* df format attributes */ +static struct attribute *df_v3_6_format_attrs[] = { + &pmu_attr_event.attr, + &pmu_attr_instance.attr, + &pmu_attr_umask.attr, + NULL +}; + +/* df format attribute group */ +static struct attribute_group df_v3_6_format_attr_group = { + .name = "format", + .attrs = df_v3_6_format_attrs, +}; + +/* df event attrs */ +AMDGPU_PMU_ATTR(cake0_pcsout_txdata, + "event=0x7,instance=0x46,umask=0x2"); +AMDGPU_PMU_ATTR(cake1_pcsout_txdata, + "event=0x7,instance=0x47,umask=0x2"); +AMDGPU_PMU_ATTR(cake0_pcsout_txmeta, + "event=0x7,instance=0x46,umask=0x4"); +AMDGPU_PMU_ATTR(cake1_pcsout_txmeta, + "event=0x7,instance=0x47,umask=0x4"); +AMDGPU_PMU_ATTR(cake0_ftiinstat_reqalloc, + "event=0xb,instance=0x46,umask=0x4"); +AMDGPU_PMU_ATTR(cake1_ftiinstat_reqalloc, + "event=0xb,instance=0x47,umask=0x4"); +AMDGPU_PMU_ATTR(cake0_ftiinstat_rspalloc, + "event=0xb,instance=0x46,umask=0x8"); +AMDGPU_PMU_ATTR(cake1_ftiinstat_rspalloc, + "event=0xb,instance=0x47,umask=0x8"); + +/* df event attributes */ +static struct attribute *df_v3_6_event_attrs[] = { + &pmu_attr_cake0_pcsout_txdata.attr, + &pmu_attr_cake1_pcsout_txdata.attr, + &pmu_attr_cake0_pcsout_txmeta.attr, + &pmu_attr_cake1_pcsout_txmeta.attr, + &pmu_attr_cake0_ftiinstat_reqalloc.attr, + &pmu_attr_cake1_ftiinstat_reqalloc.attr, + &pmu_attr_cake0_ftiinstat_rspalloc.attr, + &pmu_attr_cake1_ftiinstat_rspalloc.attr, + NULL +}; + +/* df event attribute group */ +static struct attribute_group df_v3_6_event_attr_group = { + .name = "events", + .attrs = df_v3_6_event_attrs +}; + +/* df event attr groups */ +const struct attribute_group *df_v3_6_attr_groups[] = { + &df_v3_6_format_attr_group, + &df_v3_6_event_attr_group, + NULL +}; + +/* get the number of df counters available */ +static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev; + struct drm_device *ddev; + int i, count; + + ddev = dev_get_drvdata(dev); + adev = ddev->dev_private; + count = 0; + + for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { + if (adev->df_perfmon_config_assign_mask[i] == 0) + count++; + } + + return snprintf(buf, PAGE_SIZE, "%i\n", count); +} + +/* device attr for available perfmon counters */ +static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL); + +/* init perfmons */ static void df_v3_6_init(struct amdgpu_device *adev) { + int i, ret; + + ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail); + if (ret) + DRM_ERROR("failed to create file for available df counters\n"); + + for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++) + adev->df_perfmon_config_assign_mask[i] = 0; } static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev, @@ -105,28 +201,19 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_DF_MGCG; } -/* hold counter assignment per gpu struct */ -struct df_v3_6_event_mask { - struct amdgpu_device gpu; - uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS]; -}; - /* get assigned df perfmon ctr as int */ -static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, - uint64_t config, - int *counter) +static int df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, + uint64_t config) { - struct df_v3_6_event_mask *mask; int i; - mask = container_of(adev, struct df_v3_6_event_mask, gpu); - - for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { - if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) { - *counter = i; - return; - } + for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { + if ((config & 0x0FFFFFFUL) == + adev->df_perfmon_config_assign_mask[i]) + return i; } + + return -EINVAL; } /* get address based on counter assignment */ @@ -136,10 +223,7 @@ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, uint32_t *lo_base_addr, uint32_t *hi_base_addr) { - - int target_cntr = -1; - - df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); if (target_cntr < 0) return; @@ -184,33 +268,29 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, uint32_t *lo_val, uint32_t *hi_val) { - - uint32_t eventsel, instance, unitmask; - uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0; - df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); - if (lo_val == NULL || hi_val == NULL) - return -EINVAL; - if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { - DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x", + DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x", *lo_base_addr, *hi_base_addr); return -ENXIO; } - eventsel = GET_EVENT(config); - instance = GET_INSTANCE(config); - unitmask = GET_UNITMASK(config); + if (lo_val && hi_val) { + uint32_t eventsel, instance, unitmask; + uint32_t instance_10, instance_5432, instance_76; + + eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; + unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; + instance = DF_V3_6_GET_INSTANCE(config); - es_5_0 = eventsel & 0x3FUL; - es_13_6 = instance; - es_13_0 = (es_13_6 << 6) + es_5_0; - es_13_12 = (es_13_0 & 0x03000UL) >> 12; - es_11_8 = (es_13_0 & 0x0F00UL) >> 8; - es_7_0 = es_13_0 & 0x0FFUL; - *lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8); - *hi_val = (es_11_8 | ((es_13_12)<<(29))); + instance_10 = instance & 0x3; + instance_5432 = (instance >> 2) & 0xf; + instance_76 = (instance >> 6) & 0x3; + + *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel; + *hi_val = (instance_76 << 29) | instance_5432; + } return 0; } @@ -220,26 +300,21 @@ static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, uint64_t config, int *is_assigned) { - - struct df_v3_6_event_mask *mask; int i, target_cntr; - target_cntr = -1; - *is_assigned = 0; - df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); if (target_cntr >= 0) { *is_assigned = 1; return 0; } - mask = container_of(adev, struct df_v3_6_event_mask, gpu); - - for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { - if (mask->config_assign_mask[i] == 0ULL) { - mask->config_assign_mask[i] = config & 0x0FFFFFFUL; + for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { + if (adev->df_perfmon_config_assign_mask[i] == 0U) { + adev->df_perfmon_config_assign_mask[i] = + config & 0x0FFFFFFUL; return 0; } } @@ -251,66 +326,17 @@ static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, uint64_t config) { - - struct df_v3_6_event_mask *mask; - int target_cntr; - - target_cntr = -1; - - df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); - - mask = container_of(adev, struct df_v3_6_event_mask, gpu); + int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); if (target_cntr >= 0) - mask->config_assign_mask[target_cntr] = 0ULL; - + adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL; } -/* - * get xgmi link counters via programmable data fabric (df) counters (max 4) - * using cake tx event. - * - * @adev -> amdgpu device - * @instance-> currently cake has 2 links to poll on vega20 - * @count -> counters to pass - * - */ -static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev, - int instance, - uint64_t *count) -{ - uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; - uint64_t config; - - config = GET_INSTANCE_CONFIG(instance); - - df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, - &hi_base_addr); - - if ((lo_base_addr == 0) || (hi_base_addr == 0)) - return; - - lo_val = RREG32_PCIE(lo_base_addr); - hi_val = RREG32_PCIE(hi_base_addr); - - *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); -} - -/* - * reset xgmi link counters - * - * @adev -> amdgpu device - * @instance-> currently cake has 2 links to poll on vega20 - * - */ -static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev, - int instance) +static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev, + uint64_t config) { uint32_t lo_base_addr, hi_base_addr; - uint64_t config; - - config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16); df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, &hi_base_addr); @@ -322,26 +348,13 @@ static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev, WREG32_PCIE(hi_base_addr, 0UL); } -/* - * add xgmi link counters - * - * @adev -> amdgpu device - * @instance-> currently cake has 2 links to poll on vega20 - * - */ -static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev, - int instance) +static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev, + uint64_t config) { uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; - uint64_t config; int ret, is_assigned; - if (instance < 0 || instance > 1) - return -EINVAL; - - config = GET_INSTANCE_CONFIG(instance); - ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); if (ret || is_assigned) @@ -357,126 +370,46 @@ static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev, if (ret) return ret; + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", + config, lo_base_addr, hi_base_addr, lo_val, hi_val); + WREG32_PCIE(lo_base_addr, lo_val); WREG32_PCIE(hi_base_addr, hi_val); return ret; } - -/* - * start xgmi link counters - * - * @adev -> amdgpu device - * @instance-> currently cake has 2 links to poll on vega20 - * @is_enable -> either resume or assign event via df perfmon - * - */ - -static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev, - int instance, - int is_enable) -{ - uint32_t lo_base_addr, hi_base_addr, lo_val; - uint64_t config; - int ret; - - if (instance < 0 || instance > 1) - return -EINVAL; - - if (is_enable) { - - ret = df_v3_6_add_xgmi_link_cntr(adev, instance); - - if (ret) - return ret; - - } else { - - config = GET_INSTANCE_CONFIG(instance); - - df_v3_6_pmc_get_ctrl_settings(adev, - config, - &lo_base_addr, - &hi_base_addr, - NULL, - NULL); - - if (lo_base_addr == 0) - return -EINVAL; - - lo_val = RREG32_PCIE(lo_base_addr); - - WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); - - ret = 0; - } - - return ret; - -} - -/* - * start xgmi link counters - * - * @adev -> amdgpu device - * @instance-> currently cake has 2 links to poll on vega20 - * @is_enable -> either pause or unassign event via df perfmon - * - */ - -static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev, - int instance, - int is_disable) -{ - - uint32_t lo_base_addr, hi_base_addr, lo_val; - uint64_t config; - - config = GET_INSTANCE_CONFIG(instance); - - if (is_disable) { - df_v3_6_reset_xgmi_link_cntr(adev, instance); - df_v3_6_pmc_release_cntr(adev, config); - } else { - - df_v3_6_pmc_get_ctrl_settings(adev, - config, - &lo_base_addr, - &hi_base_addr, - NULL, - NULL); - - if ((lo_base_addr == 0) || (hi_base_addr == 0)) - return -EINVAL; - - lo_val = RREG32_PCIE(lo_base_addr); - - WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); - } - - return 0; -} - static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, int is_enable) { - int xgmi_tx_link, ret = 0; + uint32_t lo_base_addr, hi_base_addr, lo_val; + int ret = 0; switch (adev->asic_type) { case CHIP_VEGA20: - xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 - : (IS_DF_XGMI_1_TX(config) ? 1 : -1); + df_v3_6_reset_perfmon_cntr(adev, config); - if (xgmi_tx_link >= 0) - ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link, - is_enable); + if (is_enable) { + ret = df_v3_6_add_perfmon_cntr(adev, config); + } else { + ret = df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + NULL, + NULL); - if (ret) - return ret; + if (ret) + return ret; - ret = 0; + lo_val = RREG32_PCIE(lo_base_addr); + + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", + config, lo_base_addr, hi_base_addr, lo_val); + + WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); + } break; default: @@ -489,25 +422,33 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, int is_disable) { - int xgmi_tx_link, ret; + uint32_t lo_base_addr, hi_base_addr, lo_val; + int ret = 0; switch (adev->asic_type) { case CHIP_VEGA20: - xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 - : (IS_DF_XGMI_1_TX(config) ? 1 : -1); + ret = df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + NULL, + NULL); + + if (ret) + return ret; + + lo_val = RREG32_PCIE(lo_base_addr); - if (xgmi_tx_link >= 0) { - ret = df_v3_6_stop_xgmi_link_cntr(adev, - xgmi_tx_link, - is_disable); - if (ret) - return ret; - } + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", + config, lo_base_addr, hi_base_addr, lo_val); + + WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); - ret = 0; + if (is_disable) + df_v3_6_pmc_release_cntr(adev, config); - break; + break; default: break; } @@ -519,25 +460,34 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, uint64_t config, uint64_t *count) { - - int xgmi_tx_link; + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + *count = 0; switch (adev->asic_type) { case CHIP_VEGA20: - xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 - : (IS_DF_XGMI_1_TX(config) ? 1 : -1); + df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + &hi_base_addr); - if (xgmi_tx_link >= 0) { - df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link); - df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count); - } + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return; + + lo_val = RREG32_PCIE(lo_base_addr); + hi_val = RREG32_PCIE(hi_base_addr); + + *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); + + if (*count >= DF_V3_6_PERFMON_OVERFLOW) + *count = 0; + + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", + config, lo_base_addr, hi_base_addr, lo_val, hi_val); break; + default: break; } - } const struct amdgpu_df_funcs df_v3_6_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h index fcffd807764d..76998541bc30 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -36,22 +36,15 @@ enum DF_V3_6_MGCG { }; /* Defined in global_features.h as FTI_PERFMON_VISIBLE */ -#define AMDGPU_DF_MAX_COUNTERS 4 +#define DF_V3_6_MAX_COUNTERS 4 /* get flags from df perfmon config */ -#define GET_EVENT(x) (x & 0xFFUL) -#define GET_INSTANCE(x) ((x >> 8) & 0xFFUL) -#define GET_UNITMASK(x) ((x >> 16) & 0xFFUL) -#define GET_INSTANCE_CONFIG(x) (0ULL | (0x07ULL) \ - | ((0x046ULL + x) << 8) \ - | (0x02 << 16)) - -/* df event conf macros */ -#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \ - && GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2) -#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \ - && GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2) +#define DF_V3_6_GET_EVENT(x) (x & 0xFFUL) +#define DF_V3_6_GET_INSTANCE(x) ((x >> 8) & 0xFFUL) +#define DF_V3_6_GET_UNITMASK(x) ((x >> 16) & 0xFFUL) +#define DF_V3_6_PERFMON_OVERFLOW 0xFFFFFFFFFFFFULL +extern const struct attribute_group *df_v3_6_attr_groups[]; extern const struct amdgpu_df_funcs df_v3_6_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 78bd4fc07bab..7fee24ea7863 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1066,6 +1066,7 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) static int soc15_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; /* enable pcie gen2/3 link */ soc15_pcie_gen3_enable(adev); @@ -1079,6 +1080,9 @@ static int soc15_common_hw_init(void *handle) */ if (adev->nbio_funcs->remap_hdp_registers) adev->nbio_funcs->remap_hdp_registers(adev); + + adev->df_funcs->init(adev); + /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); /* HW doorbell routing policy: doorbell writing not -- 2.17.1