Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2965-drm-amdgpu-add-multiple-instances-support-for-Arctur.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2965-drm-amdgpu-add-multiple-instances-support-for-Arctur.patch | 1833
1 file changed, 1833 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2965-drm-amdgpu-add-multiple-instances-support-for-Arctur.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2965-drm-amdgpu-add-multiple-instances-support-for-Arctur.patch new file mode 100644 index 00000000..2f3ee5fa --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2965-drm-amdgpu-add-multiple-instances-support-for-Arctur.patch @@ -0,0 +1,1833 @@ +From 009b039b27a5e490402c6e20a7e069cb47993dc4 Mon Sep 17 00:00:00 2001 +From: James Zhu <James.Zhu@amd.com> +Date: Wed, 10 Jul 2019 11:06:37 -0500 +Subject: [PATCH 2965/4256] drm/amdgpu: add multiple instances support for + Arcturus + +Arcturus has dual-VCN. Need add multiple instances support for Arcturus. + +Signed-off-by: James Zhu <James.Zhu@amd.com> +Reviewed-by: Leo Liu <leo.liu@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 20 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 166 ++-- + drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 1178 ++++++++++++----------- + 3 files changed, 737 insertions(+), 627 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index 8782a58570e2..4169f6936367 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -402,23 +402,29 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, + break; + case AMDGPU_HW_IP_VCN_DEC: + type = AMD_IP_BLOCK_TYPE_VCN; +- if (adev->vcn.inst[0].ring_dec.sched.ready) +- ++num_rings; ++ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { ++ if (adev->vcn.inst[i].ring_dec.sched.ready) ++ ++num_rings; ++ } + ib_start_alignment = 16; + ib_size_alignment = 16; + break; + case AMDGPU_HW_IP_VCN_ENC: + type = AMD_IP_BLOCK_TYPE_VCN; +- for (i = 0; i < adev->vcn.num_enc_rings; i++) +- if (adev->vcn.inst[0].ring_enc[i].sched.ready) +- ++num_rings; ++ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { ++ for (j = 0; j < adev->vcn.num_enc_rings; j++) ++ if (adev->vcn.inst[i].ring_enc[j].sched.ready) ++ ++num_rings; ++ } + ib_start_alignment = 64; + ib_size_alignment = 1; + break; + case AMDGPU_HW_IP_VCN_JPEG: + type = AMD_IP_BLOCK_TYPE_VCN; +- if (adev->vcn.inst[0].ring_jpeg.sched.ready) +- ++num_rings; ++ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { ++ if (adev->vcn.inst[i].ring_jpeg.sched.ready) ++ ++num_rings; ++ } + ib_start_alignment = 16; + ib_size_alignment = 16; + break; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +index 330f355b93a9..5016fc570211 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +@@ -64,7 +64,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) + const char *fw_name; + const struct common_firmware_header *hdr; + unsigned char fw_check; +- int r; ++ int i, r; + + INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); + +@@ -145,12 +145,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) + bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); +- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, +- AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[0].vcpu_bo, +- &adev->vcn.inst[0].gpu_addr, &adev->vcn.inst[0].cpu_addr); +- if (r) { +- dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); +- return r; ++ ++ for (i = 0; i < adev->vcn.num_vcn_inst; i++) { 
++ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, ++ AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo, ++ &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr); ++ if (r) { ++ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); ++ return r; ++ } + } + + if (adev->vcn.indirect_sram) { +@@ -168,26 +171,28 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) + + int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) + { +- int i; +- +- kvfree(adev->vcn.inst[0].saved_bo); ++ int i, j; + + if (adev->vcn.indirect_sram) { + amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, +- &adev->vcn.dpg_sram_gpu_addr, +- (void **)&adev->vcn.dpg_sram_cpu_addr); ++ &adev->vcn.dpg_sram_gpu_addr, ++ (void **)&adev->vcn.dpg_sram_cpu_addr); + } + +- amdgpu_bo_free_kernel(&adev->vcn.inst[0].vcpu_bo, +- &adev->vcn.inst[0].gpu_addr, +- (void **)&adev->vcn.inst[0].cpu_addr); ++ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { ++ kvfree(adev->vcn.inst[j].saved_bo); + +- amdgpu_ring_fini(&adev->vcn.inst[0].ring_dec); ++ amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, ++ &adev->vcn.inst[j].gpu_addr, ++ (void **)&adev->vcn.inst[j].cpu_addr); + +- for (i = 0; i < adev->vcn.num_enc_rings; ++i) +- amdgpu_ring_fini(&adev->vcn.inst[0].ring_enc[i]); ++ amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec); + +- amdgpu_ring_fini(&adev->vcn.inst[0].ring_jpeg); ++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) ++ amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); ++ ++ amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg); ++ } + + release_firmware(adev->vcn.fw); + +@@ -198,21 +203,23 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) + { + unsigned size; + void *ptr; ++ int i; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + +- if (adev->vcn.inst[0].vcpu_bo == NULL) +- return 0; ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ if (adev->vcn.inst[i].vcpu_bo == NULL) ++ return 0; + +- size = amdgpu_bo_size(adev->vcn.inst[0].vcpu_bo); +- ptr = adev->vcn.inst[0].cpu_addr; ++ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); ++ ptr = adev->vcn.inst[i].cpu_addr; + +- adev->vcn.inst[0].saved_bo = kvmalloc(size, GFP_KERNEL); +- if (!adev->vcn.inst[0].saved_bo) +- return -ENOMEM; +- +- memcpy_fromio(adev->vcn.inst[0].saved_bo, ptr, size); ++ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); ++ if (!adev->vcn.inst[i].saved_bo) ++ return -ENOMEM; + ++ memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); ++ } + return 0; + } + +@@ -220,32 +227,34 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) + { + unsigned size; + void *ptr; ++ int i; + +- if (adev->vcn.inst[0].vcpu_bo == NULL) +- return -EINVAL; +- +- size = amdgpu_bo_size(adev->vcn.inst[0].vcpu_bo); +- ptr = adev->vcn.inst[0].cpu_addr; +- +- if (adev->vcn.inst[0].saved_bo != NULL) { +- memcpy_toio(ptr, adev->vcn.inst[0].saved_bo, size); +- kvfree(adev->vcn.inst[0].saved_bo); +- adev->vcn.inst[0].saved_bo = NULL; +- } else { +- const struct common_firmware_header *hdr; +- unsigned offset; +- +- hdr = (const struct common_firmware_header *)adev->vcn.fw->data; +- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { +- offset = le32_to_cpu(hdr->ucode_array_offset_bytes); +- memcpy_toio(adev->vcn.inst[0].cpu_addr, adev->vcn.fw->data + offset, +- le32_to_cpu(hdr->ucode_size_bytes)); +- size -= le32_to_cpu(hdr->ucode_size_bytes); +- ptr += le32_to_cpu(hdr->ucode_size_bytes); ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ if (adev->vcn.inst[i].vcpu_bo == NULL) ++ return -EINVAL; ++ ++ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); ++ ptr = 
adev->vcn.inst[i].cpu_addr; ++ ++ if (adev->vcn.inst[i].saved_bo != NULL) { ++ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); ++ kvfree(adev->vcn.inst[i].saved_bo); ++ adev->vcn.inst[i].saved_bo = NULL; ++ } else { ++ const struct common_firmware_header *hdr; ++ unsigned offset; ++ ++ hdr = (const struct common_firmware_header *)adev->vcn.fw->data; ++ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { ++ offset = le32_to_cpu(hdr->ucode_array_offset_bytes); ++ memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, ++ le32_to_cpu(hdr->ucode_size_bytes)); ++ size -= le32_to_cpu(hdr->ucode_size_bytes); ++ ptr += le32_to_cpu(hdr->ucode_size_bytes); ++ } ++ memset_io(ptr, 0, size); + } +- memset_io(ptr, 0, size); + } +- + return 0; + } + +@@ -253,31 +262,34 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) + { + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, vcn.idle_work.work); +- unsigned int fences = 0; +- unsigned int i; ++ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; ++ unsigned int i, j; + +- for (i = 0; i < adev->vcn.num_enc_rings; ++i) { +- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_enc[i]); +- } ++ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { ++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ++ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); ++ } + +- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { +- struct dpg_pause_state new_state; ++ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { ++ struct dpg_pause_state new_state; + +- if (fences) +- new_state.fw_based = VCN_DPG_STATE__PAUSE; +- else +- new_state.fw_based = VCN_DPG_STATE__UNPAUSE; ++ if (fence[j]) ++ new_state.fw_based = VCN_DPG_STATE__PAUSE; ++ else ++ new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + +- if (amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg)) +- new_state.jpeg = VCN_DPG_STATE__PAUSE; +- else +- new_state.jpeg = VCN_DPG_STATE__UNPAUSE; ++ if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg)) ++ new_state.jpeg = VCN_DPG_STATE__PAUSE; ++ else ++ new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + +- adev->vcn.pause_dpg_mode(adev, &new_state); +- } ++ adev->vcn.pause_dpg_mode(adev, &new_state); ++ } + +- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg); +- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_dec); ++ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg); ++ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); ++ fences += fence[j]; ++ } + + if (fences == 0) { + amdgpu_gfx_off_ctrl(adev, true); +@@ -311,14 +323,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) + unsigned int i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { +- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_enc[i]); ++ fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]); + } + if (fences) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + +- if (amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg)) ++ if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; +@@ -344,7 +356,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) + unsigned i; + int r; + +- WREG32(adev->vcn.inst[0].external.scratch9, 0xCAFEDEAD); ++ WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) + return r; 
+@@ -352,7 +364,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + for (i = 0; i < adev->usec_timeout; i++) { +- tmp = RREG32(adev->vcn.inst[0].external.scratch9); ++ tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); +@@ -663,7 +675,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) + unsigned i; + int r; + +- WREG32(adev->vcn.inst[0].external.jpeg_pitch, 0xCAFEDEAD); ++ WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) + return r; +@@ -673,7 +685,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { +- tmp = RREG32(adev->vcn.inst[0].external.jpeg_pitch); ++ tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); +@@ -747,7 +759,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) + } + + for (i = 0; i < adev->usec_timeout; i++) { +- tmp = RREG32(adev->vcn.inst[0].external.jpeg_pitch); ++ tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); +diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +index e27351267c9e..b7dc069b637c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c ++++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +@@ -48,6 +48,8 @@ + + #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + ++#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 ++ + static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); + static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); + static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev); +@@ -55,6 +57,11 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); + static int vcn_v2_5_set_powergating_state(void *handle, + enum amd_powergating_state state); + ++static int amdgpu_ih_clientid_vcns[] = { ++ SOC15_IH_CLIENTID_VCN, ++ SOC15_IH_CLIENTID_VCN1 ++}; ++ + /** + * vcn_v2_5_early_init - set function pointers + * +@@ -65,8 +72,11 @@ static int vcn_v2_5_set_powergating_state(void *handle, + static int vcn_v2_5_early_init(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; ++ if (adev->asic_type == CHIP_ARCTURUS) + +- adev->vcn.num_vcn_inst = 1; ++ adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS; ++ else ++ adev->vcn.num_vcn_inst = 1; + adev->vcn.num_enc_rings = 2; + + vcn_v2_5_set_dec_ring_funcs(adev); +@@ -87,29 +97,31 @@ static int vcn_v2_5_early_init(void *handle) + static int vcn_v2_5_sw_init(void *handle) + { + struct amdgpu_ring *ring; +- int i, r; ++ int i, j, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +- /* VCN DEC TRAP */ +- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, +- VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[0].irq); +- if (r) +- return r; ++ for (j = 0; j < adev->vcn.num_vcn_inst; j++) { ++ /* VCN DEC TRAP */ ++ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], ++ VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq); ++ if (r) ++ return r; ++ ++ /* VCN ENC TRAP */ ++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ++ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], ++ i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq); ++ if (r) ++ return r; ++ } + +- /* VCN ENC TRAP */ +- for (i = 0; i < 
adev->vcn.num_enc_rings; ++i) { +- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, +- i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[0].irq); ++ /* VCN JPEG TRAP */ ++ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], ++ VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq); + if (r) + return r; + } + +- /* VCN JPEG TRAP */ +- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, +- VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[0].irq); +- if (r) +- return r; +- + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; +@@ -121,6 +133,13 @@ static int vcn_v2_5_sw_init(void *handle) + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); ++ ++ if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) { ++ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; ++ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; ++ adev->firmware.fw_size += ++ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); ++ } + DRM_INFO("PSP loading VCN firmware\n"); + } + +@@ -128,52 +147,54 @@ static int vcn_v2_5_sw_init(void *handle) + if (r) + return r; + +- ring = &adev->vcn.inst[0].ring_dec; +- ring->use_doorbell = true; +- ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; +- sprintf(ring->name, "vcn_dec"); +- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0); +- if (r) +- return r; +- +- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; +- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; +- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; +- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; +- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; +- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; +- +- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; +- adev->vcn.inst[0].external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); +- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; +- adev->vcn.inst[0].external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); +- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; +- adev->vcn.inst[0].external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); +- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; +- adev->vcn.inst[0].external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); +- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; +- adev->vcn.inst[0].external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); +- +- for (i = 0; i < adev->vcn.num_enc_rings; ++i) { +- ring = &adev->vcn.inst[0].ring_enc[i]; ++ for (j = 0; j < adev->vcn.num_vcn_inst; j++) { ++ adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; ++ adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; ++ adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; ++ adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; ++ adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; ++ adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; ++ ++ adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; ++ adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9); ++ adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; ++ adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0); 
++ adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; ++ adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1); ++ adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; ++ adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD); ++ adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; ++ adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP); ++ ++ adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; ++ adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH); ++ ++ ring = &adev->vcn.inst[j].ring_dec; + ring->use_doorbell = true; +- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; +- sprintf(ring->name, "vcn_enc%d", i); +- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0); ++ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j; ++ sprintf(ring->name, "vcn_dec_%d", j); ++ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; +- } + +- ring = &adev->vcn.inst[0].ring_jpeg; +- ring->use_doorbell = true; +- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; +- sprintf(ring->name, "vcn_jpeg"); +- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0); +- if (r) +- return r; ++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ++ ring = &adev->vcn.inst[j].ring_enc[i]; ++ ring->use_doorbell = true; ++ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j; ++ sprintf(ring->name, "vcn_enc_%d.%d", j, i); ++ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); ++ if (r) ++ return r; ++ } + +- adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; +- adev->vcn.inst[0].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); ++ ring = &adev->vcn.inst[j].ring_jpeg; ++ ring->use_doorbell = true; ++ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j; ++ sprintf(ring->name, "vcn_jpeg_%d", j); ++ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); ++ if (r) ++ return r; ++ } + + return 0; + } +@@ -209,36 +230,39 @@ static int vcn_v2_5_sw_fini(void *handle) + static int vcn_v2_5_hw_init(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; +- struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_dec; +- int i, r; ++ struct amdgpu_ring *ring; ++ int i, j, r; + +- adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, +- ring->doorbell_index, 0); ++ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { ++ ring = &adev->vcn.inst[j].ring_dec; + +- r = amdgpu_ring_test_ring(ring); +- if (r) { +- ring->sched.ready = false; +- goto done; +- } ++ adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, ++ ring->doorbell_index, j); + +- for (i = 0; i < adev->vcn.num_enc_rings; ++i) { +- ring = &adev->vcn.inst[0].ring_enc[i]; +- ring->sched.ready = false; +- continue; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + goto done; + } +- } + +- ring = &adev->vcn.inst[0].ring_jpeg; +- r = amdgpu_ring_test_ring(ring); +- if (r) { +- ring->sched.ready = false; +- goto done; +- } ++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ++ ring = &adev->vcn.inst[j].ring_enc[i]; ++ ring->sched.ready = false; ++ continue; ++ r = amdgpu_ring_test_ring(ring); ++ if (r) { ++ ring->sched.ready = false; ++ goto done; ++ } ++ } + ++ ring = &adev->vcn.inst[j].ring_jpeg; ++ r = amdgpu_ring_test_ring(ring); ++ if (r) { ++ 
ring->sched.ready = false; ++ goto done; ++ } ++ } + done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully.\n"); +@@ -256,21 +280,25 @@ static int vcn_v2_5_hw_init(void *handle) + static int vcn_v2_5_hw_fini(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; +- struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_dec; ++ struct amdgpu_ring *ring; + int i; + +- if (RREG32_SOC15(VCN, 0, mmUVD_STATUS)) +- vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ ring = &adev->vcn.inst[i].ring_dec; + +- ring->sched.ready = false; ++ if (RREG32_SOC15(VCN, i, mmUVD_STATUS)) ++ vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + +- for (i = 0; i < adev->vcn.num_enc_rings; ++i) { +- ring = &adev->vcn.inst[0].ring_enc[i]; + ring->sched.ready = false; +- } + +- ring = &adev->vcn.inst[0].ring_jpeg; +- ring->sched.ready = false; ++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ++ ring = &adev->vcn.inst[i].ring_enc[i]; ++ ring->sched.ready = false; ++ } ++ ++ ring = &adev->vcn.inst[i].ring_jpeg; ++ ring->sched.ready = false; ++ } + + return 0; + } +@@ -328,44 +356,47 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) + { + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; ++ int i; + +- /* cache window 0: fw */ +- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, +- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, +- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0); +- offset = 0; +- } else { +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, +- lower_32_bits(adev->vcn.inst[0].gpu_addr)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, +- upper_32_bits(adev->vcn.inst[0].gpu_addr)); +- offset = size; +- /* No signed header for now from firmware +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, +- AMDGPU_UVD_FIRMWARE_OFFSET >> 3); +- */ +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0); ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ /* cache window 0: fw */ ++ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, ++ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, ++ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); ++ offset = 0; ++ } else { ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, ++ lower_32_bits(adev->vcn.inst[i].gpu_addr)); ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, ++ upper_32_bits(adev->vcn.inst[i].gpu_addr)); ++ offset = size; ++ /* No signed header for now from firmware ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, ++ AMDGPU_UVD_FIRMWARE_OFFSET >> 3); ++ */ ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); ++ } ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); ++ ++ /* cache window 1: stack */ ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, ++ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, ++ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); ++ ++ /* cache window 2: 
context */ ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, ++ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, ++ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + } +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); +- +- /* cache window 1: stack */ +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, +- lower_32_bits(adev->vcn.inst[0].gpu_addr + offset)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, +- upper_32_bits(adev->vcn.inst[0].gpu_addr + offset)); +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); +- +- /* cache window 2: context */ +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, +- lower_32_bits(adev->vcn.inst[0].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, +- upper_32_bits(adev->vcn.inst[0].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + } + + /** +@@ -380,106 +411,109 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) + { + uint32_t data; + int ret = 0; ++ int i; + +- /* UVD disable CGC */ +- data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); +- if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) +- data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; +- else +- data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; +- data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; +- data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; +- WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); +- +- data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE); +- data &= ~(UVD_CGC_GATE__SYS_MASK +- | UVD_CGC_GATE__UDEC_MASK +- | UVD_CGC_GATE__MPEG2_MASK +- | UVD_CGC_GATE__REGS_MASK +- | UVD_CGC_GATE__RBC_MASK +- | UVD_CGC_GATE__LMI_MC_MASK +- | UVD_CGC_GATE__LMI_UMC_MASK +- | UVD_CGC_GATE__IDCT_MASK +- | UVD_CGC_GATE__MPRD_MASK +- | UVD_CGC_GATE__MPC_MASK +- | UVD_CGC_GATE__LBSI_MASK +- | UVD_CGC_GATE__LRBBM_MASK +- | UVD_CGC_GATE__UDEC_RE_MASK +- | UVD_CGC_GATE__UDEC_CM_MASK +- | UVD_CGC_GATE__UDEC_IT_MASK +- | UVD_CGC_GATE__UDEC_DB_MASK +- | UVD_CGC_GATE__UDEC_MP_MASK +- | UVD_CGC_GATE__WCB_MASK +- | UVD_CGC_GATE__VCPU_MASK +- | UVD_CGC_GATE__MMSCH_MASK); +- +- WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data); +- +- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret); +- +- data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); +- data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK +- | UVD_CGC_CTRL__UDEC_CM_MODE_MASK +- | UVD_CGC_CTRL__UDEC_IT_MODE_MASK +- | UVD_CGC_CTRL__UDEC_DB_MODE_MASK +- | UVD_CGC_CTRL__UDEC_MP_MODE_MASK +- | UVD_CGC_CTRL__SYS_MODE_MASK +- | UVD_CGC_CTRL__UDEC_MODE_MASK +- | UVD_CGC_CTRL__MPEG2_MODE_MASK +- | UVD_CGC_CTRL__REGS_MODE_MASK +- | UVD_CGC_CTRL__RBC_MODE_MASK +- | UVD_CGC_CTRL__LMI_MC_MODE_MASK +- | UVD_CGC_CTRL__LMI_UMC_MODE_MASK +- | UVD_CGC_CTRL__IDCT_MODE_MASK +- | UVD_CGC_CTRL__MPRD_MODE_MASK +- | UVD_CGC_CTRL__MPC_MODE_MASK +- | UVD_CGC_CTRL__LBSI_MODE_MASK +- | UVD_CGC_CTRL__LRBBM_MODE_MASK +- | UVD_CGC_CTRL__WCB_MODE_MASK +- | UVD_CGC_CTRL__VCPU_MODE_MASK +- | UVD_CGC_CTRL__MMSCH_MODE_MASK); +- WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); +- +- /* turn on */ +- data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE); +- data |= 
(UVD_SUVD_CGC_GATE__SRE_MASK +- | UVD_SUVD_CGC_GATE__SIT_MASK +- | UVD_SUVD_CGC_GATE__SMP_MASK +- | UVD_SUVD_CGC_GATE__SCM_MASK +- | UVD_SUVD_CGC_GATE__SDB_MASK +- | UVD_SUVD_CGC_GATE__SRE_H264_MASK +- | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK +- | UVD_SUVD_CGC_GATE__SIT_H264_MASK +- | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK +- | UVD_SUVD_CGC_GATE__SCM_H264_MASK +- | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK +- | UVD_SUVD_CGC_GATE__SDB_H264_MASK +- | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK +- | UVD_SUVD_CGC_GATE__SCLR_MASK +- | UVD_SUVD_CGC_GATE__UVD_SC_MASK +- | UVD_SUVD_CGC_GATE__ENT_MASK +- | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK +- | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK +- | UVD_SUVD_CGC_GATE__SITE_MASK +- | UVD_SUVD_CGC_GATE__SRE_VP9_MASK +- | UVD_SUVD_CGC_GATE__SCM_VP9_MASK +- | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK +- | UVD_SUVD_CGC_GATE__SDB_VP9_MASK +- | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); +- WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data); +- +- data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); +- data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK +- | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK +- | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK +- | UVD_SUVD_CGC_CTRL__IME_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); +- WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ /* UVD disable CGC */ ++ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); ++ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) ++ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; ++ else ++ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; ++ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; ++ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; ++ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); ++ ++ data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); ++ data &= ~(UVD_CGC_GATE__SYS_MASK ++ | UVD_CGC_GATE__UDEC_MASK ++ | UVD_CGC_GATE__MPEG2_MASK ++ | UVD_CGC_GATE__REGS_MASK ++ | UVD_CGC_GATE__RBC_MASK ++ | UVD_CGC_GATE__LMI_MC_MASK ++ | UVD_CGC_GATE__LMI_UMC_MASK ++ | UVD_CGC_GATE__IDCT_MASK ++ | UVD_CGC_GATE__MPRD_MASK ++ | UVD_CGC_GATE__MPC_MASK ++ | UVD_CGC_GATE__LBSI_MASK ++ | UVD_CGC_GATE__LRBBM_MASK ++ | UVD_CGC_GATE__UDEC_RE_MASK ++ | UVD_CGC_GATE__UDEC_CM_MASK ++ | UVD_CGC_GATE__UDEC_IT_MASK ++ | UVD_CGC_GATE__UDEC_DB_MASK ++ | UVD_CGC_GATE__UDEC_MP_MASK ++ | UVD_CGC_GATE__WCB_MASK ++ | UVD_CGC_GATE__VCPU_MASK ++ | UVD_CGC_GATE__MMSCH_MASK); ++ ++ WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); ++ ++ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret); ++ ++ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); ++ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK ++ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK ++ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK ++ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK ++ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK ++ | UVD_CGC_CTRL__SYS_MODE_MASK ++ | UVD_CGC_CTRL__UDEC_MODE_MASK ++ | UVD_CGC_CTRL__MPEG2_MODE_MASK ++ | UVD_CGC_CTRL__REGS_MODE_MASK ++ | UVD_CGC_CTRL__RBC_MODE_MASK ++ | UVD_CGC_CTRL__LMI_MC_MODE_MASK ++ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK ++ | UVD_CGC_CTRL__IDCT_MODE_MASK ++ | UVD_CGC_CTRL__MPRD_MODE_MASK ++ | UVD_CGC_CTRL__MPC_MODE_MASK ++ | UVD_CGC_CTRL__LBSI_MODE_MASK ++ | UVD_CGC_CTRL__LRBBM_MODE_MASK ++ | UVD_CGC_CTRL__WCB_MODE_MASK ++ | UVD_CGC_CTRL__VCPU_MODE_MASK ++ | UVD_CGC_CTRL__MMSCH_MODE_MASK); ++ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); ++ ++ /* turn on */ ++ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); ++ data |= 
(UVD_SUVD_CGC_GATE__SRE_MASK ++ | UVD_SUVD_CGC_GATE__SIT_MASK ++ | UVD_SUVD_CGC_GATE__SMP_MASK ++ | UVD_SUVD_CGC_GATE__SCM_MASK ++ | UVD_SUVD_CGC_GATE__SDB_MASK ++ | UVD_SUVD_CGC_GATE__SRE_H264_MASK ++ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK ++ | UVD_SUVD_CGC_GATE__SIT_H264_MASK ++ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK ++ | UVD_SUVD_CGC_GATE__SCM_H264_MASK ++ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK ++ | UVD_SUVD_CGC_GATE__SDB_H264_MASK ++ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK ++ | UVD_SUVD_CGC_GATE__SCLR_MASK ++ | UVD_SUVD_CGC_GATE__UVD_SC_MASK ++ | UVD_SUVD_CGC_GATE__ENT_MASK ++ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK ++ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK ++ | UVD_SUVD_CGC_GATE__SITE_MASK ++ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK ++ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK ++ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK ++ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK ++ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); ++ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); ++ ++ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); ++ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); ++ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); ++ } + } + + /** +@@ -493,51 +527,54 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) + static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) + { + uint32_t data = 0; ++ int i; + +- /* enable UVD CGC */ +- data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); +- if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) +- data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; +- else +- data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; +- data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; +- data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; +- WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); +- +- data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); +- data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK +- | UVD_CGC_CTRL__UDEC_CM_MODE_MASK +- | UVD_CGC_CTRL__UDEC_IT_MODE_MASK +- | UVD_CGC_CTRL__UDEC_DB_MODE_MASK +- | UVD_CGC_CTRL__UDEC_MP_MODE_MASK +- | UVD_CGC_CTRL__SYS_MODE_MASK +- | UVD_CGC_CTRL__UDEC_MODE_MASK +- | UVD_CGC_CTRL__MPEG2_MODE_MASK +- | UVD_CGC_CTRL__REGS_MODE_MASK +- | UVD_CGC_CTRL__RBC_MODE_MASK +- | UVD_CGC_CTRL__LMI_MC_MODE_MASK +- | UVD_CGC_CTRL__LMI_UMC_MODE_MASK +- | UVD_CGC_CTRL__IDCT_MODE_MASK +- | UVD_CGC_CTRL__MPRD_MODE_MASK +- | UVD_CGC_CTRL__MPC_MODE_MASK +- | UVD_CGC_CTRL__LBSI_MODE_MASK +- | UVD_CGC_CTRL__LRBBM_MODE_MASK +- | UVD_CGC_CTRL__WCB_MODE_MASK +- | UVD_CGC_CTRL__VCPU_MODE_MASK); +- WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); +- +- data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); +- data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK +- | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK +- | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK +- | UVD_SUVD_CGC_CTRL__IME_MODE_MASK +- | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); +- WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ /* enable UVD CGC */ ++ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); ++ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) ++ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; ++ else ++ data 
|= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; ++ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; ++ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; ++ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); ++ ++ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); ++ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK ++ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK ++ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK ++ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK ++ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK ++ | UVD_CGC_CTRL__SYS_MODE_MASK ++ | UVD_CGC_CTRL__UDEC_MODE_MASK ++ | UVD_CGC_CTRL__MPEG2_MODE_MASK ++ | UVD_CGC_CTRL__REGS_MODE_MASK ++ | UVD_CGC_CTRL__RBC_MODE_MASK ++ | UVD_CGC_CTRL__LMI_MC_MODE_MASK ++ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK ++ | UVD_CGC_CTRL__IDCT_MODE_MASK ++ | UVD_CGC_CTRL__MPRD_MODE_MASK ++ | UVD_CGC_CTRL__MPC_MODE_MASK ++ | UVD_CGC_CTRL__LBSI_MODE_MASK ++ | UVD_CGC_CTRL__LRBBM_MODE_MASK ++ | UVD_CGC_CTRL__WCB_MODE_MASK ++ | UVD_CGC_CTRL__VCPU_MODE_MASK); ++ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); ++ ++ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); ++ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK ++ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); ++ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); ++ } + } + + /** +@@ -549,60 +586,64 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) + */ + static int jpeg_v2_5_start(struct amdgpu_device *adev) + { +- struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_jpeg; ++ struct amdgpu_ring *ring; + uint32_t tmp; ++ int i; + +- /* disable anti hang mechanism */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), 0, +- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); +- +- /* JPEG disable CGC */ +- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); +- tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; +- tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; +- tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; +- WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); +- +- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); +- tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK +- | JPEG_CGC_GATE__JPEG2_DEC_MASK +- | JPEG_CGC_GATE__JMCIF_MASK +- | JPEG_CGC_GATE__JRBBM_MASK); +- WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); +- +- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); +- tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK +- | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK +- | JPEG_CGC_CTRL__JMCIF_MODE_MASK +- | JPEG_CGC_CTRL__JRBBM_MODE_MASK); +- WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); +- +- /* MJPEG global tiling registers */ +- WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX8_ADDR_CONFIG, +- adev->gfx.config.gb_addr_config); +- WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, +- adev->gfx.config.gb_addr_config); +- +- /* enable JMI channel */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0, +- ~UVD_JMI_CNTL__SOFT_RESET_MASK); +- +- /* enable System Interrupt for JRBC */ +- WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN), +- JPEG_SYS_INT_EN__DJRBC_MASK, +- ~JPEG_SYS_INT_EN__DJRBC_MASK); +- +- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); +- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, +- lower_32_bits(ring->gpu_addr)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, +- upper_32_bits(ring->gpu_addr)); +- WREG32_SOC15(UVD, 
0, mmUVD_JRBC_RB_RPTR, 0); +- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); +- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); +- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); +- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ ring = &adev->vcn.inst[i].ring_jpeg; ++ /* disable anti hang mechanism */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0, ++ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); ++ ++ /* JPEG disable CGC */ ++ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); ++ tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; ++ tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; ++ tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; ++ WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); ++ ++ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); ++ tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK ++ | JPEG_CGC_GATE__JPEG2_DEC_MASK ++ | JPEG_CGC_GATE__JMCIF_MASK ++ | JPEG_CGC_GATE__JRBBM_MASK); ++ WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); ++ ++ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); ++ tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK ++ | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK ++ | JPEG_CGC_CTRL__JMCIF_MODE_MASK ++ | JPEG_CGC_CTRL__JRBBM_MODE_MASK); ++ WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); ++ ++ /* MJPEG global tiling registers */ ++ WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, ++ adev->gfx.config.gb_addr_config); ++ WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, ++ adev->gfx.config.gb_addr_config); ++ ++ /* enable JMI channel */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0, ++ ~UVD_JMI_CNTL__SOFT_RESET_MASK); ++ ++ /* enable System Interrupt for JRBC */ ++ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN), ++ JPEG_SYS_INT_EN__DJRBC_MASK, ++ ~JPEG_SYS_INT_EN__DJRBC_MASK); ++ ++ WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0); ++ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); ++ WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, ++ lower_32_bits(ring->gpu_addr)); ++ WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, ++ upper_32_bits(ring->gpu_addr)); ++ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0); ++ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0); ++ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); ++ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); ++ ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR); ++ } + + return 0; + } +@@ -617,185 +658,194 @@ static int jpeg_v2_5_start(struct amdgpu_device *adev) + static int jpeg_v2_5_stop(struct amdgpu_device *adev) + { + uint32_t tmp; ++ int i; + +- /* reset JMI */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), +- UVD_JMI_CNTL__SOFT_RESET_MASK, +- ~UVD_JMI_CNTL__SOFT_RESET_MASK); +- +- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); +- tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK +- |JPEG_CGC_GATE__JPEG2_DEC_MASK +- |JPEG_CGC_GATE__JMCIF_MASK +- |JPEG_CGC_GATE__JRBBM_MASK); +- WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); +- +- /* enable anti hang mechanism */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), +- UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, +- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ /* reset JMI */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), ++ UVD_JMI_CNTL__SOFT_RESET_MASK, ++ ~UVD_JMI_CNTL__SOFT_RESET_MASK); ++ ++ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); ++ tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK ++ |JPEG_CGC_GATE__JPEG2_DEC_MASK ++ |JPEG_CGC_GATE__JMCIF_MASK ++ |JPEG_CGC_GATE__JRBBM_MASK); ++ 
WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); ++ ++ /* enable anti hang mechanism */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), ++ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, ++ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); ++ } + + return 0; + } + + static int vcn_v2_5_start(struct amdgpu_device *adev) + { +- struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_dec; ++ struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; +- int i, j, r; ++ int i, j, k, r; + +- /* disable register anti-hang mechanism */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, +- ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ /* disable register anti-hang mechanism */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, ++ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + +- /* set uvd status busy */ +- tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; +- WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp); ++ /* set uvd status busy */ ++ tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; ++ WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp); ++ } + + /*SW clock gating */ + vcn_v2_5_disable_clock_gating(adev); + +- /* enable VCPU clock */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), +- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); +- +- /* disable master interrupt */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, +- ~UVD_MASTINT_EN__VCPU_EN_MASK); +- +- /* setup mmUVD_LMI_CTRL */ +- tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL); +- tmp &= ~0xff; +- WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp | 0x8| +- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | +- UVD_LMI_CTRL__MASK_MC_URGENT_MASK | +- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | +- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); +- +- /* setup mmUVD_MPC_CNTL */ +- tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL); +- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; +- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; +- WREG32_SOC15(VCN, 0, mmUVD_MPC_CNTL, tmp); +- +- /* setup UVD_MPC_SET_MUXA0 */ +- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, +- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | +- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | +- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | +- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); +- +- /* setup UVD_MPC_SET_MUXB0 */ +- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, +- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | +- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | +- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | +- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); +- +- /* setup mmUVD_MPC_SET_MUX */ +- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, +- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | +- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | +- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ /* enable VCPU clock */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), ++ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); ++ ++ /* disable master interrupt */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0, ++ ~UVD_MASTINT_EN__VCPU_EN_MASK); ++ ++ /* setup mmUVD_LMI_CTRL */ ++ tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL); ++ tmp &= ~0xff; ++ WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8| ++ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | ++ UVD_LMI_CTRL__MASK_MC_URGENT_MASK | ++ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | ++ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); ++ ++ /* setup mmUVD_MPC_CNTL */ ++ tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL); ++ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; ++ tmp 
|= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; ++ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); ++ ++ /* setup UVD_MPC_SET_MUXA0 */ ++ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0, ++ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | ++ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | ++ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | ++ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); ++ ++ /* setup UVD_MPC_SET_MUXB0 */ ++ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0, ++ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | ++ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | ++ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | ++ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); ++ ++ /* setup mmUVD_MPC_SET_MUX */ ++ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX, ++ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | ++ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | ++ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); ++ } + + vcn_v2_5_mc_resume(adev); + +- /* VCN global tiling registers */ +- WREG32_SOC15(UVD, 0, mmUVD_GFX8_ADDR_CONFIG, +- adev->gfx.config.gb_addr_config); +- WREG32_SOC15(UVD, 0, mmUVD_GFX8_ADDR_CONFIG, +- adev->gfx.config.gb_addr_config); ++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { ++ /* VCN global tiling registers */ ++ WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, ++ adev->gfx.config.gb_addr_config); ++ WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, ++ adev->gfx.config.gb_addr_config); + +- /* enable LMI MC and UMC channels */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, +- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); ++ /* enable LMI MC and UMC channels */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, ++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + +- /* unblock VCPU register access */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_ARB_CTRL), 0, +- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); ++ /* unblock VCPU register access */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0, ++ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0, +- ~UVD_VCPU_CNTL__BLK_RST_MASK); +- +- for (i = 0; i < 10; ++i) { +- uint32_t status; ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, ++ ~UVD_VCPU_CNTL__BLK_RST_MASK); + +- for (j = 0; j < 100; ++j) { +- status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); ++ for (k = 0; k < 10; ++k) { ++ uint32_t status; ++ ++ for (j = 0; j < 100; ++j) { ++ status = RREG32_SOC15(UVD, i, mmUVD_STATUS); ++ if (status & 2) ++ break; ++ if (amdgpu_emu_mode == 1) ++ msleep(500); ++ else ++ mdelay(10); ++ } ++ r = 0; + if (status & 2) + break; +- if (amdgpu_emu_mode == 1) +- msleep(500); +- else +- mdelay(10); +- } +- r = 0; +- if (status & 2) +- break; +- +- DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), +- UVD_VCPU_CNTL__BLK_RST_MASK, +- ~UVD_VCPU_CNTL__BLK_RST_MASK); +- mdelay(10); +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0, +- ~UVD_VCPU_CNTL__BLK_RST_MASK); + +- mdelay(10); +- r = -1; +- } ++ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), ++ UVD_VCPU_CNTL__BLK_RST_MASK, ++ ~UVD_VCPU_CNTL__BLK_RST_MASK); ++ mdelay(10); ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, ++ ~UVD_VCPU_CNTL__BLK_RST_MASK); + +- if (r) { +- DRM_ERROR("VCN decode not responding, giving up!!!\n"); +- return r; +- } ++ mdelay(10); ++ r = -1; ++ } + +- /* enable master interrupt */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), +- UVD_MASTINT_EN__VCPU_EN_MASK, +- ~UVD_MASTINT_EN__VCPU_EN_MASK); +- +- /* clear the busy bit 
of VCN_STATUS */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
+- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+-
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_VMID, 0);
+-
+- /* force RBC into idle state */
+- rb_bufsz = order_base_2(ring->ring_size);
+- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+-
+- /* programm the RB_BASE for ring buffer */
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+- lower_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+- upper_32_bits(ring->gpu_addr));
+-
+- /* Initialize the ring buffer's read and write pointers */
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+-
+- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+- lower_32_bits(ring->wptr));
+- ring = &adev->vcn.inst[0].ring_enc[0];
+- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+-
+- ring = &adev->vcn.inst[0].ring_enc[1];
+- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
++ if (r) {
++ DRM_ERROR("VCN decode not responding, giving up!!!\n");
++ return r;
++ }
+
++ /* enable master interrupt */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
++ UVD_MASTINT_EN__VCPU_EN_MASK,
++ ~UVD_MASTINT_EN__VCPU_EN_MASK);
++
++ /* clear the busy bit of VCN_STATUS */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0,
++ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
++
++ WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0);
++
++ ring = &adev->vcn.inst[i].ring_dec;
++ /* force RBC into idle state */
++ rb_bufsz = order_base_2(ring->ring_size);
++ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
++ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp);
++
++ /* programm the RB_BASE for ring buffer */
++ WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
++ lower_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
++ upper_32_bits(ring->gpu_addr));
++
++ /* Initialize the ring buffer's read and write pointers */
++ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0);
++
++ ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR);
++ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR,
++ lower_32_bits(ring->wptr));
++ ring = &adev->vcn.inst[i].ring_enc[0];
++ WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
++ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4);
++
++ ring = &adev->vcn.inst[i].ring_enc[1];
++ WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
++ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
++ }
+ r = jpeg_v2_5_start(adev);
+
+ return r;
+@@ -804,59 +854,61 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
+ static int vcn_v2_5_stop(struct amdgpu_device *adev)
+ {
+ uint32_t tmp;
+- int r;
++ int i, r;
+
+ r = jpeg_v2_5_stop(adev);
+ if (r)
+ return r;
+
+- /* wait for vcn idle */
+- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
+- if (r)
+- return r;
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ /* wait for vcn idle */
++ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
++ if (r)
++ return r;
+
+- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+- UVD_LMI_STATUS__READ_CLEAN_MASK |
+- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r);
+- if (r)
+- return r;
++ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
++ UVD_LMI_STATUS__READ_CLEAN_MASK |
++ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
++ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
++ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
++ if (r)
++ return r;
+
+- /* block LMI UMC channel */
+- tmp = RREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2);
+- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+- WREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2, tmp);
++ /* block LMI UMC channel */
++ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
++ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
++ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+
+- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r);
+- if (r)
+- return r;
++ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
++ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
++ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
++ if (r)
++ return r;
+
+- /* block VCPU register access */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_ARB_CTRL),
+- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
++ /* block VCPU register access */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL),
++ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
++ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+- /* reset VCPU */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
+- UVD_VCPU_CNTL__BLK_RST_MASK,
+- ~UVD_VCPU_CNTL__BLK_RST_MASK);
++ /* reset VCPU */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
++ UVD_VCPU_CNTL__BLK_RST_MASK,
++ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+- /* disable VCPU clock */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
+- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
++ /* disable VCPU clock */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
++ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+- /* clear status */
+- WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
++ /* clear status */
++ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+
+- vcn_v2_5_enable_clock_gating(adev);
++ vcn_v2_5_enable_clock_gating(adev);
+
+- /* enable register anti-hang mechanism */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
+- ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
++ /* enable register anti-hang mechanism */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS),
++ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
++ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
++ }
+
+ return 0;
+ }
+@@ -872,7 +924,7 @@ static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
+ }
+
+ /**
+@@ -889,7 +941,7 @@ static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
+ }
+
+ /**
+@@ -907,7 +959,7 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ }
+
+@@ -952,10 +1004,10 @@ static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- if (ring == &adev->vcn.inst[0].ring_enc[0])
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
++ if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
+ }
+
+ /**
+@@ -969,16 +1021,16 @@ static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- if (ring == &adev->vcn.inst[0].ring_enc[0]) {
++ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
+ } else {
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
+ }
+ }
+
+@@ -993,19 +1045,19 @@ static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- if (ring == &adev->vcn.inst[0].ring_enc[0]) {
++ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ } else {
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ }
+ }
+ }
+@@ -1051,7 +1103,7 @@ static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR);
+ }
+
+ /**
+@@ -1068,7 +1120,7 @@ static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR);
+ }
+
+ /**
+@@ -1086,7 +1138,7 @@ static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ }
+
+@@ -1122,40 +1174,62 @@ static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = {
+
+ static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+ {
+- adev->vcn.inst[0].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
+- DRM_INFO("VCN decode is enabled in VM mode\n");
++ int i;
++
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
++ adev->vcn.inst[i].ring_dec.me = i;
++ DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
++ }
+ }
+
+ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
+ {
+- int i;
+-
+- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+- adev->vcn.inst[0].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
++ int i, j;
+
+- DRM_INFO("VCN encode is enabled in VM mode\n");
++ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
++ adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
++ adev->vcn.inst[j].ring_enc[i].me = j;
++ }
++ DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j);
++ }
+ }
+
+ static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev)
+ {
+- adev->vcn.inst[0].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs;
+- DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
++ int i;
++
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs;
++ adev->vcn.inst[i].ring_jpeg.me = i;
++ DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i);
++ }
+ }
+
+ static bool vcn_v2_5_is_idle(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++ int i, ret = 1;
++
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
++ }
+
+- return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
++ return ret;
+ }
+
+ static int vcn_v2_5_wait_for_idle(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+- int ret = 0;
++ int i, ret = 0;
+
+- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
+- UVD_STATUS__IDLE, ret);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
++ UVD_STATUS__IDLE, ret);
++ if (ret)
++ return ret;
++ }
+
+ return ret;
+ }
+@@ -1209,20 +1283,34 @@ static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+ {
++ uint32_t ip_instance;
++
++ switch (entry->client_id) {
++ case SOC15_IH_CLIENTID_VCN:
++ ip_instance = 0;
++ break;
++ case SOC15_IH_CLIENTID_VCN1:
++ ip_instance = 1;
++ break;
++ default:
++ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
++ return 0;
++ }
++
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
+- amdgpu_fence_process(&adev->vcn.inst[0].ring_dec);
++ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+- amdgpu_fence_process(&adev->vcn.inst[0].ring_enc[0]);
++ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
+- amdgpu_fence_process(&adev->vcn.inst[0].ring_enc[1]);
++ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
+ break;
+ case VCN_2_0__SRCID__JPEG_DECODE:
+- amdgpu_fence_process(&adev->vcn.inst[0].ring_jpeg);
++ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+@@ -1240,8 +1328,12 @@ static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
+
+ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
+ {
+- adev->vcn.inst[0].irq.num_types = adev->vcn.num_enc_rings + 2;
+- adev->vcn.inst[0].irq.funcs = &vcn_v2_5_irq_funcs;
++ int i;
++
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2;
++ adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
++ }
+ }
+
+ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
+--
+2.17.1
+