From 009b039b27a5e490402c6e20a7e069cb47993dc4 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 10 Jul 2019 11:06:37 -0500 Subject: [PATCH 2965/4256] drm/amdgpu: add multiple instances support for Arcturus Arcturus has dual-VCN. Need add multiple instances support for Arcturus. Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 20 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 166 ++-- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 1178 ++++++++++++----------- 3 files changed, 737 insertions(+), 627 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 8782a58570e2..4169f6936367 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -402,23 +402,29 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.inst[0].ring_dec.sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.inst[i].ring_dec.sched.ready) + ++num_rings; + } ib_start_alignment = 16; ib_size_alignment = 16; break; case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; - for (i = 0; i < adev->vcn.num_enc_rings; i++) - if (adev->vcn.inst[0].ring_enc[i].sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + for (j = 0; j < adev->vcn.num_enc_rings; j++) + if (adev->vcn.inst[i].ring_enc[j].sched.ready) + ++num_rings; + } ib_start_alignment = 64; ib_size_alignment = 1; break; case AMDGPU_HW_IP_VCN_JPEG: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.inst[0].ring_jpeg.sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.inst[i].ring_jpeg.sched.ready) + ++num_rings; + } ib_start_alignment = 16; ib_size_alignment = 16; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 330f355b93a9..5016fc570211 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -64,7 +64,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) const char *fw_name; const struct common_firmware_header *hdr; unsigned char fw_check; - int r; + int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -145,12 +145,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[0].vcpu_bo, - &adev->vcn.inst[0].gpu_addr, &adev->vcn.inst[0].cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); - return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo, + &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + return r; + } } if (adev->vcn.indirect_sram) { @@ -168,26 +171,28 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { - int i; - - kvfree(adev->vcn.inst[0].saved_bo); + int i, j; if (adev->vcn.indirect_sram) { amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, - (void **)&adev->vcn.dpg_sram_cpu_addr); + &adev->vcn.dpg_sram_gpu_addr, + (void **)&adev->vcn.dpg_sram_cpu_addr); } - amdgpu_bo_free_kernel(&adev->vcn.inst[0].vcpu_bo, - &adev->vcn.inst[0].gpu_addr, - (void **)&adev->vcn.inst[0].cpu_addr); + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + kvfree(adev->vcn.inst[j].saved_bo); - amdgpu_ring_fini(&adev->vcn.inst[0].ring_dec); + amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, + &adev->vcn.inst[j].gpu_addr, + (void **)&adev->vcn.inst[j].cpu_addr); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->vcn.inst[0].ring_enc[i]); + amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec); - amdgpu_ring_fini(&adev->vcn.inst[0].ring_jpeg); + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); + + amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg); + } release_firmware(adev->vcn.fw); @@ -198,21 +203,23 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; cancel_delayed_work_sync(&adev->vcn.idle_work); - if (adev->vcn.inst[0].vcpu_bo == NULL) - return 0; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.inst[i].vcpu_bo == NULL) + return 0; - size = amdgpu_bo_size(adev->vcn.inst[0].vcpu_bo); - ptr = adev->vcn.inst[0].cpu_addr; + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; - adev->vcn.inst[0].saved_bo = kvmalloc(size, GFP_KERNEL); - if (!adev->vcn.inst[0].saved_bo) - return -ENOMEM; - - memcpy_fromio(adev->vcn.inst[0].saved_bo, ptr, size); + adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); + if (!adev->vcn.inst[i].saved_bo) + return -ENOMEM; + memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); + } return 0; } @@ -220,32 +227,34 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; - if (adev->vcn.inst[0].vcpu_bo == NULL) - return -EINVAL; - - size = amdgpu_bo_size(adev->vcn.inst[0].vcpu_bo); - ptr = adev->vcn.inst[0].cpu_addr; - - if (adev->vcn.inst[0].saved_bo != NULL) { - memcpy_toio(ptr, adev->vcn.inst[0].saved_bo, size); - kvfree(adev->vcn.inst[0].saved_bo); - adev->vcn.inst[0].saved_bo = NULL; - } else { - const struct common_firmware_header *hdr; - unsigned offset; - - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->vcn.inst[0].cpu_addr, adev->vcn.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.inst[i].vcpu_bo == NULL) + return -EINVAL; + + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; + + if (adev->vcn.inst[i].saved_bo != NULL) { + memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + kvfree(adev->vcn.inst[i].saved_bo); + adev->vcn.inst[i].saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + } + memset_io(ptr, 0, size); } - memset_io(ptr, 0, size); } - return 0; } @@ -253,31 +262,34 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vcn.idle_work.work); - unsigned int fences = 0; - unsigned int i; + unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; + unsigned int i, j; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_enc[i]); - } + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); + } - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - struct dpg_pause_state new_state; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; - if (fences) - new_state.fw_based = VCN_DPG_STATE__PAUSE; - else - new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + if (fence[j]) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg)) - new_state.jpeg = VCN_DPG_STATE__PAUSE; - else - new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - adev->vcn.pause_dpg_mode(adev, &new_state); - } + adev->vcn.pause_dpg_mode(adev, &new_state); + } - fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg); - fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_dec); + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg); + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); + fences += fence[j]; + } if (fences == 0) { amdgpu_gfx_off_ctrl(adev, true); @@ -311,14 +323,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) unsigned int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_enc[i]); + fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]); } if (fences) new_state.fw_based = VCN_DPG_STATE__PAUSE; else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg)) + if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg)) new_state.jpeg = VCN_DPG_STATE__PAUSE; else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; @@ -344,7 +356,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(adev->vcn.inst[0].external.scratch9, 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; @@ -352,7 +364,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.inst[0].external.scratch9); + tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); @@ -663,7 +675,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(adev->vcn.inst[0].external.jpeg_pitch, 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; @@ -673,7 +685,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.inst[0].external.jpeg_pitch); + tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); @@ -747,7 +759,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) } for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.inst[0].external.jpeg_pitch); + tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index e27351267c9e..b7dc069b637c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -48,6 +48,8 @@ #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 + static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev); @@ -55,6 +57,11 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v2_5_set_powergating_state(void *handle, enum amd_powergating_state state); +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + /** * vcn_v2_5_early_init - set function pointers * @@ -65,8 +72,11 @@ static int vcn_v2_5_set_powergating_state(void *handle, static int vcn_v2_5_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_ARCTURUS) - adev->vcn.num_vcn_inst = 1; + adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS; + else + adev->vcn.num_vcn_inst = 1; adev->vcn.num_enc_rings = 2; vcn_v2_5_set_dec_ring_funcs(adev); @@ -87,29 +97,31 @@ static int vcn_v2_5_early_init(void *handle) static int vcn_v2_5_sw_init(void *handle) { struct amdgpu_ring *ring; - int i, r; + int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* VCN DEC TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[0].irq); - if (r) - return r; + for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + /* VCN DEC TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], + VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq); + if (r) + return r; + + /* VCN ENC TRAP */ + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], + i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq); + if (r) + return r; + } - /* VCN ENC TRAP */ - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[0].irq); + /* VCN JPEG TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], + VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq); if (r) return r; } - /* VCN JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[0].irq); - if (r) - return r; - r = amdgpu_vcn_sw_init(adev); if (r) return r; @@ -121,6 +133,13 @@ static int vcn_v2_5_sw_init(void *handle) adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + + if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) { + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + } DRM_INFO("PSP loading VCN firmware\n"); } @@ -128,52 +147,54 @@ static int vcn_v2_5_sw_init(void *handle) if (r) return r; - ring = &adev->vcn.inst[0].ring_dec; - ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; - sprintf(ring->name, "vcn_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0); - if (r) - return r; - - adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; - adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; - adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; - adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; - adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; - adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; - - adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; - adev->vcn.inst[0].external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); - adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; - adev->vcn.inst[0].external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); - adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; - adev->vcn.inst[0].external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); - adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; - adev->vcn.inst[0].external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); - adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; - adev->vcn.inst[0].external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); - - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.inst[0].ring_enc[i]; + for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9); + adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP); + + adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH); + + ring = &adev->vcn.inst[j].ring_dec; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; - sprintf(ring->name, "vcn_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0); + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j; + sprintf(ring->name, "vcn_dec_%d", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); if (r) return r; - } - ring = &adev->vcn.inst[0].ring_jpeg; - ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; - sprintf(ring->name, "vcn_jpeg"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0); - if (r) - return r; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[j].ring_enc[i]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j; + sprintf(ring->name, "vcn_enc_%d.%d", j, i); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + } - adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->vcn.inst[0].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); + ring = &adev->vcn.inst[j].ring_jpeg; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j; + sprintf(ring->name, "vcn_jpeg_%d", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + } return 0; } @@ -209,36 +230,39 @@ static int vcn_v2_5_sw_fini(void *handle) static int vcn_v2_5_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_dec; - int i, r; + struct amdgpu_ring *ring; + int i, j, r; - adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ring->doorbell_index, 0); + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + ring = &adev->vcn.inst[j].ring_dec; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; - goto done; - } + adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index, j); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.inst[0].ring_enc[i]; - ring->sched.ready = false; - continue; r = amdgpu_ring_test_ring(ring); if (r) { ring->sched.ready = false; goto done; } - } - ring = &adev->vcn.inst[0].ring_jpeg; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; - goto done; - } + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[j].ring_enc[i]; + ring->sched.ready = false; + continue; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + goto done; + } + } + ring = &adev->vcn.inst[j].ring_jpeg; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + goto done; + } + } done: if (!r) DRM_INFO("VCN decode and encode initialized successfully.\n"); @@ -256,21 +280,25 @@ static int vcn_v2_5_hw_init(void *handle) static int vcn_v2_5_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_dec; + struct amdgpu_ring *ring; int i; - if (RREG32_SOC15(VCN, 0, mmUVD_STATUS)) - vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_dec; - ring->sched.ready = false; + if (RREG32_SOC15(VCN, i, mmUVD_STATUS)) + vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.inst[0].ring_enc[i]; ring->sched.ready = false; - } - ring = &adev->vcn.inst[0].ring_jpeg; - ring->sched.ready = false; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[i].ring_enc[i]; + ring->sched.ready = false; + } + + ring = &adev->vcn.inst[i].ring_jpeg; + ring->sched.ready = false; + } return 0; } @@ -328,44 +356,47 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) { uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); uint32_t offset; + int i; - /* cache window 0: fw */ - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0); - offset = 0; - } else { - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[0].gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[0].gpu_addr)); - offset = size; - /* No signed header for now from firmware - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); - */ - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = size; + /* No signed header for now from firmware + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + */ + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); + } + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); } - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); - - /* cache window 1: stack */ - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[0].gpu_addr + offset)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[0].gpu_addr + offset)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); - - /* cache window 2: context */ - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[0].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[0].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); } /** @@ -380,106 +411,109 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) { uint32_t data; int ret = 0; + int i; - /* UVD disable CGC */ - data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); - if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) - data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - else - data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; - data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; - data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); - - data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE); - data &= ~(UVD_CGC_GATE__SYS_MASK - | UVD_CGC_GATE__UDEC_MASK - | UVD_CGC_GATE__MPEG2_MASK - | UVD_CGC_GATE__REGS_MASK - | UVD_CGC_GATE__RBC_MASK - | UVD_CGC_GATE__LMI_MC_MASK - | UVD_CGC_GATE__LMI_UMC_MASK - | UVD_CGC_GATE__IDCT_MASK - | UVD_CGC_GATE__MPRD_MASK - | UVD_CGC_GATE__MPC_MASK - | UVD_CGC_GATE__LBSI_MASK - | UVD_CGC_GATE__LRBBM_MASK - | UVD_CGC_GATE__UDEC_RE_MASK - | UVD_CGC_GATE__UDEC_CM_MASK - | UVD_CGC_GATE__UDEC_IT_MASK - | UVD_CGC_GATE__UDEC_DB_MASK - | UVD_CGC_GATE__UDEC_MP_MASK - | UVD_CGC_GATE__WCB_MASK - | UVD_CGC_GATE__VCPU_MASK - | UVD_CGC_GATE__MMSCH_MASK); - - WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data); - - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret); - - data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); - data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK - | UVD_CGC_CTRL__UDEC_CM_MODE_MASK - | UVD_CGC_CTRL__UDEC_IT_MODE_MASK - | UVD_CGC_CTRL__UDEC_DB_MODE_MASK - | UVD_CGC_CTRL__UDEC_MP_MODE_MASK - | UVD_CGC_CTRL__SYS_MODE_MASK - | UVD_CGC_CTRL__UDEC_MODE_MASK - | UVD_CGC_CTRL__MPEG2_MODE_MASK - | UVD_CGC_CTRL__REGS_MODE_MASK - | UVD_CGC_CTRL__RBC_MODE_MASK - | UVD_CGC_CTRL__LMI_MC_MODE_MASK - | UVD_CGC_CTRL__LMI_UMC_MODE_MASK - | UVD_CGC_CTRL__IDCT_MODE_MASK - | UVD_CGC_CTRL__MPRD_MODE_MASK - | UVD_CGC_CTRL__MPC_MODE_MASK - | UVD_CGC_CTRL__LBSI_MODE_MASK - | UVD_CGC_CTRL__LRBBM_MODE_MASK - | UVD_CGC_CTRL__WCB_MODE_MASK - | UVD_CGC_CTRL__VCPU_MODE_MASK - | UVD_CGC_CTRL__MMSCH_MODE_MASK); - WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); - - /* turn on */ - data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE); - data |= (UVD_SUVD_CGC_GATE__SRE_MASK - | UVD_SUVD_CGC_GATE__SIT_MASK - | UVD_SUVD_CGC_GATE__SMP_MASK - | UVD_SUVD_CGC_GATE__SCM_MASK - | UVD_SUVD_CGC_GATE__SDB_MASK - | UVD_SUVD_CGC_GATE__SRE_H264_MASK - | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK - | UVD_SUVD_CGC_GATE__SIT_H264_MASK - | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK - | UVD_SUVD_CGC_GATE__SCM_H264_MASK - | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK - | UVD_SUVD_CGC_GATE__SDB_H264_MASK - | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK - | UVD_SUVD_CGC_GATE__SCLR_MASK - | UVD_SUVD_CGC_GATE__UVD_SC_MASK - | UVD_SUVD_CGC_GATE__ENT_MASK - | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK - | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK - | UVD_SUVD_CGC_GATE__SITE_MASK - | UVD_SUVD_CGC_GATE__SRE_VP9_MASK - | UVD_SUVD_CGC_GATE__SCM_VP9_MASK - | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK - | UVD_SUVD_CGC_GATE__SDB_VP9_MASK - | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); - WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data); - - data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); - data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK - | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK - | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK - | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK - | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK - | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK - | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK - | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK - | UVD_SUVD_CGC_CTRL__IME_MODE_MASK - | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); - WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + /* UVD disable CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__MMSCH_MASK); + + WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); + + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + /* turn on */ + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__UVD_SC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); + } } /** @@ -493,51 +527,54 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) { uint32_t data = 0; + int i; - /* enable UVD CGC */ - data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); - if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) - data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - else - data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; - data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); - - data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); - data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK - | UVD_CGC_CTRL__UDEC_CM_MODE_MASK - | UVD_CGC_CTRL__UDEC_IT_MODE_MASK - | UVD_CGC_CTRL__UDEC_DB_MODE_MASK - | UVD_CGC_CTRL__UDEC_MP_MODE_MASK - | UVD_CGC_CTRL__SYS_MODE_MASK - | UVD_CGC_CTRL__UDEC_MODE_MASK - | UVD_CGC_CTRL__MPEG2_MODE_MASK - | UVD_CGC_CTRL__REGS_MODE_MASK - | UVD_CGC_CTRL__RBC_MODE_MASK - | UVD_CGC_CTRL__LMI_MC_MODE_MASK - | UVD_CGC_CTRL__LMI_UMC_MODE_MASK - | UVD_CGC_CTRL__IDCT_MODE_MASK - | UVD_CGC_CTRL__MPRD_MODE_MASK - | UVD_CGC_CTRL__MPC_MODE_MASK - | UVD_CGC_CTRL__LBSI_MODE_MASK - | UVD_CGC_CTRL__LRBBM_MODE_MASK - | UVD_CGC_CTRL__WCB_MODE_MASK - | UVD_CGC_CTRL__VCPU_MODE_MASK); - WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); - - data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); - data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK - | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK - | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK - | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK - | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK - | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK - | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK - | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK - | UVD_SUVD_CGC_CTRL__IME_MODE_MASK - | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); - WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + /* enable UVD CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); + } } /** @@ -549,60 +586,64 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) */ static int jpeg_v2_5_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_jpeg; + struct amdgpu_ring *ring; uint32_t tmp; + int i; - /* disable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), 0, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); - - /* JPEG disable CGC */ - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); - tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; - tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); - - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); - tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK - | JPEG_CGC_GATE__JPEG2_DEC_MASK - | JPEG_CGC_GATE__JMCIF_MASK - | JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); - - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); - tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK - | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK - | JPEG_CGC_CTRL__JMCIF_MODE_MASK - | JPEG_CGC_CTRL__JRBBM_MODE_MASK); - WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); - - /* MJPEG global tiling registers */ - WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC_MASK, - ~JPEG_SYS_INT_EN__DJRBC_MASK); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); - ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_jpeg; + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); + tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); + tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); + tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK + | JPEG_CGC_CTRL__JMCIF_MODE_MASK + | JPEG_CGC_CTRL__JRBBM_MODE_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); + + /* MJPEG global tiling registers */ + WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR); + } return 0; } @@ -617,185 +658,194 @@ static int jpeg_v2_5_start(struct amdgpu_device *adev) static int jpeg_v2_5_stop(struct amdgpu_device *adev) { uint32_t tmp; + int i; - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); - tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK - |JPEG_CGC_GATE__JPEG2_DEC_MASK - |JPEG_CGC_GATE__JMCIF_MASK - |JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); - - /* enable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), - UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); + tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + } return 0; } static int vcn_v2_5_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_dec; + struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; - int i, j, r; + int i, j, k, r; - /* disable register anti-hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, - ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); - /* set uvd status busy */ - tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; - WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp); + /* set uvd status busy */ + tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp); + } /*SW clock gating */ vcn_v2_5_disable_clock_gating(adev); - /* enable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); - - /* disable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); - - /* setup mmUVD_LMI_CTRL */ - tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL); - tmp &= ~0xff; - WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp | 0x8| - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); - - /* setup mmUVD_MPC_CNTL */ - tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL); - tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; - tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; - WREG32_SOC15(VCN, 0, mmUVD_MPC_CNTL, tmp); - - /* setup UVD_MPC_SET_MUXA0 */ - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, - ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); - - /* setup UVD_MPC_SET_MUXB0 */ - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, - ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); - - /* setup mmUVD_MPC_SET_MUX */ - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, - ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | - (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* setup mmUVD_LMI_CTRL */ + tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL); + tmp &= ~0xff; + WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8| + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup mmUVD_MPC_CNTL */ + tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup mmUVD_MPC_SET_MUX */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + } vcn_v2_5_mc_resume(adev); - /* VCN global tiling registers */ - WREG32_SOC15(UVD, 0, mmUVD_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, 0, mmUVD_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + /* VCN global tiling registers */ + WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); - /* enable LMI MC and UMC channels */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - /* unblock VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - - for (i = 0; i < 10; ++i) { - uint32_t status; + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); - for (j = 0; j < 100; ++j) { - status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); + for (k = 0; k < 10; ++k) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(UVD, i, mmUVD_STATUS); + if (status & 2) + break; + if (amdgpu_emu_mode == 1) + msleep(500); + else + mdelay(10); + } + r = 0; if (status & 2) break; - if (amdgpu_emu_mode == 1) - msleep(500); - else - mdelay(10); - } - r = 0; - if (status & 2) - break; - - DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - mdelay(10); - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - mdelay(10); - r = -1; - } + DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); - if (r) { - DRM_ERROR("VCN decode not responding, giving up!!!\n"); - return r; - } + mdelay(10); + r = -1; + } - /* enable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); - - /* clear the busy bit of VCN_STATUS */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_VMID, 0); - - /* force RBC into idle state */ - rb_bufsz = order_base_2(ring->ring_size); - tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); - - /* programm the RB_BASE for ring buffer */ - WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - - /* Initialize the ring buffer's read and write pointers */ - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); - - ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - lower_32_bits(ring->wptr)); - ring = &adev->vcn.inst[0].ring_enc[0]; - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - - ring = &adev->vcn.inst[0].ring_enc[1]; - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + if (r) { + DRM_ERROR("VCN decode not responding, giving up!!!\n"); + return r; + } + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0); + + ring = &adev->vcn.inst[i].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vcn.inst[i].ring_enc[1]; + WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + } r = jpeg_v2_5_start(adev); return r; @@ -804,59 +854,61 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) static int vcn_v2_5_stop(struct amdgpu_device *adev) { uint32_t tmp; - int r; + int i, r; r = jpeg_v2_5_stop(adev); if (r) return r; - /* wait for vcn idle */ - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + /* wait for vcn idle */ + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); + if (r) + return r; - tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | - UVD_LMI_STATUS__READ_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r); - if (r) - return r; + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; - /* block LMI UMC channel */ - tmp = RREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2); - tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; - WREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2, tmp); + /* block LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); - tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| - UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r); - if (r) - return r; + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; - /* block VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_ARB_CTRL), - UVD_RB_ARB_CTRL__VCPU_DIS_MASK, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - /* reset VCPU */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); - /* disable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0, - ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); - /* clear status */ - WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0); + /* clear status */ + WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); - vcn_v2_5_enable_clock_gating(adev); + vcn_v2_5_enable_clock_gating(adev); - /* enable register anti-hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, - ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + /* enable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + } return 0; } @@ -872,7 +924,7 @@ static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); } /** @@ -889,7 +941,7 @@ static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else - return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); } /** @@ -907,7 +959,7 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } } @@ -952,10 +1004,10 @@ static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.inst[0].ring_enc[0]) - return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); else - return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); } /** @@ -969,16 +1021,16 @@ static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.inst[0].ring_enc[0]) { + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else - return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); } else { if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else - return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); } } @@ -993,19 +1045,19 @@ static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.inst[0].ring_enc[0]) { + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); } } else { if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); } } } @@ -1051,7 +1103,7 @@ static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR); } /** @@ -1068,7 +1120,7 @@ static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else - return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR); } /** @@ -1086,7 +1138,7 @@ static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring) adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); } } @@ -1122,40 +1174,62 @@ static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = { static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.inst[0].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; - DRM_INFO("VCN decode is enabled in VM mode\n"); + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; + adev->vcn.inst[i].ring_dec.me = i; + DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); + } } static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) { - int i; - - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - adev->vcn.inst[0].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; + int i, j; - DRM_INFO("VCN encode is enabled in VM mode\n"); + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; + adev->vcn.inst[j].ring_enc[i].me = j; + } + DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j); + } } static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.inst[0].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs; - DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs; + adev->vcn.inst[i].ring_jpeg.me = i; + DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i); + } } static bool vcn_v2_5_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); + } - return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE); + return ret; } static int vcn_v2_5_wait_for_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret = 0; + int i, ret = 0; - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, - UVD_STATUS__IDLE, ret); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE, ret); + if (ret) + return ret; + } return ret; } @@ -1209,20 +1283,34 @@ static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + DRM_DEBUG("IH: VCN TRAP\n"); switch (entry->src_id) { case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: - amdgpu_fence_process(&adev->vcn.inst[0].ring_dec); + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); break; case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: - amdgpu_fence_process(&adev->vcn.inst[0].ring_enc[0]); + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); break; case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: - amdgpu_fence_process(&adev->vcn.inst[0].ring_enc[1]); + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); break; case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->vcn.inst[0].ring_jpeg); + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -1240,8 +1328,12 @@ static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = { static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.inst[0].irq.num_types = adev->vcn.num_enc_rings + 2; - adev->vcn.inst[0].irq.funcs = &vcn_v2_5_irq_funcs; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs; + } } static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { -- 2.17.1