Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2965-drm-amdgpu-add-multiple-instances-support-for-Arctur.patch')
-rw-r--r--  meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2965-drm-amdgpu-add-multiple-instances-support-for-Arctur.patch  1833
1 file changed, 1833 insertions(+), 0 deletions(-)
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2965-drm-amdgpu-add-multiple-instances-support-for-Arctur.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2965-drm-amdgpu-add-multiple-instances-support-for-Arctur.patch
new file mode 100644
index 00000000..2f3ee5fa
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2965-drm-amdgpu-add-multiple-instances-support-for-Arctur.patch
@@ -0,0 +1,1833 @@
+From 009b039b27a5e490402c6e20a7e069cb47993dc4 Mon Sep 17 00:00:00 2001
+From: James Zhu <James.Zhu@amd.com>
+Date: Wed, 10 Jul 2019 11:06:37 -0500
+Subject: [PATCH 2965/4256] drm/amdgpu: add multiple instances support for
+ Arcturus
+
+Arcturus has dual-VCN hardware, so add support for multiple VCN instances.
+
+Signed-off-by: James Zhu <James.Zhu@amd.com>
+Reviewed-by: Leo Liu <leo.liu@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 20 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 166 ++--
+ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 1178 ++++++++++++-----------
+ 3 files changed, 737 insertions(+), 627 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index 8782a58570e2..4169f6936367 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -402,23 +402,29 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+- if (adev->vcn.inst[0].ring_dec.sched.ready)
+- ++num_rings;
++ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
++ if (adev->vcn.inst[i].ring_dec.sched.ready)
++ ++num_rings;
++ }
+ ib_start_alignment = 16;
+ ib_size_alignment = 16;
+ break;
+ case AMDGPU_HW_IP_VCN_ENC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+- for (i = 0; i < adev->vcn.num_enc_rings; i++)
+- if (adev->vcn.inst[0].ring_enc[i].sched.ready)
+- ++num_rings;
++ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
++ for (j = 0; j < adev->vcn.num_enc_rings; j++)
++ if (adev->vcn.inst[i].ring_enc[j].sched.ready)
++ ++num_rings;
++ }
+ ib_start_alignment = 64;
+ ib_size_alignment = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+- if (adev->vcn.inst[0].ring_jpeg.sched.ready)
+- ++num_rings;
++ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
++ if (adev->vcn.inst[i].ring_jpeg.sched.ready)
++ ++num_rings;
++ }
+ ib_start_alignment = 16;
+ ib_size_alignment = 16;
+ break;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+index 330f355b93a9..5016fc570211 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+@@ -64,7 +64,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+ const char *fw_name;
+ const struct common_firmware_header *hdr;
+ unsigned char fw_check;
+- int r;
++ int i, r;
+
+ INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
+
+@@ -145,12 +145,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+ bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+- AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[0].vcpu_bo,
+- &adev->vcn.inst[0].gpu_addr, &adev->vcn.inst[0].cpu_addr);
+- if (r) {
+- dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+- return r;
++
++ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
++ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
++ AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
++ &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
++ if (r) {
++ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
++ return r;
++ }
+ }
+
+ if (adev->vcn.indirect_sram) {
+@@ -168,26 +171,28 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+
+ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
+ {
+- int i;
+-
+- kvfree(adev->vcn.inst[0].saved_bo);
++ int i, j;
+
+ if (adev->vcn.indirect_sram) {
+ amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo,
+- &adev->vcn.dpg_sram_gpu_addr,
+- (void **)&adev->vcn.dpg_sram_cpu_addr);
++ &adev->vcn.dpg_sram_gpu_addr,
++ (void **)&adev->vcn.dpg_sram_cpu_addr);
+ }
+
+- amdgpu_bo_free_kernel(&adev->vcn.inst[0].vcpu_bo,
+- &adev->vcn.inst[0].gpu_addr,
+- (void **)&adev->vcn.inst[0].cpu_addr);
++ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
++ kvfree(adev->vcn.inst[j].saved_bo);
+
+- amdgpu_ring_fini(&adev->vcn.inst[0].ring_dec);
++ amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
++ &adev->vcn.inst[j].gpu_addr,
++ (void **)&adev->vcn.inst[j].cpu_addr);
+
+- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+- amdgpu_ring_fini(&adev->vcn.inst[0].ring_enc[i]);
++ amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);
+
+- amdgpu_ring_fini(&adev->vcn.inst[0].ring_jpeg);
++ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
++ amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
++
++ amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg);
++ }
+
+ release_firmware(adev->vcn.fw);
+
+@@ -198,21 +203,23 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
+ {
+ unsigned size;
+ void *ptr;
++ int i;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+- if (adev->vcn.inst[0].vcpu_bo == NULL)
+- return 0;
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ if (adev->vcn.inst[i].vcpu_bo == NULL)
++ return 0;
+
+- size = amdgpu_bo_size(adev->vcn.inst[0].vcpu_bo);
+- ptr = adev->vcn.inst[0].cpu_addr;
++ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
++ ptr = adev->vcn.inst[i].cpu_addr;
+
+- adev->vcn.inst[0].saved_bo = kvmalloc(size, GFP_KERNEL);
+- if (!adev->vcn.inst[0].saved_bo)
+- return -ENOMEM;
+-
+- memcpy_fromio(adev->vcn.inst[0].saved_bo, ptr, size);
++ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
++ if (!adev->vcn.inst[i].saved_bo)
++ return -ENOMEM;
+
++ memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
++ }
+ return 0;
+ }
+
+@@ -220,32 +227,34 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
+ {
+ unsigned size;
+ void *ptr;
++ int i;
+
+- if (adev->vcn.inst[0].vcpu_bo == NULL)
+- return -EINVAL;
+-
+- size = amdgpu_bo_size(adev->vcn.inst[0].vcpu_bo);
+- ptr = adev->vcn.inst[0].cpu_addr;
+-
+- if (adev->vcn.inst[0].saved_bo != NULL) {
+- memcpy_toio(ptr, adev->vcn.inst[0].saved_bo, size);
+- kvfree(adev->vcn.inst[0].saved_bo);
+- adev->vcn.inst[0].saved_bo = NULL;
+- } else {
+- const struct common_firmware_header *hdr;
+- unsigned offset;
+-
+- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+- memcpy_toio(adev->vcn.inst[0].cpu_addr, adev->vcn.fw->data + offset,
+- le32_to_cpu(hdr->ucode_size_bytes));
+- size -= le32_to_cpu(hdr->ucode_size_bytes);
+- ptr += le32_to_cpu(hdr->ucode_size_bytes);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ if (adev->vcn.inst[i].vcpu_bo == NULL)
++ return -EINVAL;
++
++ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
++ ptr = adev->vcn.inst[i].cpu_addr;
++
++ if (adev->vcn.inst[i].saved_bo != NULL) {
++ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
++ kvfree(adev->vcn.inst[i].saved_bo);
++ adev->vcn.inst[i].saved_bo = NULL;
++ } else {
++ const struct common_firmware_header *hdr;
++ unsigned offset;
++
++ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
++ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
++ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
++ memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
++ le32_to_cpu(hdr->ucode_size_bytes));
++ size -= le32_to_cpu(hdr->ucode_size_bytes);
++ ptr += le32_to_cpu(hdr->ucode_size_bytes);
++ }
++ memset_io(ptr, 0, size);
+ }
+- memset_io(ptr, 0, size);
+ }
+-
+ return 0;
+ }
+
+@@ -253,31 +262,34 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
+ {
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, vcn.idle_work.work);
+- unsigned int fences = 0;
+- unsigned int i;
++ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
++ unsigned int i, j;
+
+- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_enc[i]);
+- }
++ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
++ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
++ }
+
+- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+- struct dpg_pause_state new_state;
++ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
++ struct dpg_pause_state new_state;
+
+- if (fences)
+- new_state.fw_based = VCN_DPG_STATE__PAUSE;
+- else
+- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
++ if (fence[j])
++ new_state.fw_based = VCN_DPG_STATE__PAUSE;
++ else
++ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+- if (amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg))
+- new_state.jpeg = VCN_DPG_STATE__PAUSE;
+- else
+- new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
++ if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg))
++ new_state.jpeg = VCN_DPG_STATE__PAUSE;
++ else
++ new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+- adev->vcn.pause_dpg_mode(adev, &new_state);
+- }
++ adev->vcn.pause_dpg_mode(adev, &new_state);
++ }
+
+- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg);
+- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_dec);
++ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg);
++ fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
++ fences += fence[j];
++ }
+
+ if (fences == 0) {
+ amdgpu_gfx_off_ctrl(adev, true);
+@@ -311,14 +323,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
+ unsigned int i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_enc[i]);
++ fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+ }
+ if (fences)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+- if (amdgpu_fence_count_emitted(&adev->vcn.inst[0].ring_jpeg))
++ if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg))
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+@@ -344,7 +356,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
+ unsigned i;
+ int r;
+
+- WREG32(adev->vcn.inst[0].external.scratch9, 0xCAFEDEAD);
++ WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+@@ -352,7 +364,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+- tmp = RREG32(adev->vcn.inst[0].external.scratch9);
++ tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+@@ -663,7 +675,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
+ unsigned i;
+ int r;
+
+- WREG32(adev->vcn.inst[0].external.jpeg_pitch, 0xCAFEDEAD);
++ WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+@@ -673,7 +685,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+- tmp = RREG32(adev->vcn.inst[0].external.jpeg_pitch);
++ tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+@@ -747,7 +759,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+ }
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+- tmp = RREG32(adev->vcn.inst[0].external.jpeg_pitch);
++ tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+index e27351267c9e..b7dc069b637c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
++++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+@@ -48,6 +48,8 @@
+
+ #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
++#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2
++
+ static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
+ static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev);
+@@ -55,6 +57,11 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
+ static int vcn_v2_5_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+
++static int amdgpu_ih_clientid_vcns[] = {
++ SOC15_IH_CLIENTID_VCN,
++ SOC15_IH_CLIENTID_VCN1
++};
++
+ /**
+ * vcn_v2_5_early_init - set function pointers
+ *
+@@ -65,8 +72,11 @@ static int vcn_v2_5_set_powergating_state(void *handle,
+ static int vcn_v2_5_early_init(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++	if (adev->asic_type == CHIP_ARCTURUS)
+
+- adev->vcn.num_vcn_inst = 1;
++ adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS;
++ else
++ adev->vcn.num_vcn_inst = 1;
+ adev->vcn.num_enc_rings = 2;
+
+ vcn_v2_5_set_dec_ring_funcs(adev);
+@@ -87,29 +97,31 @@ static int vcn_v2_5_early_init(void *handle)
+ static int vcn_v2_5_sw_init(void *handle)
+ {
+ struct amdgpu_ring *ring;
+- int i, r;
++ int i, j, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+- /* VCN DEC TRAP */
+- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+- VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[0].irq);
+- if (r)
+- return r;
++ for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
++ /* VCN DEC TRAP */
++ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
++ VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq);
++ if (r)
++ return r;
++
++ /* VCN ENC TRAP */
++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
++ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
++ i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
++ if (r)
++ return r;
++ }
+
+- /* VCN ENC TRAP */
+- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+- i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[0].irq);
++ /* VCN JPEG TRAP */
++ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
++ VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq);
+ if (r)
+ return r;
+ }
+
+- /* VCN JPEG TRAP */
+- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+- VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[0].irq);
+- if (r)
+- return r;
+-
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+@@ -121,6 +133,13 @@ static int vcn_v2_5_sw_init(void *handle)
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
++
++ if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) {
++ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1;
++ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw;
++ adev->firmware.fw_size +=
++ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
++ }
+ DRM_INFO("PSP loading VCN firmware\n");
+ }
+
+@@ -128,52 +147,54 @@ static int vcn_v2_5_sw_init(void *handle)
+ if (r)
+ return r;
+
+- ring = &adev->vcn.inst[0].ring_dec;
+- ring->use_doorbell = true;
+- ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
+- sprintf(ring->name, "vcn_dec");
+- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0);
+- if (r)
+- return r;
+-
+- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+-
+- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+- adev->vcn.inst[0].external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
+- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+- adev->vcn.inst[0].external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
+- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+- adev->vcn.inst[0].external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
+- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+- adev->vcn.inst[0].external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
+- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+- adev->vcn.inst[0].external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
+-
+- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+- ring = &adev->vcn.inst[0].ring_enc[i];
++ for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
++ adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
++ adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
++ adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
++ adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
++ adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
++ adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
++
++ adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
++ adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9);
++ adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
++ adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0);
++ adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
++ adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1);
++ adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
++ adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD);
++ adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
++ adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP);
++
++ adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
++ adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH);
++
++ ring = &adev->vcn.inst[j].ring_dec;
+ ring->use_doorbell = true;
+- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;
+- sprintf(ring->name, "vcn_enc%d", i);
+- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0);
++ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j;
++ sprintf(ring->name, "vcn_dec_%d", j);
++ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
+ if (r)
+ return r;
+- }
+
+- ring = &adev->vcn.inst[0].ring_jpeg;
+- ring->use_doorbell = true;
+- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+- sprintf(ring->name, "vcn_jpeg");
+- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0);
+- if (r)
+- return r;
++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
++ ring = &adev->vcn.inst[j].ring_enc[i];
++ ring->use_doorbell = true;
++ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j;
++ sprintf(ring->name, "vcn_enc_%d.%d", j, i);
++ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
++ if (r)
++ return r;
++ }
+
+- adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+- adev->vcn.inst[0].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
++ ring = &adev->vcn.inst[j].ring_jpeg;
++ ring->use_doorbell = true;
++ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j;
++ sprintf(ring->name, "vcn_jpeg_%d", j);
++ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
++ if (r)
++ return r;
++ }
+
+ return 0;
+ }
+@@ -209,36 +230,39 @@ static int vcn_v2_5_sw_fini(void *handle)
+ static int vcn_v2_5_hw_init(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+- struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_dec;
+- int i, r;
++ struct amdgpu_ring *ring;
++ int i, j, r;
+
+- adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+- ring->doorbell_index, 0);
++ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
++ ring = &adev->vcn.inst[j].ring_dec;
+
+- r = amdgpu_ring_test_ring(ring);
+- if (r) {
+- ring->sched.ready = false;
+- goto done;
+- }
++ adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
++ ring->doorbell_index, j);
+
+- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+- ring = &adev->vcn.inst[0].ring_enc[i];
+- ring->sched.ready = false;
+- continue;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->sched.ready = false;
+ goto done;
+ }
+- }
+
+- ring = &adev->vcn.inst[0].ring_jpeg;
+- r = amdgpu_ring_test_ring(ring);
+- if (r) {
+- ring->sched.ready = false;
+- goto done;
+- }
++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
++ ring = &adev->vcn.inst[j].ring_enc[i];
++ ring->sched.ready = false;
++ continue;
++ r = amdgpu_ring_test_ring(ring);
++ if (r) {
++ ring->sched.ready = false;
++ goto done;
++ }
++ }
+
++ ring = &adev->vcn.inst[j].ring_jpeg;
++ r = amdgpu_ring_test_ring(ring);
++ if (r) {
++ ring->sched.ready = false;
++ goto done;
++ }
++ }
+ done:
+ if (!r)
+ DRM_INFO("VCN decode and encode initialized successfully.\n");
+@@ -256,21 +280,25 @@ static int vcn_v2_5_hw_init(void *handle)
+ static int vcn_v2_5_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+- struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_dec;
++ struct amdgpu_ring *ring;
+ int i;
+
+- if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
+- vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ ring = &adev->vcn.inst[i].ring_dec;
+
+- ring->sched.ready = false;
++ if (RREG32_SOC15(VCN, i, mmUVD_STATUS))
++ vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+- ring = &adev->vcn.inst[0].ring_enc[i];
+ ring->sched.ready = false;
+- }
+
+- ring = &adev->vcn.inst[0].ring_jpeg;
+- ring->sched.ready = false;
++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
++ ring = &adev->vcn.inst[i].ring_enc[i];
++ ring->sched.ready = false;
++ }
++
++ ring = &adev->vcn.inst[i].ring_jpeg;
++ ring->sched.ready = false;
++ }
+
+ return 0;
+ }
+@@ -328,44 +356,47 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
+ {
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
++ int i;
+
+- /* cache window 0: fw */
+- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
+- offset = 0;
+- } else {
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+- lower_32_bits(adev->vcn.inst[0].gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+- upper_32_bits(adev->vcn.inst[0].gpu_addr));
+- offset = size;
+- /* No signed header for now from firmware
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+- */
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ /* cache window 0: fw */
++ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
++ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
++ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
++ offset = 0;
++ } else {
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
++ lower_32_bits(adev->vcn.inst[i].gpu_addr));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
++ upper_32_bits(adev->vcn.inst[i].gpu_addr));
++ offset = size;
++ /* No signed header for now from firmware
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
++ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
++ */
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
++ }
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
++
++ /* cache window 1: stack */
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
++ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
++ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
++
++ /* cache window 2: context */
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
++ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
++ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+ }
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
+-
+- /* cache window 1: stack */
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+- lower_32_bits(adev->vcn.inst[0].gpu_addr + offset));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+- upper_32_bits(adev->vcn.inst[0].gpu_addr + offset));
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+-
+- /* cache window 2: context */
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+- lower_32_bits(adev->vcn.inst[0].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+- upper_32_bits(adev->vcn.inst[0].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+ }
+
+ /**
+@@ -380,106 +411,109 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
+ {
+ uint32_t data;
+ int ret = 0;
++ int i;
+
+- /* UVD disable CGC */
+- data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+- if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+- data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+- else
+- data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+- data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+- data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+- WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+-
+- data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE);
+- data &= ~(UVD_CGC_GATE__SYS_MASK
+- | UVD_CGC_GATE__UDEC_MASK
+- | UVD_CGC_GATE__MPEG2_MASK
+- | UVD_CGC_GATE__REGS_MASK
+- | UVD_CGC_GATE__RBC_MASK
+- | UVD_CGC_GATE__LMI_MC_MASK
+- | UVD_CGC_GATE__LMI_UMC_MASK
+- | UVD_CGC_GATE__IDCT_MASK
+- | UVD_CGC_GATE__MPRD_MASK
+- | UVD_CGC_GATE__MPC_MASK
+- | UVD_CGC_GATE__LBSI_MASK
+- | UVD_CGC_GATE__LRBBM_MASK
+- | UVD_CGC_GATE__UDEC_RE_MASK
+- | UVD_CGC_GATE__UDEC_CM_MASK
+- | UVD_CGC_GATE__UDEC_IT_MASK
+- | UVD_CGC_GATE__UDEC_DB_MASK
+- | UVD_CGC_GATE__UDEC_MP_MASK
+- | UVD_CGC_GATE__WCB_MASK
+- | UVD_CGC_GATE__VCPU_MASK
+- | UVD_CGC_GATE__MMSCH_MASK);
+-
+- WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data);
+-
+- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret);
+-
+- data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+- data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+- | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+- | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+- | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+- | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+- | UVD_CGC_CTRL__SYS_MODE_MASK
+- | UVD_CGC_CTRL__UDEC_MODE_MASK
+- | UVD_CGC_CTRL__MPEG2_MODE_MASK
+- | UVD_CGC_CTRL__REGS_MODE_MASK
+- | UVD_CGC_CTRL__RBC_MODE_MASK
+- | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+- | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+- | UVD_CGC_CTRL__IDCT_MODE_MASK
+- | UVD_CGC_CTRL__MPRD_MODE_MASK
+- | UVD_CGC_CTRL__MPC_MODE_MASK
+- | UVD_CGC_CTRL__LBSI_MODE_MASK
+- | UVD_CGC_CTRL__LRBBM_MODE_MASK
+- | UVD_CGC_CTRL__WCB_MODE_MASK
+- | UVD_CGC_CTRL__VCPU_MODE_MASK
+- | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+- WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+-
+- /* turn on */
+- data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE);
+- data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+- | UVD_SUVD_CGC_GATE__SIT_MASK
+- | UVD_SUVD_CGC_GATE__SMP_MASK
+- | UVD_SUVD_CGC_GATE__SCM_MASK
+- | UVD_SUVD_CGC_GATE__SDB_MASK
+- | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+- | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+- | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+- | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+- | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+- | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+- | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+- | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+- | UVD_SUVD_CGC_GATE__SCLR_MASK
+- | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+- | UVD_SUVD_CGC_GATE__ENT_MASK
+- | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+- | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+- | UVD_SUVD_CGC_GATE__SITE_MASK
+- | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+- | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+- | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+- | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+- | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+- WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data);
+-
+- data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
+- data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+- WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ /* UVD disable CGC */
++ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
++ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
++ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
++ else
++ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
++ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
++ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
++ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
++
++ data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
++ data &= ~(UVD_CGC_GATE__SYS_MASK
++ | UVD_CGC_GATE__UDEC_MASK
++ | UVD_CGC_GATE__MPEG2_MASK
++ | UVD_CGC_GATE__REGS_MASK
++ | UVD_CGC_GATE__RBC_MASK
++ | UVD_CGC_GATE__LMI_MC_MASK
++ | UVD_CGC_GATE__LMI_UMC_MASK
++ | UVD_CGC_GATE__IDCT_MASK
++ | UVD_CGC_GATE__MPRD_MASK
++ | UVD_CGC_GATE__MPC_MASK
++ | UVD_CGC_GATE__LBSI_MASK
++ | UVD_CGC_GATE__LRBBM_MASK
++ | UVD_CGC_GATE__UDEC_RE_MASK
++ | UVD_CGC_GATE__UDEC_CM_MASK
++ | UVD_CGC_GATE__UDEC_IT_MASK
++ | UVD_CGC_GATE__UDEC_DB_MASK
++ | UVD_CGC_GATE__UDEC_MP_MASK
++ | UVD_CGC_GATE__WCB_MASK
++ | UVD_CGC_GATE__VCPU_MASK
++ | UVD_CGC_GATE__MMSCH_MASK);
++
++ WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
++
++ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret);
++
++ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
++ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
++ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
++ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
++ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
++ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
++ | UVD_CGC_CTRL__SYS_MODE_MASK
++ | UVD_CGC_CTRL__UDEC_MODE_MASK
++ | UVD_CGC_CTRL__MPEG2_MODE_MASK
++ | UVD_CGC_CTRL__REGS_MODE_MASK
++ | UVD_CGC_CTRL__RBC_MODE_MASK
++ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
++ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
++ | UVD_CGC_CTRL__IDCT_MODE_MASK
++ | UVD_CGC_CTRL__MPRD_MODE_MASK
++ | UVD_CGC_CTRL__MPC_MODE_MASK
++ | UVD_CGC_CTRL__LBSI_MODE_MASK
++ | UVD_CGC_CTRL__LRBBM_MODE_MASK
++ | UVD_CGC_CTRL__WCB_MODE_MASK
++ | UVD_CGC_CTRL__VCPU_MODE_MASK
++ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
++ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
++
++ /* turn on */
++ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
++ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
++ | UVD_SUVD_CGC_GATE__SIT_MASK
++ | UVD_SUVD_CGC_GATE__SMP_MASK
++ | UVD_SUVD_CGC_GATE__SCM_MASK
++ | UVD_SUVD_CGC_GATE__SDB_MASK
++ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
++ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
++ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
++ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
++ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
++ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
++ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
++ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
++ | UVD_SUVD_CGC_GATE__SCLR_MASK
++ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
++ | UVD_SUVD_CGC_GATE__ENT_MASK
++ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
++ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
++ | UVD_SUVD_CGC_GATE__SITE_MASK
++ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
++ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
++ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
++ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
++ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
++ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
++
++ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
++ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
++ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
++ }
+ }
+
+ /**
+@@ -493,51 +527,54 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
+ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
+ {
+ uint32_t data = 0;
++ int i;
+
+- /* enable UVD CGC */
+- data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+- if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+- data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+- else
+- data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+- data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+- data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+- WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+-
+- data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+- data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+- | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+- | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+- | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+- | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+- | UVD_CGC_CTRL__SYS_MODE_MASK
+- | UVD_CGC_CTRL__UDEC_MODE_MASK
+- | UVD_CGC_CTRL__MPEG2_MODE_MASK
+- | UVD_CGC_CTRL__REGS_MODE_MASK
+- | UVD_CGC_CTRL__RBC_MODE_MASK
+- | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+- | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+- | UVD_CGC_CTRL__IDCT_MODE_MASK
+- | UVD_CGC_CTRL__MPRD_MODE_MASK
+- | UVD_CGC_CTRL__MPC_MODE_MASK
+- | UVD_CGC_CTRL__LBSI_MODE_MASK
+- | UVD_CGC_CTRL__LRBBM_MODE_MASK
+- | UVD_CGC_CTRL__WCB_MODE_MASK
+- | UVD_CGC_CTRL__VCPU_MODE_MASK);
+- WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+-
+- data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
+- data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+- | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+- WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ /* enable UVD CGC */
++ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
++ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
++ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
++ else
++ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
++ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
++ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
++ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
++
++ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
++ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
++ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
++ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
++ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
++ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
++ | UVD_CGC_CTRL__SYS_MODE_MASK
++ | UVD_CGC_CTRL__UDEC_MODE_MASK
++ | UVD_CGC_CTRL__MPEG2_MODE_MASK
++ | UVD_CGC_CTRL__REGS_MODE_MASK
++ | UVD_CGC_CTRL__RBC_MODE_MASK
++ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
++ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
++ | UVD_CGC_CTRL__IDCT_MODE_MASK
++ | UVD_CGC_CTRL__MPRD_MODE_MASK
++ | UVD_CGC_CTRL__MPC_MODE_MASK
++ | UVD_CGC_CTRL__LBSI_MODE_MASK
++ | UVD_CGC_CTRL__LRBBM_MODE_MASK
++ | UVD_CGC_CTRL__WCB_MODE_MASK
++ | UVD_CGC_CTRL__VCPU_MODE_MASK);
++ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
++
++ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
++ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
++ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
++ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
++ }
+ }
+
+ /**
+@@ -549,60 +586,64 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
+ */
+ static int jpeg_v2_5_start(struct amdgpu_device *adev)
+ {
+- struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_jpeg;
++ struct amdgpu_ring *ring;
+ uint32_t tmp;
++ int i;
+
+- /* disable anti hang mechanism */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), 0,
+- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+-
+- /* JPEG disable CGC */
+- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
+- tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+- tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+- tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+- WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp);
+-
+- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
+- tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+- | JPEG_CGC_GATE__JPEG2_DEC_MASK
+- | JPEG_CGC_GATE__JMCIF_MASK
+- | JPEG_CGC_GATE__JRBBM_MASK);
+- WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp);
+-
+- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
+- tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
+- | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
+- | JPEG_CGC_CTRL__JMCIF_MODE_MASK
+- | JPEG_CGC_CTRL__JRBBM_MODE_MASK);
+- WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp);
+-
+- /* MJPEG global tiling registers */
+- WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX8_ADDR_CONFIG,
+- adev->gfx.config.gb_addr_config);
+- WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+- adev->gfx.config.gb_addr_config);
+-
+- /* enable JMI channel */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0,
+- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+-
+- /* enable System Interrupt for JRBC */
+- WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN),
+- JPEG_SYS_INT_EN__DJRBC_MASK,
+- ~JPEG_SYS_INT_EN__DJRBC_MASK);
+-
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+- lower_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+- upper_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0);
+- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0);
+- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ ring = &adev->vcn.inst[i].ring_jpeg;
++ /* disable anti hang mechanism */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0,
++ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
++
++ /* JPEG disable CGC */
++ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL);
++ tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
++ tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
++ tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
++ WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp);
++
++ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE);
++ tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
++ | JPEG_CGC_GATE__JPEG2_DEC_MASK
++ | JPEG_CGC_GATE__JMCIF_MASK
++ | JPEG_CGC_GATE__JRBBM_MASK);
++ WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp);
++
++ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL);
++ tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
++ | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
++ | JPEG_CGC_CTRL__JMCIF_MODE_MASK
++ | JPEG_CGC_CTRL__JRBBM_MODE_MASK);
++ WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp);
++
++ /* MJPEG global tiling registers */
++ WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
++ adev->gfx.config.gb_addr_config);
++ WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
++ adev->gfx.config.gb_addr_config);
++
++ /* enable JMI channel */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0,
++ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
++
++ /* enable System Interrupt for JRBC */
++ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN),
++ JPEG_SYS_INT_EN__DJRBC_MASK,
++ ~JPEG_SYS_INT_EN__DJRBC_MASK);
++
++ WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0);
++ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
++ lower_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
++ upper_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0);
++ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0);
++ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
++ WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
++ ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR);
++ }
+
+ return 0;
+ }
+@@ -617,185 +658,194 @@ static int jpeg_v2_5_start(struct amdgpu_device *adev)
+ static int jpeg_v2_5_stop(struct amdgpu_device *adev)
+ {
+ uint32_t tmp;
++ int i;
+
+- /* reset JMI */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL),
+- UVD_JMI_CNTL__SOFT_RESET_MASK,
+- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+-
+- tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
+- tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+- |JPEG_CGC_GATE__JPEG2_DEC_MASK
+- |JPEG_CGC_GATE__JMCIF_MASK
+- |JPEG_CGC_GATE__JRBBM_MASK);
+- WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp);
+-
+- /* enable anti hang mechanism */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS),
+- UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ /* reset JMI */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL),
++ UVD_JMI_CNTL__SOFT_RESET_MASK,
++ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
++
++ tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE);
++ tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK
++ |JPEG_CGC_GATE__JPEG2_DEC_MASK
++ |JPEG_CGC_GATE__JMCIF_MASK
++ |JPEG_CGC_GATE__JRBBM_MASK);
++ WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp);
++
++ /* enable anti hang mechanism */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS),
++ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
++ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
++ }
+
+ return 0;
+ }
+
+ static int vcn_v2_5_start(struct amdgpu_device *adev)
+ {
+- struct amdgpu_ring *ring = &adev->vcn.inst[0].ring_dec;
++ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+- int i, j, r;
++ int i, j, k, r;
+
+- /* disable register anti-hang mechanism */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
+- ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ /* disable register anti-hang mechanism */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0,
++ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+- /* set uvd status busy */
+- tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+- WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
++ /* set uvd status busy */
++ tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
++ WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp);
++ }
+
+ /*SW clock gating */
+ vcn_v2_5_disable_clock_gating(adev);
+
+- /* enable VCPU clock */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
+- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+-
+- /* disable master interrupt */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
+- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+-
+- /* setup mmUVD_LMI_CTRL */
+- tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL);
+- tmp &= ~0xff;
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp | 0x8|
+- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+-
+- /* setup mmUVD_MPC_CNTL */
+- tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL);
+- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+- WREG32_SOC15(VCN, 0, mmUVD_MPC_CNTL, tmp);
+-
+- /* setup UVD_MPC_SET_MUXA0 */
+- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0,
+- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+-
+- /* setup UVD_MPC_SET_MUXB0 */
+- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0,
+- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+-
+- /* setup mmUVD_MPC_SET_MUX */
+- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX,
+- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ /* enable VCPU clock */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
++ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
++
++ /* disable master interrupt */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0,
++ ~UVD_MASTINT_EN__VCPU_EN_MASK);
++
++ /* setup mmUVD_LMI_CTRL */
++ tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL);
++ tmp &= ~0xff;
++ WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8|
++ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
++ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
++ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
++ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
++
++ /* setup mmUVD_MPC_CNTL */
++ tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL);
++ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
++ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
++ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
++
++ /* setup UVD_MPC_SET_MUXA0 */
++ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0,
++ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
++ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
++ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
++ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
++
++ /* setup UVD_MPC_SET_MUXB0 */
++ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0,
++ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
++ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
++ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
++ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
++
++ /* setup mmUVD_MPC_SET_MUX */
++ WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX,
++ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
++ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
++ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
++ }
+
+ vcn_v2_5_mc_resume(adev);
+
+- /* VCN global tiling registers */
+- WREG32_SOC15(UVD, 0, mmUVD_GFX8_ADDR_CONFIG,
+- adev->gfx.config.gb_addr_config);
+- WREG32_SOC15(UVD, 0, mmUVD_GFX8_ADDR_CONFIG,
+- adev->gfx.config.gb_addr_config);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ /* VCN global tiling registers */
++ WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG,
++ adev->gfx.config.gb_addr_config);
++ WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG,
++ adev->gfx.config.gb_addr_config);
+
+- /* enable LMI MC and UMC channels */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
+- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
++ /* enable LMI MC and UMC channels */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+- /* unblock VCPU register access */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_ARB_CTRL), 0,
+- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
++ /* unblock VCPU register access */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0,
++ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
+- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+-
+- for (i = 0; i < 10; ++i) {
+- uint32_t status;
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
++ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+- for (j = 0; j < 100; ++j) {
+- status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
++ for (k = 0; k < 10; ++k) {
++ uint32_t status;
++
++ for (j = 0; j < 100; ++j) {
++ status = RREG32_SOC15(UVD, i, mmUVD_STATUS);
++ if (status & 2)
++ break;
++ if (amdgpu_emu_mode == 1)
++ msleep(500);
++ else
++ mdelay(10);
++ }
++ r = 0;
+ if (status & 2)
+ break;
+- if (amdgpu_emu_mode == 1)
+- msleep(500);
+- else
+- mdelay(10);
+- }
+- r = 0;
+- if (status & 2)
+- break;
+-
+- DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
+- UVD_VCPU_CNTL__BLK_RST_MASK,
+- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+- mdelay(10);
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
+- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+- mdelay(10);
+- r = -1;
+- }
++ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
++ UVD_VCPU_CNTL__BLK_RST_MASK,
++ ~UVD_VCPU_CNTL__BLK_RST_MASK);
++ mdelay(10);
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
++ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+- if (r) {
+- DRM_ERROR("VCN decode not responding, giving up!!!\n");
+- return r;
+- }
++ mdelay(10);
++ r = -1;
++ }
+
+- /* enable master interrupt */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+- UVD_MASTINT_EN__VCPU_EN_MASK,
+- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+-
+- /* clear the busy bit of VCN_STATUS */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
+- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+-
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_VMID, 0);
+-
+- /* force RBC into idle state */
+- rb_bufsz = order_base_2(ring->ring_size);
+- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+-
+- /* programm the RB_BASE for ring buffer */
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+- lower_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+- upper_32_bits(ring->gpu_addr));
+-
+- /* Initialize the ring buffer's read and write pointers */
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+-
+- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+- lower_32_bits(ring->wptr));
+- ring = &adev->vcn.inst[0].ring_enc[0];
+- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+-
+- ring = &adev->vcn.inst[0].ring_enc[1];
+- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
++ if (r) {
++ DRM_ERROR("VCN decode not responding, giving up!!!\n");
++ return r;
++ }
+
++ /* enable master interrupt */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
++ UVD_MASTINT_EN__VCPU_EN_MASK,
++ ~UVD_MASTINT_EN__VCPU_EN_MASK);
++
++ /* clear the busy bit of VCN_STATUS */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0,
++ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
++
++ WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0);
++
++ ring = &adev->vcn.inst[i].ring_dec;
++ /* force RBC into idle state */
++ rb_bufsz = order_base_2(ring->ring_size);
++ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
++ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp);
++
++ /* programm the RB_BASE for ring buffer */
++ WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
++ lower_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
++ upper_32_bits(ring->gpu_addr));
++
++ /* Initialize the ring buffer's read and write pointers */
++ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0);
++
++ ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR);
++ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR,
++ lower_32_bits(ring->wptr));
++ ring = &adev->vcn.inst[i].ring_enc[0];
++ WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
++ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4);
++
++ ring = &adev->vcn.inst[i].ring_enc[1];
++ WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
++ WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
++ }
+ r = jpeg_v2_5_start(adev);
+
+ return r;
+@@ -804,59 +854,61 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
+ static int vcn_v2_5_stop(struct amdgpu_device *adev)
+ {
+ uint32_t tmp;
+- int r;
++ int i, r;
+
+ r = jpeg_v2_5_stop(adev);
+ if (r)
+ return r;
+
+- /* wait for vcn idle */
+- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
+- if (r)
+- return r;
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ /* wait for vcn idle */
++ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
++ if (r)
++ return r;
+
+- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+- UVD_LMI_STATUS__READ_CLEAN_MASK |
+- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r);
+- if (r)
+- return r;
++ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
++ UVD_LMI_STATUS__READ_CLEAN_MASK |
++ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
++ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
++ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
++ if (r)
++ return r;
+
+- /* block LMI UMC channel */
+- tmp = RREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2);
+- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+- WREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2, tmp);
++ /* block LMI UMC channel */
++ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
++ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
++ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+
+- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r);
+- if (r)
+- return r;
++ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
++ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
++ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
++ if (r)
++ return r;
+
+- /* block VCPU register access */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_ARB_CTRL),
+- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
++ /* block VCPU register access */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL),
++ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
++ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+- /* reset VCPU */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
+- UVD_VCPU_CNTL__BLK_RST_MASK,
+- ~UVD_VCPU_CNTL__BLK_RST_MASK);
++ /* reset VCPU */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
++ UVD_VCPU_CNTL__BLK_RST_MASK,
++ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+- /* disable VCPU clock */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
+- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
++ /* disable VCPU clock */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0,
++ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+- /* clear status */
+- WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
++ /* clear status */
++ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+
+- vcn_v2_5_enable_clock_gating(adev);
++ vcn_v2_5_enable_clock_gating(adev);
+
+- /* enable register anti-hang mechanism */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
+- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
+- ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
++ /* enable register anti-hang mechanism */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS),
++ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
++ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
++ }
+
+ return 0;
+ }
+@@ -872,7 +924,7 @@ static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
+ }
+
+ /**
+@@ -889,7 +941,7 @@ static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
+ }
+
+ /**
+@@ -907,7 +959,7 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ }
+
+@@ -952,10 +1004,10 @@ static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- if (ring == &adev->vcn.inst[0].ring_enc[0])
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
++ if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
+ }
+
+ /**
+@@ -969,16 +1021,16 @@ static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- if (ring == &adev->vcn.inst[0].ring_enc[0]) {
++ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
+ } else {
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
+ }
+ }
+
+@@ -993,19 +1045,19 @@ static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- if (ring == &adev->vcn.inst[0].ring_enc[0]) {
++ if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ } else {
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ }
+ }
+ }
+@@ -1051,7 +1103,7 @@ static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR);
+ }
+
+ /**
+@@ -1068,7 +1120,7 @@ static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR);
+ }
+
+ /**
+@@ -1086,7 +1138,7 @@ static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ }
+
+@@ -1122,40 +1174,62 @@ static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = {
+
+ static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
+ {
+- adev->vcn.inst[0].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
+- DRM_INFO("VCN decode is enabled in VM mode\n");
++ int i;
++
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
++ adev->vcn.inst[i].ring_dec.me = i;
++ DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
++ }
+ }
+
+ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
+ {
+- int i;
+-
+- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+- adev->vcn.inst[0].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
++ int i, j;
+
+- DRM_INFO("VCN encode is enabled in VM mode\n");
++ for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
++ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
++ adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
++ adev->vcn.inst[j].ring_enc[i].me = j;
++ }
++ DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j);
++ }
+ }
+
+ static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev)
+ {
+- adev->vcn.inst[0].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs;
+- DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
++ int i;
++
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs;
++ adev->vcn.inst[i].ring_jpeg.me = i;
++ DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i);
++ }
+ }
+
+ static bool vcn_v2_5_is_idle(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++ int i, ret = 1;
++
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
++ }
+
+- return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
++ return ret;
+ }
+
+ static int vcn_v2_5_wait_for_idle(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+- int ret = 0;
++ int i, ret = 0;
+
+- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
+- UVD_STATUS__IDLE, ret);
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
++ UVD_STATUS__IDLE, ret);
++ if (ret)
++ return ret;
++ }
+
+ return ret;
+ }
+@@ -1209,20 +1283,34 @@ static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+ {
++ uint32_t ip_instance;
++
++ switch (entry->client_id) {
++ case SOC15_IH_CLIENTID_VCN:
++ ip_instance = 0;
++ break;
++ case SOC15_IH_CLIENTID_VCN1:
++ ip_instance = 1;
++ break;
++ default:
++ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
++ return 0;
++ }
++
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
+- amdgpu_fence_process(&adev->vcn.inst[0].ring_dec);
++ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+- amdgpu_fence_process(&adev->vcn.inst[0].ring_enc[0]);
++ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
+- amdgpu_fence_process(&adev->vcn.inst[0].ring_enc[1]);
++ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
+ break;
+ case VCN_2_0__SRCID__JPEG_DECODE:
+- amdgpu_fence_process(&adev->vcn.inst[0].ring_jpeg);
++ amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+@@ -1240,8 +1328,12 @@ static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
+
+ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
+ {
+- adev->vcn.inst[0].irq.num_types = adev->vcn.num_enc_rings + 2;
+- adev->vcn.inst[0].irq.funcs = &vcn_v2_5_irq_funcs;
++ int i;
++
++ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
++ adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2;
++ adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
++ }
+ }
+
+ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
+--
+2.17.1
+