Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/4430-drm-amdgpu-vg20-Restruct-uvd.inst-to-support-multipl.patch')
-rw-r--r--  common/recipes-kernel/linux/linux-yocto-4.14.71/4430-drm-amdgpu-vg20-Restruct-uvd.inst-to-support-multipl.patch | 1874
 1 file changed, 1874 insertions(+), 0 deletions(-)
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/4430-drm-amdgpu-vg20-Restruct-uvd.inst-to-support-multipl.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/4430-drm-amdgpu-vg20-Restruct-uvd.inst-to-support-multipl.patch
new file mode 100644
index 00000000..586cc880
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/4430-drm-amdgpu-vg20-Restruct-uvd.inst-to-support-multipl.patch
@@ -0,0 +1,1874 @@
+From b120d0a52e56b00f6c667dcffb6fba1455db2af6 Mon Sep 17 00:00:00 2001
+From: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com>
+Date: Wed, 9 Jan 2019 19:29:02 +0530
+Subject: [PATCH 4430/5725] drm/amdgpu/vg20: Restruct uvd.inst to support
+ multiple instances
+
+Vega20 has dual UVD, so add multiple-instance support for UVD.
+Restructure uvd.inst, using uvd.inst[0] in place of uvd.inst->.
+Repurpose amdgpu_ring::me as the instance index, initialized to 0.
+There are no logical changes here.
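+
+For example, the single-instance access pattern in the amdgpu_fence.c
+hunk below:
+
+    ring->fence_drv.cpu_addr = adev->uvd.inst->cpu_addr + index;
+
+becomes an access through the ring's repurposed instance index:
+
+    ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;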
+
+Signed-off-by: James Zhu <James.Zhu@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 6 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 12 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 231 +++----
+ drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 1002 +++++++++++++++--------------
+ 5 files changed, 661 insertions(+), 591 deletions(-)
+
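+Note: the new AMDGPU_HW_IP_UVD_ENC ring mask in the amdgpu_kms.c hunk
+below packs one ready bit per encode ring per instance; assuming two
+instances with two encode rings each, instance 0 occupies mask bits 0-1
+and instance 1 bits 2-3:
+
+    ring_mask |= ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
+                  (j + i * adev->uvd.num_enc_rings));
+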
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+index fafe54a..bed6d77 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+@@ -376,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
+ struct amdgpu_device *adev = ring->adev;
+ uint64_t index;
+
+- if (ring != &adev->uvd.inst->ring) {
++ if (ring != &adev->uvd.inst[ring->me].ring) {
+ ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
+ ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
+ } else {
+ /* put fence directly behind firmware */
+ index = ALIGN(adev->uvd.fw->size, 8);
+- ring->fence_drv.cpu_addr = adev->uvd.inst->cpu_addr + index;
+- ring->fence_drv.gpu_addr = adev->uvd.inst->gpu_addr + index;
++ ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
++ ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
+ }
+ amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
+ amdgpu_irq_get(adev, irq_src, irq_type);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index 46cfddf..9de27ce 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -283,7 +283,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ struct drm_crtc *crtc;
+ uint32_t ui32 = 0;
+ uint64_t ui64 = 0;
+- int i, found;
++ int i, j, found;
+ int ui32_size = sizeof(ui32);
+
+ if (!info->return_size || !info->return_pointer)
+@@ -359,7 +359,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ break;
+ case AMDGPU_HW_IP_UVD:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+- ring_mask = adev->uvd.inst->ring.ready ? 1 : 0;
++ for (i = 0; i < adev->uvd.num_uvd_inst; i++)
++ ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i);
+ ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
+ ib_size_alignment = 16;
+ break;
+@@ -372,8 +373,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+- for (i = 0; i < adev->uvd.num_enc_rings; i++)
+- ring_mask |= ((adev->uvd.inst->ring_enc[i].ready ? 1 : 0) << i);
++ for (i = 0; i < adev->uvd.num_uvd_inst; i++)
++ for (j = 0; j < adev->uvd.num_enc_rings; j++)
++ ring_mask |=
++ ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
++ (j + i * adev->uvd.num_enc_rings));
+ ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
+ ib_size_alignment = 1;
+ break;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+index 49cad08..c6850b6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+@@ -362,6 +362,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
+
+ dma_fence_put(ring->vmid_wait);
+ ring->vmid_wait = NULL;
++ ring->me = 0;
+
+ ring->adev->rings[ring->idx] = NULL;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+index 263cd945..c9ed917 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+@@ -127,7 +127,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
+ const char *fw_name;
+ const struct common_firmware_header *hdr;
+ unsigned version_major, version_minor, family_id;
+- int i, r;
++ int i, j, r;
+
+ INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler);
+
+@@ -236,28 +236,30 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+- AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst->vcpu_bo,
+- &adev->uvd.inst->gpu_addr, &adev->uvd.inst->cpu_addr);
+- if (r) {
+- dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
+- return r;
+- }
++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+
+- ring = &adev->uvd.inst->ring;
+- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+- r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity,
+- rq, amdgpu_sched_jobs, NULL);
+- if (r != 0) {
+- DRM_ERROR("Failed setting up UVD run queue.\n");
+- return r;
+- }
++ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
++ AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
++ &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
++ if (r) {
++ dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
++ return r;
++ }
+
+- for (i = 0; i < adev->uvd.max_handles; ++i) {
+- atomic_set(&adev->uvd.inst->handles[i], 0);
+- adev->uvd.inst->filp[i] = NULL;
+- }
++ ring = &adev->uvd.inst[j].ring;
++ rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
++ r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity,
++ rq, amdgpu_sched_jobs, NULL);
++ if (r != 0) {
++ DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j);
++ return r;
++ }
+
++ for (i = 0; i < adev->uvd.max_handles; ++i) {
++ atomic_set(&adev->uvd.inst[j].handles[i], 0);
++ adev->uvd.inst[j].filp[i] = NULL;
++ }
++ }
+ /* from uvd v5.0 HW addressing capacity increased to 64 bits */
+ if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
+ adev->uvd.address_64_bit = true;
+@@ -284,20 +286,22 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
+
+ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
+ {
+- int i;
+- kfree(adev->uvd.inst->saved_bo);
++ int i, j;
+
+- drm_sched_entity_fini(&adev->uvd.inst->ring.sched, &adev->uvd.inst->entity);
++ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
++ kfree(adev->uvd.inst[j].saved_bo);
+
+- amdgpu_bo_free_kernel(&adev->uvd.inst->vcpu_bo,
+- &adev->uvd.inst->gpu_addr,
+- (void **)&adev->uvd.inst->cpu_addr);
++ drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
+
+- amdgpu_ring_fini(&adev->uvd.inst->ring);
++ amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
++ &adev->uvd.inst[j].gpu_addr,
++ (void **)&adev->uvd.inst[j].cpu_addr);
+
+- for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
+- amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
++ amdgpu_ring_fini(&adev->uvd.inst[j].ring);
+
++ for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
++ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
++ }
+ release_firmware(adev->uvd.fw);
+
+ return 0;
+@@ -307,32 +311,33 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
+ {
+ unsigned size;
+ void *ptr;
+- int i;
++ int i, j;
+
+- if (adev->uvd.inst->vcpu_bo == NULL)
+- return 0;
++ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
++ if (adev->uvd.inst[j].vcpu_bo == NULL)
++ continue;
+
+- cancel_delayed_work_sync(&adev->uvd.inst->idle_work);
++ cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work);
+
+- /* only valid for physical mode */
+- if (adev->asic_type < CHIP_POLARIS10) {
+- for (i = 0; i < adev->uvd.max_handles; ++i)
+- if (atomic_read(&adev->uvd.inst->handles[i]))
+- break;
++ /* only valid for physical mode */
++ if (adev->asic_type < CHIP_POLARIS10) {
++ for (i = 0; i < adev->uvd.max_handles; ++i)
++ if (atomic_read(&adev->uvd.inst[j].handles[i]))
++ break;
+
+- if (i == adev->uvd.max_handles)
+- return 0;
+- }
++ if (i == adev->uvd.max_handles)
++ continue;
++ }
+
+- size = amdgpu_bo_size(adev->uvd.inst->vcpu_bo);
+- ptr = adev->uvd.inst->cpu_addr;
++ size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
++ ptr = adev->uvd.inst[j].cpu_addr;
++
++ adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL);
++ if (!adev->uvd.inst[j].saved_bo)
++ return -ENOMEM;
++ memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
++ }
+
+- adev->uvd.inst->saved_bo = kmalloc(size, GFP_KERNEL);
+- if (!adev->uvd.inst->saved_bo)
+- return -ENOMEM;
+-
+- memcpy_fromio(adev->uvd.inst->saved_bo, ptr, size);
+-
+ return 0;
+ }
+
+@@ -340,59 +345,65 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
+ {
+ unsigned size;
+ void *ptr;
++ int i;
+
+- if (adev->uvd.inst->vcpu_bo == NULL)
+- return -EINVAL;
++ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
++ if (adev->uvd.inst[i].vcpu_bo == NULL)
++ return -EINVAL;
+
+- size = amdgpu_bo_size(adev->uvd.inst->vcpu_bo);
+- ptr = adev->uvd.inst->cpu_addr;
++ size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
++ ptr = adev->uvd.inst[i].cpu_addr;
+
+- if (adev->uvd.inst->saved_bo != NULL) {
+- memcpy_toio(ptr, adev->uvd.inst->saved_bo, size);
+- kfree(adev->uvd.inst->saved_bo);
+- adev->uvd.inst->saved_bo = NULL;
+- } else {
+- const struct common_firmware_header *hdr;
+- unsigned offset;
+-
+- hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+- memcpy_toio(adev->uvd.inst->cpu_addr, adev->uvd.fw->data + offset,
+- le32_to_cpu(hdr->ucode_size_bytes));
+- size -= le32_to_cpu(hdr->ucode_size_bytes);
+- ptr += le32_to_cpu(hdr->ucode_size_bytes);
++ if (adev->uvd.inst[i].saved_bo != NULL) {
++ memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
++ kfree(adev->uvd.inst[i].saved_bo);
++ adev->uvd.inst[i].saved_bo = NULL;
++ } else {
++ const struct common_firmware_header *hdr;
++ unsigned offset;
++
++ hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
++ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
++ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
++ memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
++ le32_to_cpu(hdr->ucode_size_bytes));
++ size -= le32_to_cpu(hdr->ucode_size_bytes);
++ ptr += le32_to_cpu(hdr->ucode_size_bytes);
++ }
++ memset_io(ptr, 0, size);
++ /* to restore uvd fence seq */
++ amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
+ }
+- memset_io(ptr, 0, size);
+- /* to restore uvd fence seq */
+- amdgpu_fence_driver_force_completion(&adev->uvd.inst->ring);
+ }
+-
+ return 0;
+ }
+
+ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
+ {
+- struct amdgpu_ring *ring = &adev->uvd.inst->ring;
+- int i, r;
++ struct amdgpu_ring *ring;
++ int i, j, r;
+
+- for (i = 0; i < adev->uvd.max_handles; ++i) {
+- uint32_t handle = atomic_read(&adev->uvd.inst->handles[i]);
+- if (handle != 0 && adev->uvd.inst->filp[i] == filp) {
+- struct dma_fence *fence;
+-
+- r = amdgpu_uvd_get_destroy_msg(ring, handle,
+- false, &fence);
+- if (r) {
+- DRM_ERROR("Error destroying UVD (%d)!\n", r);
+- continue;
+- }
++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
++ ring = &adev->uvd.inst[j].ring;
+
+- dma_fence_wait(fence, false);
+- dma_fence_put(fence);
++ for (i = 0; i < adev->uvd.max_handles; ++i) {
++ uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]);
++ if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) {
++ struct dma_fence *fence;
++
++ r = amdgpu_uvd_get_destroy_msg(ring, handle,
++ false, &fence);
++ if (r) {
++ DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r);
++ continue;
++ }
+
+- adev->uvd.inst->filp[i] = NULL;
+- atomic_set(&adev->uvd.inst->handles[i], 0);
++ dma_fence_wait(fence, false);
++ dma_fence_put(fence);
++
++ adev->uvd.inst[j].filp[i] = NULL;
++ atomic_set(&adev->uvd.inst[j].handles[i], 0);
++ }
+ }
+ }
+ }
+@@ -667,15 +678,16 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
+ void *ptr;
+ long r;
+ int i;
++ uint32_t ip_instance = ctx->parser->job->ring->me;
+
+ if (offset & 0x3F) {
+- DRM_ERROR("UVD messages must be 64 byte aligned!\n");
++ DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance);
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_kmap(bo, &ptr);
+ if (r) {
+- DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
++ DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r);
+ return r;
+ }
+
+@@ -685,7 +697,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
+ handle = msg[2];
+
+ if (handle == 0) {
+- DRM_ERROR("Invalid UVD handle!\n");
++ DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance);
+ return -EINVAL;
+ }
+
+@@ -696,18 +708,18 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
+
+ /* try to alloc a new handle */
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+- if (atomic_read(&adev->uvd.inst->handles[i]) == handle) {
+- DRM_ERROR("Handle 0x%x already in use!\n", handle);
++ if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
++ DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle);
+ return -EINVAL;
+ }
+
+- if (!atomic_cmpxchg(&adev->uvd.inst->handles[i], 0, handle)) {
+- adev->uvd.inst->filp[i] = ctx->parser->filp;
++ if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) {
++ adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp;
+ return 0;
+ }
+ }
+
+- DRM_ERROR("No more free UVD handles!\n");
++ DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance);
+ return -ENOSPC;
+
+ case 1:
+@@ -719,27 +731,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
+
+ /* validate the handle */
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+- if (atomic_read(&adev->uvd.inst->handles[i]) == handle) {
+- if (adev->uvd.inst->filp[i] != ctx->parser->filp) {
+- DRM_ERROR("UVD handle collision detected!\n");
++ if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
++ if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) {
++ DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance);
+ return -EINVAL;
+ }
+ return 0;
+ }
+ }
+
+- DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
++ DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle);
+ return -ENOENT;
+
+ case 2:
+ /* it's a destroy msg, free the handle */
+ for (i = 0; i < adev->uvd.max_handles; ++i)
+- atomic_cmpxchg(&adev->uvd.inst->handles[i], handle, 0);
++ atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0);
+ amdgpu_bo_kunmap(bo);
+ return 0;
+
+ default:
+- DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
++ DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type);
+ return -EINVAL;
+ }
+ BUG();
+@@ -1043,7 +1055,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
+ if (r)
+ goto err_free;
+
+- r = amdgpu_job_submit(job, ring, &adev->uvd.inst->entity,
++ r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+ if (r)
+ goto err_free;
+@@ -1191,27 +1203,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+ {
+ struct dma_fence *fence;
+ long r;
++ uint32_t ip_instance = ring->me;
+
+ r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
+ if (r) {
+- DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
++ DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
+ goto error;
+ }
+
+ r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
+ if (r) {
+- DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
++ DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
+ goto error;
+ }
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0) {
+- DRM_ERROR("amdgpu: IB test timed out.\n");
++ DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
+ r = -ETIMEDOUT;
+ } else if (r < 0) {
+- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
++ DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
+ } else {
+- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
++ DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
+ r = 0;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+index debf206..38816227 100644
+--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+@@ -58,7 +58,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
+ }
+
+ /**
+@@ -72,10 +72,10 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- if (ring == &adev->uvd.inst->ring_enc[0])
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
++ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
+ }
+
+ /**
+@@ -89,7 +89,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
+ }
+
+ /**
+@@ -106,10 +106,10 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+
+- if (ring == &adev->uvd.inst->ring_enc[0])
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
++ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
+ else
+- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
+ }
+
+ /**
+@@ -123,7 +123,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_device *adev = ring->adev;
+
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+
+ /**
+@@ -144,11 +144,11 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
+ return;
+ }
+
+- if (ring == &adev->uvd.inst->ring_enc[0])
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
++ if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
++ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ else
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2,
++ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2,
+ lower_32_bits(ring->wptr));
+ }
+
+@@ -387,19 +387,21 @@ static int uvd_v7_0_sw_init(void *handle)
+ {
+ struct amdgpu_ring *ring;
+ struct drm_sched_rq *rq;
+- int i, r;
++ int i, j, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+- /* UVD TRAP */
+- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.inst->irq);
+- if (r)
+- return r;
+-
+- /* UVD ENC TRAP */
+- for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.inst->irq);
++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
++ /* UVD TRAP */
++ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.inst[j].irq);
+ if (r)
+ return r;
++
++ /* UVD ENC TRAP */
++ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
++ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.inst[j].irq);
++ if (r)
++ return r;
++ }
+ }
+
+ r = amdgpu_uvd_sw_init(adev);
+@@ -416,43 +418,48 @@ static int uvd_v7_0_sw_init(void *handle)
+ DRM_INFO("PSP loading UVD firmware\n");
+ }
+
+- ring = &adev->uvd.inst->ring_enc[0];
+- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+- r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc,
+- rq, amdgpu_sched_jobs, NULL);
+- if (r) {
+- DRM_ERROR("Failed setting up UVD ENC run queue.\n");
+- return r;
++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
++ ring = &adev->uvd.inst[j].ring_enc[0];
++ rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
++ r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc,
++ rq, amdgpu_sched_jobs, NULL);
++ if (r) {
++ DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j);
++ return r;
++ }
+ }
+
+ r = amdgpu_uvd_resume(adev);
+ if (r)
+ return r;
+- if (!amdgpu_sriov_vf(adev)) {
+- ring = &adev->uvd.inst->ring;
+- sprintf(ring->name, "uvd");
+- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
+- if (r)
+- return r;
+- }
+
+- for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+- ring = &adev->uvd.inst->ring_enc[i];
+- sprintf(ring->name, "uvd_enc%d", i);
+- if (amdgpu_sriov_vf(adev)) {
+- ring->use_doorbell = true;
+-
+- /* currently only use the first enconding ring for
+- * sriov, so set unused location for other unused rings.
+- */
+- if (i == 0)
+- ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
+- else
+- ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
++ if (!amdgpu_sriov_vf(adev)) {
++ ring = &adev->uvd.inst[j].ring;
++ sprintf(ring->name, "uvd<%d>", j);
++ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
++ if (r)
++ return r;
++ }
++
++ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
++ ring = &adev->uvd.inst[j].ring_enc[i];
++ sprintf(ring->name, "uvd_enc%d<%d>", i, j);
++ if (amdgpu_sriov_vf(adev)) {
++ ring->use_doorbell = true;
++
++ /* currently only use the first encoding ring for
++ * sriov, so set unused location for other unused rings.
++ */
++ if (i == 0)
++ ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
++ else
++ ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
++ }
++ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
++ if (r)
++ return r;
+ }
+- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
+- if (r)
+- return r;
+ }
+
+ r = amdgpu_virt_alloc_mm_table(adev);
+@@ -464,7 +471,7 @@ static int uvd_v7_0_sw_init(void *handle)
+
+ static int uvd_v7_0_sw_fini(void *handle)
+ {
+- int i, r;
++ int i, j, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_virt_free_mm_table(adev);
+@@ -473,11 +480,12 @@ static int uvd_v7_0_sw_fini(void *handle)
+ if (r)
+ return r;
+
+- drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc);
+-
+- for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+- amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
++ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
++ drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc);
+
++ for (i = 0; i < adev->uvd.num_enc_rings; ++i)
++ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
++ }
+ return amdgpu_uvd_sw_fini(adev);
+ }
+
+@@ -491,9 +499,9 @@ static int uvd_v7_0_sw_fini(void *handle)
+ static int uvd_v7_0_hw_init(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+- struct amdgpu_ring *ring = &adev->uvd.inst->ring;
++ struct amdgpu_ring *ring;
+ uint32_t tmp;
+- int i, r;
++ int i, j, r;
+
+ if (amdgpu_sriov_vf(adev))
+ r = uvd_v7_0_sriov_start(adev);
+@@ -502,57 +510,60 @@ static int uvd_v7_0_hw_init(void *handle)
+ if (r)
+ goto done;
+
+- if (!amdgpu_sriov_vf(adev)) {
+- ring->ready = true;
+- r = amdgpu_ring_test_ring(ring);
+- if (r) {
+- ring->ready = false;
+- goto done;
++ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
++ ring = &adev->uvd.inst[j].ring;
++
++ if (!amdgpu_sriov_vf(adev)) {
++ ring->ready = true;
++ r = amdgpu_ring_test_ring(ring);
++ if (r) {
++ ring->ready = false;
++ goto done;
++ }
++
++ r = amdgpu_ring_alloc(ring, 10);
++ if (r) {
++ DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r);
++ goto done;
++ }
++
++ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
++ mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
++ amdgpu_ring_write(ring, tmp);
++ amdgpu_ring_write(ring, 0xFFFFF);
++
++ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
++ mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
++ amdgpu_ring_write(ring, tmp);
++ amdgpu_ring_write(ring, 0xFFFFF);
++
++ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
++ mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
++ amdgpu_ring_write(ring, tmp);
++ amdgpu_ring_write(ring, 0xFFFFF);
++
++ /* Clear timeout status bits */
++ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
++ mmUVD_SEMA_TIMEOUT_STATUS), 0));
++ amdgpu_ring_write(ring, 0x8);
++
++ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
++ mmUVD_SEMA_CNTL), 0));
++ amdgpu_ring_write(ring, 3);
++
++ amdgpu_ring_commit(ring);
+ }
+
+- r = amdgpu_ring_alloc(ring, 10);
+- if (r) {
+- DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
+- goto done;
++ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
++ ring = &adev->uvd.inst[j].ring_enc[i];
++ ring->ready = true;
++ r = amdgpu_ring_test_ring(ring);
++ if (r) {
++ ring->ready = false;
++ goto done;
++ }
+ }
+-
+- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+- mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
+- amdgpu_ring_write(ring, tmp);
+- amdgpu_ring_write(ring, 0xFFFFF);
+-
+- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+- mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
+- amdgpu_ring_write(ring, tmp);
+- amdgpu_ring_write(ring, 0xFFFFF);
+-
+- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
+- mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
+- amdgpu_ring_write(ring, tmp);
+- amdgpu_ring_write(ring, 0xFFFFF);
+-
+- /* Clear timeout status bits */
+- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
+- mmUVD_SEMA_TIMEOUT_STATUS), 0));
+- amdgpu_ring_write(ring, 0x8);
+-
+- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
+- mmUVD_SEMA_CNTL), 0));
+- amdgpu_ring_write(ring, 3);
+-
+- amdgpu_ring_commit(ring);
+ }
+-
+- for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+- ring = &adev->uvd.inst->ring_enc[i];
+- ring->ready = true;
+- r = amdgpu_ring_test_ring(ring);
+- if (r) {
+- ring->ready = false;
+- goto done;
+- }
+- }
+-
+ done:
+ if (!r)
+ DRM_INFO("UVD and UVD ENC initialized successfully.\n");
+@@ -570,7 +581,7 @@ static int uvd_v7_0_hw_init(void *handle)
+ static int uvd_v7_0_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+- struct amdgpu_ring *ring = &adev->uvd.inst->ring;
++ int i;
+
+ if (!amdgpu_sriov_vf(adev))
+ uvd_v7_0_stop(adev);
+@@ -579,7 +590,8 @@ static int uvd_v7_0_hw_fini(void *handle)
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ }
+
+- ring->ready = false;
++ for (i = 0; i < adev->uvd.num_uvd_inst; ++i)
++ adev->uvd.inst[i].ring.ready = false;
+
+ return 0;
+ }
+@@ -619,48 +631,51 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
+ {
+ uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
+ uint32_t offset;
++ int i;
+
+- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+- lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+- upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+- offset = 0;
+- } else {
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+- lower_32_bits(adev->uvd.inst->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+- upper_32_bits(adev->uvd.inst->gpu_addr));
+- offset = size;
+- }
++ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
++ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
++ lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
++ upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
++ offset = 0;
++ } else {
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
++ lower_32_bits(adev->uvd.inst[i].gpu_addr));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
++ upper_32_bits(adev->uvd.inst[i].gpu_addr));
++ offset = size;
++ }
+
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
+-
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+- lower_32_bits(adev->uvd.inst->gpu_addr + offset));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+- upper_32_bits(adev->uvd.inst->gpu_addr + offset));
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
+-
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+- lower_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+- upper_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
+- AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+-
+- WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
+- adev->gfx.config.gb_addr_config);
+- WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
+- adev->gfx.config.gb_addr_config);
+- WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
+- adev->gfx.config.gb_addr_config);
+-
+- WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
++ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
++
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
++ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
++ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
++
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
++ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
++ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2,
++ AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
++
++ WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG,
++ adev->gfx.config.gb_addr_config);
++ WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG,
++ adev->gfx.config.gb_addr_config);
++ WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG,
++ adev->gfx.config.gb_addr_config);
++
++ WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
++ }
+ }
+
+ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
+@@ -670,6 +685,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
+ uint64_t addr = table->gpu_addr;
+ struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
+ uint32_t size;
++ int i;
+
+ size = header->header_size + header->vce_table_size + header->uvd_table_size;
+
+@@ -689,11 +705,12 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
+ /* 4, set resp to zero */
+ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);
+
+- WDOORBELL32(adev->uvd.inst->ring_enc[0].doorbell_index, 0);
+- adev->wb.wb[adev->uvd.inst->ring_enc[0].wptr_offs] = 0;
+- adev->uvd.inst->ring_enc[0].wptr = 0;
+- adev->uvd.inst->ring_enc[0].wptr_old = 0;
+-
++ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
++ WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
++ adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;
++ adev->uvd.inst[i].ring_enc[0].wptr = 0;
++ adev->uvd.inst[i].ring_enc[0].wptr_old = 0;
++ }
+ /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
+ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001);
+
+@@ -726,6 +743,7 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
+ struct mmsch_v1_0_cmd_end end = { {0} };
+ uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+ struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
++ uint8_t i = 0;
+
+ direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+@@ -743,120 +761,121 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
+
+ init_table += header->uvd_table_offset;
+
+- ring = &adev->uvd.inst->ring;
+- ring->wptr = 0;
+- size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
+-
+- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
+- 0xFFFFFFFF, 0x00000004);
+- /* mc resume*/
+- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+- lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+- upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+- offset = 0;
+- } else {
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+- lower_32_bits(adev->uvd.inst->gpu_addr));
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+- upper_32_bits(adev->uvd.inst->gpu_addr));
+- offset = size;
++ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
++ ring = &adev->uvd.inst[i].ring;
++ ring->wptr = 0;
++ size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
++
++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
++ 0xFFFFFFFF, 0x00000004);
++ /* mc resume*/
++ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
++ lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
++ upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
++ offset = 0;
++ } else {
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
++ lower_32_bits(adev->uvd.inst[i].gpu_addr));
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
++ upper_32_bits(adev->uvd.inst[i].gpu_addr));
++ offset = size;
++ }
++
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
++ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size);
++
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
++ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
++ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
++
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
++ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
++ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
++ AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
++
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
++ /* mc resume end*/
++
++ /* disable clock gating */
++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL),
++ ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
++
++ /* disable interrupt */
++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
++ ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
++
++ /* stall UMC and register bus before resetting VCPU */
++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
++ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
++
++ /* put LMI, VCPU, RBC etc... into reset */
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
++ (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
++
++ /* initialize UVD memory controller */
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL),
++ (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
++ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
++ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
++ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
++ UVD_LMI_CTRL__REQ_MODE_MASK |
++ 0x00100000L));
++
++ /* take all subblocks out of reset, except VCPU */
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
++
++ /* enable VCPU clock */
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
++ UVD_VCPU_CNTL__CLK_EN_MASK);
++
++ /* enable master interrupt */
++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
++ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
++ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
++
++ /* clear the bit 4 of UVD_STATUS */
++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
++ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
++
++ /* force RBC into idle state */
++ size = order_base_2(ring->ring_size);
++ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
++
++ ring = &adev->uvd.inst[i].ring_enc[0];
++ ring->wptr = 0;
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr);
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4);
++
++ /* boot up the VCPU */
++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0);
++
++ /* enable UMC */
++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
++
++ MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02);
+ }
+-
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size);
+-
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+- lower_32_bits(adev->uvd.inst->gpu_addr + offset));
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+- upper_32_bits(adev->uvd.inst->gpu_addr + offset));
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
+-
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+- lower_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+- upper_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
+- AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+-
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
+- /* mc resume end*/
+-
+- /* disable clock gating */
+- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL),
+- ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
+-
+- /* disable interupt */
+- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+- ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+-
+- /* stall UMC and register bus before resetting VCPU */
+- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+-
+- /* put LMI, VCPU, RBC etc... into reset */
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+- (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
+-
+- /* initialize UVD memory controller */
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL),
+- (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+- UVD_LMI_CTRL__REQ_MODE_MASK |
+- 0x00100000L));
+-
+- /* take all subblocks out of reset, except VCPU */
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+-
+- /* enable VCPU clock */
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
+- UVD_VCPU_CNTL__CLK_EN_MASK);
+-
+- /* enable master interrupt */
+- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+- ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+- (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+-
+- /* clear the bit 4 of UVD_STATUS */
+- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
+- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
+-
+- /* force RBC into idle state */
+- size = order_base_2(ring->ring_size);
+- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);
+-
+- ring = &adev->uvd.inst->ring_enc[0];
+- ring->wptr = 0;
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr);
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);
+-
+- /* boot up the VCPU */
+- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0);
+-
+- /* enable UMC */
+- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
+-
+- MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02);
+-
+ /* add end packet */
+ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+ table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+@@ -875,15 +894,17 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
+ */
+ static int uvd_v7_0_start(struct amdgpu_device *adev)
+ {
+- struct amdgpu_ring *ring = &adev->uvd.inst->ring;
++ struct amdgpu_ring *ring;
+ uint32_t rb_bufsz, tmp;
+ uint32_t lmi_swap_cntl;
+ uint32_t mp_swap_cntl;
+- int i, j, r;
++ int i, j, k, r;
+
+- /* disable DPG */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
+- ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
++ for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
++ /* disable DPG */
++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0,
++ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
++ }
+
+ /* disable byte swapping */
+ lmi_swap_cntl = 0;
+@@ -891,157 +912,159 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
+
+ uvd_v7_0_mc_resume(adev);
+
+- /* disable clock gating */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 0,
+- ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
+-
+- /* disable interupt */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
+- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+-
+- /* stall UMC and register bus before resetting VCPU */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+- mdelay(1);
+-
+- /* put LMI, VCPU, RBC etc... into reset */
+- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
+- UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+- UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+- mdelay(5);
++ for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
++ ring = &adev->uvd.inst[k].ring;
++ /* disable clock gating */
++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0,
++ ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
+
+- /* initialize UVD memory controller */
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL,
+- (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+- UVD_LMI_CTRL__REQ_MODE_MASK |
+- 0x00100000L);
++ /* disable interrupt */
++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0,
++ ~UVD_MASTINT_EN__VCPU_EN_MASK);
++
++ /* stall UMC and register bus before resetting VCPU */
++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2),
++ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
++ mdelay(1);
++
++ /* put LMI, VCPU, RBC etc... into reset */
++ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
++ UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
++ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
++ mdelay(5);
++
++ /* initialize UVD memory controller */
++ WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL,
++ (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
++ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
++ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
++ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
++ UVD_LMI_CTRL__REQ_MODE_MASK |
++ 0x00100000L);
+
+ #ifdef __BIG_ENDIAN
+- /* swap (8 in 32) RB and IB */
+- lmi_swap_cntl = 0xa;
+- mp_swap_cntl = 0;
++ /* swap (8 in 32) RB and IB */
++ lmi_swap_cntl = 0xa;
++ mp_swap_cntl = 0;
+ #endif
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+- WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+-
+- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040);
+- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0);
+- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040);
+- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0);
+- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0);
+- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88);
+-
+- /* take all subblocks out of reset, except VCPU */
+- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
+- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+- mdelay(5);
++ WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
++ WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+
+- /* enable VCPU clock */
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL,
+- UVD_VCPU_CNTL__CLK_EN_MASK);
++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040);
++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0);
++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040);
++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0);
++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0);
++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88);
+
+- /* enable UMC */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
+- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
++ /* take all subblocks out of reset, except VCPU */
++ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
++ mdelay(5);
+
+- /* boot up the VCPU */
+- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0);
+- mdelay(10);
++ /* enable VCPU clock */
++ WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL,
++ UVD_VCPU_CNTL__CLK_EN_MASK);
+
+- for (i = 0; i < 10; ++i) {
+- uint32_t status;
++ /* enable UMC */
++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0,
++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+- for (j = 0; j < 100; ++j) {
+- status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
++ /* boot up the VCPU */
++ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0);
++ mdelay(10);
++
++ for (i = 0; i < 10; ++i) {
++ uint32_t status;
++
++ for (j = 0; j < 100; ++j) {
++ status = RREG32_SOC15(UVD, k, mmUVD_STATUS);
++ if (status & 2)
++ break;
++ mdelay(10);
++ }
++ r = 0;
+ if (status & 2)
+ break;
++
++ DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k);
++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET),
++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
++ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
++ mdelay(10);
++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0,
++ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
++ r = -1;
+ }
+- r = 0;
+- if (status & 2)
+- break;
+
+- DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+- ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+- mdelay(10);
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
+- ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+- mdelay(10);
+- r = -1;
+- }
+-
+- if (r) {
+- DRM_ERROR("UVD not responding, giving up!!!\n");
+- return r;
+- }
+- /* enable master interrupt */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+- (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+- ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+-
+- /* clear the bit 4 of UVD_STATUS */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
+- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+-
+- /* force RBC into idle state */
+- rb_bufsz = order_base_2(ring->ring_size);
+- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+-
+- /* set the write pointer delay */
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
+-
+- /* set the wb address */
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
+- (upper_32_bits(ring->gpu_addr) >> 2));
+-
+- /* programm the RB_BASE for ring buffer */
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+- lower_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+- upper_32_bits(ring->gpu_addr));
+-
+- /* Initialize the ring buffer's read and write pointers */
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+-
+- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+- lower_32_bits(ring->wptr));
+-
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
+- ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+-
+- ring = &adev->uvd.inst->ring_enc[0];
+- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
++ if (r) {
++ DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k);
++ return r;
++ }
++ /* enable master interrupt */
++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN),
++ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
++ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+- ring = &adev->uvd.inst->ring_enc[1];
+- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
++ /* clear the bit 4 of UVD_STATUS */
++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0,
++ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
++ /* force RBC into idle state */
++ rb_bufsz = order_base_2(ring->ring_size);
++ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
++ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp);
++
++ /* set the write pointer delay */
++ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0);
++
++ /* set the wb address */
++ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR,
++ (upper_32_bits(ring->gpu_addr) >> 2));
++
++ /* program the RB_BASE for ring buffer */
++ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
++ lower_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
++ upper_32_bits(ring->gpu_addr));
++
++ /* Initialize the ring buffer's read and write pointers */
++ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0);
++
++ ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR);
++ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR,
++ lower_32_bits(ring->wptr));
++
++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0,
++ ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
++
++ ring = &adev->uvd.inst[k].ring_enc[0];
++ WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr);
++ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4);
++
++ ring = &adev->uvd.inst[k].ring_enc[1];
++ WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
++ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr);
++ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
++ WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4);
++ }
+ return 0;
+ }
+
+@@ -1054,26 +1077,30 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
+ */
+ static void uvd_v7_0_stop(struct amdgpu_device *adev)
+ {
+- /* force RBC into idle state */
+- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);
+-
+- /* Stall UMC and register bus before resetting VCPU */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+- mdelay(1);
+-
+- /* put VCPU into reset */
+- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
+- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+- mdelay(5);
++ uint8_t i = 0;
++
++ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
++ /* force RBC into idle state */
++ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101);
+
+- /* disable VCPU clock */
+- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0);
++ /* Stall UMC and register bus before resetting VCPU */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
++ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
++ mdelay(1);
+
+- /* Unstall UMC and register bus */
+- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
+- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
++ /* put VCPU into reset */
++ WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET,
++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
++ mdelay(5);
++
++ /* disable VCPU clock */
++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0);
++
++ /* Unstall UMC and register bus */
++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
++ }
+ }
+
+ /**
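[The stall/unstall writes above rely on WREG32_P, amdgpu's masked read-modify-write helper; a sketch of its behavior, paraphrased from amdgpu.h:

	/* Sketch, paraphrased from amdgpu.h: keep the bits selected by 'mask',
	 * replace the rest with 'val'.  WREG32_P(reg, bits, ~bits) therefore
	 * sets 'bits', and WREG32_P(reg, 0, ~bits) clears them, which is how
	 * the UMC stall is asserted and released per instance above. */
	#define WREG32_P(reg, val, mask)		\
		do {					\
			uint32_t tmp_ = RREG32(reg);	\
			tmp_ &= (mask);			\
			tmp_ |= ((val) & ~(mask));	\
			WREG32(reg, tmp_);		\
		} while (0)
]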
+@@ -1092,26 +1119,26 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, addr & 0xffffffff);
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, 2);
+ }
+
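[Every emit helper in this file repeats the same two-dword pattern: a PACKET0 header addressing a per-instance register, followed by the value. A hypothetical wrapper capturing the pattern (not part of this patch; the name is invented, and a local 'adev' is assumed in scope as the surrounding functions declare):

	/* Hypothetical helper, not in this patch: emit one register write into
	 * the UVD command stream.  ring->me selects the instance, mirroring
	 * the hard-coded-0 to ring->me conversion done throughout this file. */
	#define UVD_V7_RING_WREG(ring, reg, val)				\
		do {								\
			amdgpu_ring_write((ring), PACKET0(			\
				SOC15_REG_OFFSET(UVD, (ring)->me, reg), 0));	\
			amdgpu_ring_write((ring), (val));			\
		} while (0)

With it, the fence sequence above would reduce to UVD_V7_RING_WREG(ring, mmUVD_CONTEXT_ID, seq) and so on.]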
+@@ -1181,7 +1208,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
+ unsigned i;
+ int r;
+
+- WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
++ WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r) {
+ DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n",
+@@ -1189,11 +1216,11 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
+ return r;
+ }
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+- tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID);
++ tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+@@ -1225,17 +1252,17 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
+ amdgpu_ring_write(ring, vmid);
+
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0));
+ amdgpu_ring_write(ring, ib->length_dw);
+ }
+
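[For illustration (values invented), submitting a 16-dword IB at GPU address 0x123456000 from VMID 0 produces this 8-dword sequence on the decode ring:

	PACKET0(mmUVD_LMI_RBC_IB_VMID, 0)            0x00000000  /* vmid            */
	PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)   0x23456000  /* addr bits 31:0  */
	PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH, 0)  0x00000001  /* addr bits 63:32 */
	PACKET0(mmUVD_RBC_IB_SIZE, 0)                0x00000010  /* 16 dwords       */
]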
+@@ -1263,13 +1290,13 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, val);
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, 8);
+ }
+
+@@ -1279,16 +1306,16 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, val);
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0));
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring,
+- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, 12);
+ }
+
+@@ -1313,7 +1340,7 @@ static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+ struct amdgpu_device *adev = ring->adev;
+
+ for (i = 0; i < count; i++)
+- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
++ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0));
+
+ }
+
+@@ -1381,16 +1408,16 @@ static bool uvd_v7_0_check_soft_reset(void *handle)
+
+ if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
+ REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
+- (RREG32_SOC15(UVD, 0, mmUVD_STATUS) &
++ (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
+ AMDGPU_UVD_STATUS_BUSY_MASK))
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
+ SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
+
+ if (srbm_soft_reset) {
+- adev->uvd.inst->srbm_soft_reset = srbm_soft_reset;
++ adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
+ return true;
+ } else {
+- adev->uvd.inst->srbm_soft_reset = 0;
++ adev->uvd.inst[ring->me].srbm_soft_reset = 0;
+ return false;
+ }
+ }
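[The SRBM status checks above use amdgpu's generic bitfield accessors; a sketch, paraphrased from amdgpu.h (the *__SHIFT and *_MASK tokens come from the generated register headers):

	/* Sketch, paraphrased from amdgpu.h: the register headers define
	 * <REG>__<FIELD>__SHIFT and <REG>__<FIELD>_MASK, which these helpers
	 * token-paste to extract or replace a single field in a value. */
	#define REG_FIELD_SHIFT(reg, field)	reg##__##field##__SHIFT
	#define REG_FIELD_MASK(reg, field)	reg##__##field##_MASK

	#define REG_GET_FIELD(value, reg, field) \
		(((value) & REG_FIELD_MASK(reg, field)) >> \
		 REG_FIELD_SHIFT(reg, field))

	#define REG_SET_FIELD(orig_val, reg, field, field_val)		\
		(((orig_val) & ~REG_FIELD_MASK(reg, field)) |		\
		 (REG_FIELD_MASK(reg, field) &				\
		  ((field_val) << REG_FIELD_SHIFT(reg, field))))
]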
+@@ -1399,7 +1426,7 @@ static int uvd_v7_0_pre_soft_reset(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+- if (!adev->uvd.inst->srbm_soft_reset)
++ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
+ return 0;
+
+ uvd_v7_0_stop(adev);
+@@ -1411,9 +1438,9 @@ static int uvd_v7_0_soft_reset(void *handle)
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 srbm_soft_reset;
+
+- if (!adev->uvd.inst->srbm_soft_reset)
++ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
+ return 0;
+- srbm_soft_reset = adev->uvd.inst->srbm_soft_reset;
++ srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
+
+ if (srbm_soft_reset) {
+ u32 tmp;
+@@ -1441,7 +1468,7 @@ static int uvd_v7_0_post_soft_reset(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+- if (!adev->uvd.inst->srbm_soft_reset)
++ if (!adev->uvd.inst[ring->me].srbm_soft_reset)
+ return 0;
+
+ mdelay(5);
+@@ -1463,17 +1490,29 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+ {
++ uint32_t ip_instance;
++
++ switch (entry->client_id) {
++ case SOC15_IH_CLIENTID_UVD:
++ ip_instance = 0;
++ break;
++ default:
++ DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
++ return 0;
++ }
++
+ DRM_DEBUG("IH: UVD TRAP\n");
++
+ switch (entry->src_id) {
+ case 124:
+- amdgpu_fence_process(&adev->uvd.inst->ring);
++ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring);
+ break;
+ case 119:
+- amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]);
++ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]);
+ break;
+ case 120:
+ if (!amdgpu_sriov_vf(adev))
+- amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]);
++ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+@@ -1489,9 +1528,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
+ {
+ uint32_t data, data1, data2, suvd_flags;
+
+- data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL);
+- data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
+- data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL);
++ data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
++ data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
++ data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
+
+ data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
+ UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
+@@ -1535,18 +1574,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
+ UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
+ data1 |= suvd_flags;
+
+- WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data);
+- WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0);
+- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
+- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2);
++ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
++ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
++ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
++ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
+ }
+
+ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
+ {
+ uint32_t data, data1, cgc_flags, suvd_flags;
+
+- data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE);
+- data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
++ data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
++ data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
+
+ cgc_flags = UVD_CGC_GATE__SYS_MASK |
+ UVD_CGC_GATE__UDEC_MASK |
+@@ -1578,8 +1617,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
+ data |= cgc_flags;
+ data1 |= suvd_flags;
+
+- WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data);
+- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
++ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
++ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
+ }
+
+ static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
+@@ -1638,7 +1677,7 @@ static int uvd_v7_0_set_powergating_state(void *handle,
+ if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+ return 0;
+
+- WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
++ WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
+
+ if (state == AMD_PG_STATE_GATE) {
+ uvd_v7_0_stop(adev);
+@@ -1742,18 +1781,27 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
+
+ static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
+ {
+- adev->uvd.inst->ring.funcs = &uvd_v7_0_ring_vm_funcs;
+- DRM_INFO("UVD is enabled in VM mode\n");
++ int i;
++
++ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
++ adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;
++ adev->uvd.inst[i].ring.me = i;
++ DRM_INFO("UVD(%d) is enabled in VM mode\n", i);
++ }
+ }
+
+ static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)
+ {
+- int i;
++ int i, j;
+
+- for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+- adev->uvd.inst->ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
++ for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
++ adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
++ adev->uvd.inst[j].ring_enc[i].me = j;
++ }
+
+- DRM_INFO("UVD ENC is enabled in VM mode\n");
++ DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j);
++ }
+ }
+
+ static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
+@@ -1763,8 +1811,12 @@ static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
+
+ static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
+ {
+- adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1;
+- adev->uvd.inst->irq.funcs = &uvd_v7_0_irq_funcs;
++ int i;
++
++ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
++ adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1;
++ adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs;
++ }
+ }
+
+ const struct amdgpu_ip_block_version uvd_v7_0_ip_block =
+--
+2.7.4
+