Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/4430-drm-amdgpu-vg20-Restruct-uvd.inst-to-support-multipl.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/4430-drm-amdgpu-vg20-Restruct-uvd.inst-to-support-multipl.patch | 1874
1 file changed, 1874 insertions, 0 deletions
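The patch below converts every implicit single-instance access (adev->uvd.inst->field) into an indexed access (adev->uvd.inst[i].field) and repurposes amdgpu_ring::me to record which UVD instance a ring belongs to, so callbacks that receive only a ring pointer can recover their owning instance. The following minimal, self-contained C sketch illustrates that indexing pattern; the sketch_* structures are simplified stand-ins invented for illustration, not the real amdgpu definitions.

/* sketch.c -- illustrative only; simplified stand-ins for the amdgpu
 * structures, not the real driver headers. */
#include <stdio.h>

#define SKETCH_MAX_UVD_INST 2            /* Vega20 carries two UVD blocks */

struct sketch_ring {
	unsigned int me;                 /* owning instance index; 0 by default */
	int ready;
};

struct sketch_uvd_inst {
	struct sketch_ring ring;         /* per-instance decode ring */
};

struct sketch_uvd {
	unsigned int num_uvd_inst;
	struct sketch_uvd_inst inst[SKETCH_MAX_UVD_INST];
};

/* Old code reached the single instance as uvd.inst->ring; the patch
 * replaces that with uvd.inst[ring->me].ring so each ring resolves
 * its own instance. */
static int sketch_ring_ready(struct sketch_uvd *uvd, struct sketch_ring *ring)
{
	return uvd->inst[ring->me].ring.ready;
}

/* Ring-mask construction over all instances, mirroring the reworked
 * AMDGPU_HW_IP_UVD case in amdgpu_info_ioctl() below. */
static unsigned int sketch_uvd_ring_mask(struct sketch_uvd *uvd)
{
	unsigned int i, mask = 0;

	for (i = 0; i < uvd->num_uvd_inst; i++)
		mask |= (uvd->inst[i].ring.ready ? 1u : 0u) << i;
	return mask;
}

int main(void)
{
	struct sketch_uvd uvd = { .num_uvd_inst = 2 };

	uvd.inst[0].ring.me = 0;
	uvd.inst[0].ring.ready = 1;
	uvd.inst[1].ring.me = 1;
	uvd.inst[1].ring.ready = 1;

	printf("ring mask: 0x%x\n", sketch_uvd_ring_mask(&uvd));  /* 0x3 */
	printf("inst1 ready: %d\n",
	       sketch_ring_ready(&uvd, &uvd.inst[1].ring));       /* 1 */
	return 0;
}

With both instances marked ready, the sketch prints a ring mask of 0x3, matching how the reworked AMDGPU_HW_IP_UVD query ORs one ready bit per instance into the mask. The design point carried through the whole patch is that register accessors, interrupt handlers, and fence paths that only see a ring can index per-instance state through ring->me instead of hard-coding instance 0.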
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/4430-drm-amdgpu-vg20-Restruct-uvd.inst-to-support-multipl.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/4430-drm-amdgpu-vg20-Restruct-uvd.inst-to-support-multipl.patch new file mode 100644 index 00000000..586cc880 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/4430-drm-amdgpu-vg20-Restruct-uvd.inst-to-support-multipl.patch @@ -0,0 +1,1874 @@ +From b120d0a52e56b00f6c667dcffb6fba1455db2af6 Mon Sep 17 00:00:00 2001 +From: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com> +Date: Wed, 9 Jan 2019 19:29:02 +0530 +Subject: [PATCH 4430/5725] drm/amdgpu/vg20:Restruct uvd.inst to support + multiple instances + +Vega20 has dual-UVD. Need add multiple instances support for uvd. +Restruct uvd.inst, using uvd.inst[0] to replace uvd.inst->. +Repurpose amdgpu_ring::me for instance index, and initialize to 0. +There are no any logical changes here. + +Signed-off-by: James Zhu <James.Zhu@amd.com> +Reviewed-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 6 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 12 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 231 +++---- + drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 1002 +++++++++++++++-------------- + 5 files changed, 661 insertions(+), 591 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +index fafe54a..bed6d77 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +@@ -376,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, + struct amdgpu_device *adev = ring->adev; + uint64_t index; + +- if (ring != &adev->uvd.inst->ring) { ++ if (ring != &adev->uvd.inst[ring->me].ring) { + ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs]; + ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4); + } else { + /* put fence directly behind firmware */ + index = ALIGN(adev->uvd.fw->size, 8); +- ring->fence_drv.cpu_addr = adev->uvd.inst->cpu_addr + index; +- ring->fence_drv.gpu_addr = adev->uvd.inst->gpu_addr + index; ++ ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index; ++ ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index; + } + amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); + amdgpu_irq_get(adev, irq_src, irq_type); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index 46cfddf..9de27ce 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -283,7 +283,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + struct drm_crtc *crtc; + uint32_t ui32 = 0; + uint64_t ui64 = 0; +- int i, found; ++ int i, j, found; + int ui32_size = sizeof(ui32); + + if (!info->return_size || !info->return_pointer) +@@ -359,7 +359,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + break; + case AMDGPU_HW_IP_UVD: + type = AMD_IP_BLOCK_TYPE_UVD; +- ring_mask = adev->uvd.inst->ring.ready ? 1 : 0; ++ for (i = 0; i < adev->uvd.num_uvd_inst; i++) ++ ring_mask |= ((adev->uvd.inst[i].ring.ready ? 
1 : 0) << i); + ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_size_alignment = 16; + break; +@@ -372,8 +373,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + break; + case AMDGPU_HW_IP_UVD_ENC: + type = AMD_IP_BLOCK_TYPE_UVD; +- for (i = 0; i < adev->uvd.num_enc_rings; i++) +- ring_mask |= ((adev->uvd.inst->ring_enc[i].ready ? 1 : 0) << i); ++ for (i = 0; i < adev->uvd.num_uvd_inst; i++) ++ for (j = 0; j < adev->uvd.num_enc_rings; j++) ++ ring_mask |= ++ ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) << ++ (j + i * adev->uvd.num_enc_rings)); + ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_size_alignment = 1; + break; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +index 49cad08..c6850b6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +@@ -362,6 +362,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) + + dma_fence_put(ring->vmid_wait); + ring->vmid_wait = NULL; ++ ring->me = 0; + + ring->adev->rings[ring->idx] = NULL; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +index 263cd945..c9ed917 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +@@ -127,7 +127,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) + const char *fw_name; + const struct common_firmware_header *hdr; + unsigned version_major, version_minor, family_id; +- int i, r; ++ int i, j, r; + + INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler); + +@@ -236,28 +236,30 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + +- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, +- AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst->vcpu_bo, +- &adev->uvd.inst->gpu_addr, &adev->uvd.inst->cpu_addr); +- if (r) { +- dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); +- return r; +- } ++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + +- ring = &adev->uvd.inst->ring; +- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; +- r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity, +- rq, amdgpu_sched_jobs, NULL); +- if (r != 0) { +- DRM_ERROR("Failed setting up UVD run queue.\n"); +- return r; +- } ++ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, ++ AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo, ++ &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr); ++ if (r) { ++ dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); ++ return r; ++ } + +- for (i = 0; i < adev->uvd.max_handles; ++i) { +- atomic_set(&adev->uvd.inst->handles[i], 0); +- adev->uvd.inst->filp[i] = NULL; +- } ++ ring = &adev->uvd.inst[j].ring; ++ rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; ++ r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity, ++ rq, amdgpu_sched_jobs, NULL); ++ if (r != 0) { ++ DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j); ++ return r; ++ } + ++ for (i = 0; i < adev->uvd.max_handles; ++i) { ++ atomic_set(&adev->uvd.inst[j].handles[i], 0); ++ adev->uvd.inst[j].filp[i] = NULL; ++ } ++ } + /* from uvd v5.0 HW addressing capacity increased to 64 bits */ + if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) + adev->uvd.address_64_bit = true; +@@ -284,20 +286,22 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) + + int amdgpu_uvd_sw_fini(struct amdgpu_device 
*adev) + { +- int i; +- kfree(adev->uvd.inst->saved_bo); ++ int i, j; + +- drm_sched_entity_fini(&adev->uvd.inst->ring.sched, &adev->uvd.inst->entity); ++ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { ++ kfree(adev->uvd.inst[j].saved_bo); + +- amdgpu_bo_free_kernel(&adev->uvd.inst->vcpu_bo, +- &adev->uvd.inst->gpu_addr, +- (void **)&adev->uvd.inst->cpu_addr); ++ drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity); + +- amdgpu_ring_fini(&adev->uvd.inst->ring); ++ amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo, ++ &adev->uvd.inst[j].gpu_addr, ++ (void **)&adev->uvd.inst[j].cpu_addr); + +- for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) +- amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]); ++ amdgpu_ring_fini(&adev->uvd.inst[j].ring); + ++ for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) ++ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); ++ } + release_firmware(adev->uvd.fw); + + return 0; +@@ -307,32 +311,33 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) + { + unsigned size; + void *ptr; +- int i; ++ int i, j; + +- if (adev->uvd.inst->vcpu_bo == NULL) +- return 0; ++ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { ++ if (adev->uvd.inst[j].vcpu_bo == NULL) ++ continue; + +- cancel_delayed_work_sync(&adev->uvd.inst->idle_work); ++ cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work); + +- /* only valid for physical mode */ +- if (adev->asic_type < CHIP_POLARIS10) { +- for (i = 0; i < adev->uvd.max_handles; ++i) +- if (atomic_read(&adev->uvd.inst->handles[i])) +- break; ++ /* only valid for physical mode */ ++ if (adev->asic_type < CHIP_POLARIS10) { ++ for (i = 0; i < adev->uvd.max_handles; ++i) ++ if (atomic_read(&adev->uvd.inst[j].handles[i])) ++ break; + +- if (i == adev->uvd.max_handles) +- return 0; +- } ++ if (i == adev->uvd.max_handles) ++ continue; ++ } + +- size = amdgpu_bo_size(adev->uvd.inst->vcpu_bo); +- ptr = adev->uvd.inst->cpu_addr; ++ size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo); ++ ptr = adev->uvd.inst[j].cpu_addr; ++ ++ adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL); ++ if (!adev->uvd.inst[j].saved_bo) ++ return -ENOMEM; ++ memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); ++ } + +- adev->uvd.inst->saved_bo = kmalloc(size, GFP_KERNEL); +- if (!adev->uvd.inst->saved_bo) +- return -ENOMEM; +- +- memcpy_fromio(adev->uvd.inst->saved_bo, ptr, size); +- + return 0; + } + +@@ -340,59 +345,65 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) + { + unsigned size; + void *ptr; ++ int i; + +- if (adev->uvd.inst->vcpu_bo == NULL) +- return -EINVAL; ++ for (i = 0; i < adev->uvd.num_uvd_inst; i++) { ++ if (adev->uvd.inst[i].vcpu_bo == NULL) ++ return -EINVAL; + +- size = amdgpu_bo_size(adev->uvd.inst->vcpu_bo); +- ptr = adev->uvd.inst->cpu_addr; ++ size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo); ++ ptr = adev->uvd.inst[i].cpu_addr; + +- if (adev->uvd.inst->saved_bo != NULL) { +- memcpy_toio(ptr, adev->uvd.inst->saved_bo, size); +- kfree(adev->uvd.inst->saved_bo); +- adev->uvd.inst->saved_bo = NULL; +- } else { +- const struct common_firmware_header *hdr; +- unsigned offset; +- +- hdr = (const struct common_firmware_header *)adev->uvd.fw->data; +- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { +- offset = le32_to_cpu(hdr->ucode_array_offset_bytes); +- memcpy_toio(adev->uvd.inst->cpu_addr, adev->uvd.fw->data + offset, +- le32_to_cpu(hdr->ucode_size_bytes)); +- size -= le32_to_cpu(hdr->ucode_size_bytes); +- ptr += le32_to_cpu(hdr->ucode_size_bytes); ++ if (adev->uvd.inst[i].saved_bo != NULL) { ++ memcpy_toio(ptr, 
adev->uvd.inst[i].saved_bo, size); ++ kfree(adev->uvd.inst[i].saved_bo); ++ adev->uvd.inst[i].saved_bo = NULL; ++ } else { ++ const struct common_firmware_header *hdr; ++ unsigned offset; ++ ++ hdr = (const struct common_firmware_header *)adev->uvd.fw->data; ++ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { ++ offset = le32_to_cpu(hdr->ucode_array_offset_bytes); ++ memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset, ++ le32_to_cpu(hdr->ucode_size_bytes)); ++ size -= le32_to_cpu(hdr->ucode_size_bytes); ++ ptr += le32_to_cpu(hdr->ucode_size_bytes); ++ } ++ memset_io(ptr, 0, size); ++ /* to restore uvd fence seq */ ++ amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring); + } +- memset_io(ptr, 0, size); +- /* to restore uvd fence seq */ +- amdgpu_fence_driver_force_completion(&adev->uvd.inst->ring); + } +- + return 0; + } + + void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) + { +- struct amdgpu_ring *ring = &adev->uvd.inst->ring; +- int i, r; ++ struct amdgpu_ring *ring; ++ int i, j, r; + +- for (i = 0; i < adev->uvd.max_handles; ++i) { +- uint32_t handle = atomic_read(&adev->uvd.inst->handles[i]); +- if (handle != 0 && adev->uvd.inst->filp[i] == filp) { +- struct dma_fence *fence; +- +- r = amdgpu_uvd_get_destroy_msg(ring, handle, +- false, &fence); +- if (r) { +- DRM_ERROR("Error destroying UVD (%d)!\n", r); +- continue; +- } ++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) { ++ ring = &adev->uvd.inst[j].ring; + +- dma_fence_wait(fence, false); +- dma_fence_put(fence); ++ for (i = 0; i < adev->uvd.max_handles; ++i) { ++ uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]); ++ if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) { ++ struct dma_fence *fence; ++ ++ r = amdgpu_uvd_get_destroy_msg(ring, handle, ++ false, &fence); ++ if (r) { ++ DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r); ++ continue; ++ } + +- adev->uvd.inst->filp[i] = NULL; +- atomic_set(&adev->uvd.inst->handles[i], 0); ++ dma_fence_wait(fence, false); ++ dma_fence_put(fence); ++ ++ adev->uvd.inst[j].filp[i] = NULL; ++ atomic_set(&adev->uvd.inst[j].handles[i], 0); ++ } + } + } + } +@@ -667,15 +678,16 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, + void *ptr; + long r; + int i; ++ uint32_t ip_instance = ctx->parser->job->ring->me; + + if (offset & 0x3F) { +- DRM_ERROR("UVD messages must be 64 byte aligned!\n"); ++ DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance); + return -EINVAL; + } + + r = amdgpu_bo_kmap(bo, &ptr); + if (r) { +- DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); ++ DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r); + return r; + } + +@@ -685,7 +697,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, + handle = msg[2]; + + if (handle == 0) { +- DRM_ERROR("Invalid UVD handle!\n"); ++ DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance); + return -EINVAL; + } + +@@ -696,18 +708,18 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, + + /* try to alloc a new handle */ + for (i = 0; i < adev->uvd.max_handles; ++i) { +- if (atomic_read(&adev->uvd.inst->handles[i]) == handle) { +- DRM_ERROR("Handle 0x%x already in use!\n", handle); ++ if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) { ++ DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle); + return -EINVAL; + } + +- if (!atomic_cmpxchg(&adev->uvd.inst->handles[i], 0, handle)) { +- adev->uvd.inst->filp[i] = ctx->parser->filp; ++ if 
(!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) { ++ adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp; + return 0; + } + } + +- DRM_ERROR("No more free UVD handles!\n"); ++ DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance); + return -ENOSPC; + + case 1: +@@ -719,27 +731,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, + + /* validate the handle */ + for (i = 0; i < adev->uvd.max_handles; ++i) { +- if (atomic_read(&adev->uvd.inst->handles[i]) == handle) { +- if (adev->uvd.inst->filp[i] != ctx->parser->filp) { +- DRM_ERROR("UVD handle collision detected!\n"); ++ if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) { ++ if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) { ++ DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance); + return -EINVAL; + } + return 0; + } + } + +- DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); ++ DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle); + return -ENOENT; + + case 2: + /* it's a destroy msg, free the handle */ + for (i = 0; i < adev->uvd.max_handles; ++i) +- atomic_cmpxchg(&adev->uvd.inst->handles[i], handle, 0); ++ atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0); + amdgpu_bo_kunmap(bo); + return 0; + + default: +- DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); ++ DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type); + return -EINVAL; + } + BUG(); +@@ -1043,7 +1055,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, + if (r) + goto err_free; + +- r = amdgpu_job_submit(job, ring, &adev->uvd.inst->entity, ++ r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &f); + if (r) + goto err_free; +@@ -1191,27 +1203,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) + { + struct dma_fence *fence; + long r; ++ uint32_t ip_instance = ring->me; + + r = amdgpu_uvd_get_create_msg(ring, 1, NULL); + if (r) { +- DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); ++ DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r); + goto error; + } + + r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); + if (r) { +- DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); ++ DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r); + goto error; + } + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { +- DRM_ERROR("amdgpu: IB test timed out.\n"); ++ DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance); + r = -ETIMEDOUT; + } else if (r < 0) { +- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); ++ DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r); + } else { +- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); ++ DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx); + r = 0; + } + +diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +index debf206..38816227 100644 +--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +@@ -58,7 +58,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring) + { + struct amdgpu_device *adev = ring->adev; + +- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); ++ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); + } + + /** +@@ -72,10 +72,10 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring) + { + struct amdgpu_device *adev = ring->adev; + +- if (ring == 
&adev->uvd.inst->ring_enc[0]) +- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); ++ if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) ++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); + else +- return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); ++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); + } + + /** +@@ -89,7 +89,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring) + { + struct amdgpu_device *adev = ring->adev; + +- return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); ++ return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); + } + + /** +@@ -106,10 +106,10 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring) + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + +- if (ring == &adev->uvd.inst->ring_enc[0]) +- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); ++ if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) ++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); + else +- return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); ++ return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); + } + + /** +@@ -123,7 +123,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring) + { + struct amdgpu_device *adev = ring->adev; + +- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); ++ WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + } + + /** +@@ -144,11 +144,11 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) + return; + } + +- if (ring == &adev->uvd.inst->ring_enc[0]) +- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, ++ if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) ++ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, + lower_32_bits(ring->wptr)); + else +- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, ++ WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, + lower_32_bits(ring->wptr)); + } + +@@ -387,19 +387,21 @@ static int uvd_v7_0_sw_init(void *handle) + { + struct amdgpu_ring *ring; + struct drm_sched_rq *rq; +- int i, r; ++ int i, j, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +- /* UVD TRAP */ +- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.inst->irq); +- if (r) +- return r; +- +- /* UVD ENC TRAP */ +- for (i = 0; i < adev->uvd.num_enc_rings; ++i) { +- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.inst->irq); ++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) { ++ /* UVD TRAP */ ++ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.inst[j].irq); + if (r) + return r; ++ ++ /* UVD ENC TRAP */ ++ for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ++ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.inst[j].irq); ++ if (r) ++ return r; ++ } + } + + r = amdgpu_uvd_sw_init(adev); +@@ -416,43 +418,48 @@ static int uvd_v7_0_sw_init(void *handle) + DRM_INFO("PSP loading UVD firmware\n"); + } + +- ring = &adev->uvd.inst->ring_enc[0]; +- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; +- r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc, +- rq, amdgpu_sched_jobs, NULL); +- if (r) { +- DRM_ERROR("Failed setting up UVD ENC run queue.\n"); +- return r; ++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) { ++ ring = &adev->uvd.inst[j].ring_enc[0]; ++ rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; ++ r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc, ++ rq, amdgpu_sched_jobs, NULL); ++ if (r) { ++ DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j); ++ return r; ++ } + } + + r = amdgpu_uvd_resume(adev); + if (r) + return r; +- if (!amdgpu_sriov_vf(adev)) { +- ring = 
&adev->uvd.inst->ring; +- sprintf(ring->name, "uvd"); +- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); +- if (r) +- return r; +- } + +- for (i = 0; i < adev->uvd.num_enc_rings; ++i) { +- ring = &adev->uvd.inst->ring_enc[i]; +- sprintf(ring->name, "uvd_enc%d", i); +- if (amdgpu_sriov_vf(adev)) { +- ring->use_doorbell = true; +- +- /* currently only use the first enconding ring for +- * sriov, so set unused location for other unused rings. +- */ +- if (i == 0) +- ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; +- else +- ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; ++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) { ++ if (!amdgpu_sriov_vf(adev)) { ++ ring = &adev->uvd.inst[j].ring; ++ sprintf(ring->name, "uvd<%d>", j); ++ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); ++ if (r) ++ return r; ++ } ++ ++ for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ++ ring = &adev->uvd.inst[j].ring_enc[i]; ++ sprintf(ring->name, "uvd_enc%d<%d>", i, j); ++ if (amdgpu_sriov_vf(adev)) { ++ ring->use_doorbell = true; ++ ++ /* currently only use the first enconding ring for ++ * sriov, so set unused location for other unused rings. ++ */ ++ if (i == 0) ++ ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; ++ else ++ ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; ++ } ++ r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); ++ if (r) ++ return r; + } +- r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); +- if (r) +- return r; + } + + r = amdgpu_virt_alloc_mm_table(adev); +@@ -464,7 +471,7 @@ static int uvd_v7_0_sw_init(void *handle) + + static int uvd_v7_0_sw_fini(void *handle) + { +- int i, r; ++ int i, j, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_virt_free_mm_table(adev); +@@ -473,11 +480,12 @@ static int uvd_v7_0_sw_fini(void *handle) + if (r) + return r; + +- drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc); +- +- for (i = 0; i < adev->uvd.num_enc_rings; ++i) +- amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]); ++ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { ++ drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc); + ++ for (i = 0; i < adev->uvd.num_enc_rings; ++i) ++ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); ++ } + return amdgpu_uvd_sw_fini(adev); + } + +@@ -491,9 +499,9 @@ static int uvd_v7_0_sw_fini(void *handle) + static int uvd_v7_0_hw_init(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; +- struct amdgpu_ring *ring = &adev->uvd.inst->ring; ++ struct amdgpu_ring *ring; + uint32_t tmp; +- int i, r; ++ int i, j, r; + + if (amdgpu_sriov_vf(adev)) + r = uvd_v7_0_sriov_start(adev); +@@ -502,57 +510,60 @@ static int uvd_v7_0_hw_init(void *handle) + if (r) + goto done; + +- if (!amdgpu_sriov_vf(adev)) { +- ring->ready = true; +- r = amdgpu_ring_test_ring(ring); +- if (r) { +- ring->ready = false; +- goto done; ++ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { ++ ring = &adev->uvd.inst[j].ring; ++ ++ if (!amdgpu_sriov_vf(adev)) { ++ ring->ready = true; ++ r = amdgpu_ring_test_ring(ring); ++ if (r) { ++ ring->ready = false; ++ goto done; ++ } ++ ++ r = amdgpu_ring_alloc(ring, 10); ++ if (r) { ++ DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r); ++ goto done; ++ } ++ ++ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, ++ mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); ++ amdgpu_ring_write(ring, tmp); ++ amdgpu_ring_write(ring, 0xFFFFF); ++ ++ tmp = 
PACKET0(SOC15_REG_OFFSET(UVD, j, ++ mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); ++ amdgpu_ring_write(ring, tmp); ++ amdgpu_ring_write(ring, 0xFFFFF); ++ ++ tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, ++ mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); ++ amdgpu_ring_write(ring, tmp); ++ amdgpu_ring_write(ring, 0xFFFFF); ++ ++ /* Clear timeout status bits */ ++ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j, ++ mmUVD_SEMA_TIMEOUT_STATUS), 0)); ++ amdgpu_ring_write(ring, 0x8); ++ ++ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j, ++ mmUVD_SEMA_CNTL), 0)); ++ amdgpu_ring_write(ring, 3); ++ ++ amdgpu_ring_commit(ring); + } + +- r = amdgpu_ring_alloc(ring, 10); +- if (r) { +- DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); +- goto done; ++ for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ++ ring = &adev->uvd.inst[j].ring_enc[i]; ++ ring->ready = true; ++ r = amdgpu_ring_test_ring(ring); ++ if (r) { ++ ring->ready = false; ++ goto done; ++ } + } +- +- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, +- mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); +- amdgpu_ring_write(ring, tmp); +- amdgpu_ring_write(ring, 0xFFFFF); +- +- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, +- mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); +- amdgpu_ring_write(ring, tmp); +- amdgpu_ring_write(ring, 0xFFFFF); +- +- tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, +- mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); +- amdgpu_ring_write(ring, tmp); +- amdgpu_ring_write(ring, 0xFFFFF); +- +- /* Clear timeout status bits */ +- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, +- mmUVD_SEMA_TIMEOUT_STATUS), 0)); +- amdgpu_ring_write(ring, 0x8); +- +- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, +- mmUVD_SEMA_CNTL), 0)); +- amdgpu_ring_write(ring, 3); +- +- amdgpu_ring_commit(ring); + } +- +- for (i = 0; i < adev->uvd.num_enc_rings; ++i) { +- ring = &adev->uvd.inst->ring_enc[i]; +- ring->ready = true; +- r = amdgpu_ring_test_ring(ring); +- if (r) { +- ring->ready = false; +- goto done; +- } +- } +- + done: + if (!r) + DRM_INFO("UVD and UVD ENC initialized successfully.\n"); +@@ -570,7 +581,7 @@ static int uvd_v7_0_hw_init(void *handle) + static int uvd_v7_0_hw_fini(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; +- struct amdgpu_ring *ring = &adev->uvd.inst->ring; ++ int i; + + if (!amdgpu_sriov_vf(adev)) + uvd_v7_0_stop(adev); +@@ -579,7 +590,8 @@ static int uvd_v7_0_hw_fini(void *handle) + DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); + } + +- ring->ready = false; ++ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) ++ adev->uvd.inst[i].ring.ready = false; + + return 0; + } +@@ -619,48 +631,51 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) + { + uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev); + uint32_t offset; ++ int i; + +- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, +- lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, +- upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); +- offset = 0; +- } else { +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, +- lower_32_bits(adev->uvd.inst->gpu_addr)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, +- upper_32_bits(adev->uvd.inst->gpu_addr)); +- offset = size; +- } ++ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { ++ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, ++ 
lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, ++ upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); ++ offset = 0; ++ } else { ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, ++ lower_32_bits(adev->uvd.inst[i].gpu_addr)); ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, ++ upper_32_bits(adev->uvd.inst[i].gpu_addr)); ++ offset = size; ++ } + +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, +- AMDGPU_UVD_FIRMWARE_OFFSET >> 3); +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); +- +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, +- lower_32_bits(adev->uvd.inst->gpu_addr + offset)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, +- upper_32_bits(adev->uvd.inst->gpu_addr + offset)); +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); +- +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, +- lower_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, +- upper_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, +- AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); +- +- WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, +- adev->gfx.config.gb_addr_config); +- WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, +- adev->gfx.config.gb_addr_config); +- WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, +- adev->gfx.config.gb_addr_config); +- +- WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, ++ AMDGPU_UVD_FIRMWARE_OFFSET >> 3); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); ++ ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, ++ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset)); ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, ++ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset)); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); ++ ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, ++ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); ++ WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, ++ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, ++ AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); ++ ++ WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG, ++ adev->gfx.config.gb_addr_config); ++ WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG, ++ adev->gfx.config.gb_addr_config); ++ WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG, ++ adev->gfx.config.gb_addr_config); ++ ++ WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); ++ } + } + + static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, +@@ -670,6 +685,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, + uint64_t addr = table->gpu_addr; + struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; + uint32_t size; ++ int i; + + size = header->header_size + header->vce_table_size + header->uvd_table_size; + +@@ -689,11 +705,12 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, + /* 4, set resp 
to zero */ + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); + +- WDOORBELL32(adev->uvd.inst->ring_enc[0].doorbell_index, 0); +- adev->wb.wb[adev->uvd.inst->ring_enc[0].wptr_offs] = 0; +- adev->uvd.inst->ring_enc[0].wptr = 0; +- adev->uvd.inst->ring_enc[0].wptr_old = 0; +- ++ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { ++ WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0); ++ adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0; ++ adev->uvd.inst[i].ring_enc[0].wptr = 0; ++ adev->uvd.inst[i].ring_enc[0].wptr_old = 0; ++ } + /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); + +@@ -726,6 +743,7 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) + struct mmsch_v1_0_cmd_end end = { {0} }; + uint32_t *init_table = adev->virt.mm_table.cpu_addr; + struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; ++ uint8_t i = 0; + + direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; +@@ -743,120 +761,121 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) + + init_table += header->uvd_table_offset; + +- ring = &adev->uvd.inst->ring; +- ring->wptr = 0; +- size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); +- +- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), +- 0xFFFFFFFF, 0x00000004); +- /* mc resume*/ +- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), +- lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), +- upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); +- offset = 0; +- } else { +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), +- lower_32_bits(adev->uvd.inst->gpu_addr)); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), +- upper_32_bits(adev->uvd.inst->gpu_addr)); +- offset = size; ++ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { ++ ring = &adev->uvd.inst[i].ring; ++ ring->wptr = 0; ++ size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); ++ ++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), ++ 0xFFFFFFFF, 0x00000004); ++ /* mc resume*/ ++ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), ++ lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), ++ upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); ++ offset = 0; ++ } else { ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), ++ lower_32_bits(adev->uvd.inst[i].gpu_addr)); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), ++ upper_32_bits(adev->uvd.inst[i].gpu_addr)); ++ offset = size; ++ } ++ ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), ++ AMDGPU_UVD_FIRMWARE_OFFSET >> 3); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size); ++ ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), ++ 
lower_32_bits(adev->uvd.inst[i].gpu_addr + offset)); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), ++ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset)); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); ++ ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), ++ lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), ++ upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2), ++ AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); ++ ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); ++ /* mc resume end*/ ++ ++ /* disable clock gating */ ++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL), ++ ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0); ++ ++ /* disable interupt */ ++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), ++ ~UVD_MASTINT_EN__VCPU_EN_MASK, 0); ++ ++ /* stall UMC and register bus before resetting VCPU */ ++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), ++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, ++ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); ++ ++ /* put LMI, VCPU, RBC etc... into reset */ ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), ++ (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK)); ++ ++ /* initialize UVD memory controller */ ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL), ++ (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | ++ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | ++ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | ++ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | ++ UVD_LMI_CTRL__REQ_MODE_MASK | ++ 0x00100000L)); ++ ++ /* take all subblocks out of reset, except VCPU */ ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), ++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); ++ ++ /* enable VCPU clock */ ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), ++ UVD_VCPU_CNTL__CLK_EN_MASK); ++ ++ /* enable master interrupt */ ++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), ++ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), ++ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); ++ ++ /* clear the bit 4 of UVD_STATUS */ ++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), ++ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0); ++ ++ /* force RBC into idle state */ ++ size = order_base_2(ring->ring_size); ++ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); ++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp); ++ ++ ring = &adev->uvd.inst[i].ring_enc[0]; ++ ring->wptr = 0; ++ 
MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4); ++ ++ /* boot up the VCPU */ ++ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0); ++ ++ /* enable UMC */ ++ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), ++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); ++ ++ MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02); + } +- +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), +- AMDGPU_UVD_FIRMWARE_OFFSET >> 3); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); +- +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), +- lower_32_bits(adev->uvd.inst->gpu_addr + offset)); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), +- upper_32_bits(adev->uvd.inst->gpu_addr + offset)); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); +- +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), +- lower_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), +- upper_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), +- AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); +- +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); +- /* mc resume end*/ +- +- /* disable clock gating */ +- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), +- ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0); +- +- /* disable interupt */ +- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), +- ~UVD_MASTINT_EN__VCPU_EN_MASK, 0); +- +- /* stall UMC and register bus before resetting VCPU */ +- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), +- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, +- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); +- +- /* put LMI, VCPU, RBC etc... 
into reset */ +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), +- (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | +- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | +- UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | +- UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | +- UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | +- UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | +- UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | +- UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK)); +- +- /* initialize UVD memory controller */ +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL), +- (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | +- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | +- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | +- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | +- UVD_LMI_CTRL__REQ_MODE_MASK | +- 0x00100000L)); +- +- /* take all subblocks out of reset, except VCPU */ +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), +- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); +- +- /* enable VCPU clock */ +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), +- UVD_VCPU_CNTL__CLK_EN_MASK); +- +- /* enable master interrupt */ +- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), +- ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), +- (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); +- +- /* clear the bit 4 of UVD_STATUS */ +- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), +- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0); +- +- /* force RBC into idle state */ +- size = order_base_2(ring->ring_size); +- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); +- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); +- +- ring = &adev->uvd.inst->ring_enc[0]; +- ring->wptr = 0; +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); +- +- /* boot up the VCPU */ +- MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); +- +- /* enable UMC */ +- MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), +- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); +- +- MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02); +- + /* add end packet */ + memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); + table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; +@@ -875,15 +894,17 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) + */ + static int uvd_v7_0_start(struct amdgpu_device *adev) + { +- struct amdgpu_ring *ring = &adev->uvd.inst->ring; ++ struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + uint32_t lmi_swap_cntl; + uint32_t mp_swap_cntl; +- int i, j, r; ++ int i, j, k, r; + +- /* disable DPG */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, +- ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); ++ for (k = 0; k < adev->uvd.num_uvd_inst; ++k) { ++ /* disable DPG */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0, ++ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); ++ } + + /* disable byte swapping */ + lmi_swap_cntl = 0; +@@ -891,157 +912,159 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) + + uvd_v7_0_mc_resume(adev); + +- /* disable clock gating */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 
0, +- ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK); +- +- /* disable interupt */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, +- ~UVD_MASTINT_EN__VCPU_EN_MASK); +- +- /* stall UMC and register bus before resetting VCPU */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), +- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, +- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); +- mdelay(1); +- +- /* put LMI, VCPU, RBC etc... into reset */ +- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, +- UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | +- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | +- UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | +- UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | +- UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | +- UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | +- UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | +- UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); +- mdelay(5); ++ for (k = 0; k < adev->uvd.num_uvd_inst; ++k) { ++ ring = &adev->uvd.inst[k].ring; ++ /* disable clock gating */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0, ++ ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK); + +- /* initialize UVD memory controller */ +- WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, +- (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | +- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | +- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | +- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | +- UVD_LMI_CTRL__REQ_MODE_MASK | +- 0x00100000L); ++ /* disable interupt */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0, ++ ~UVD_MASTINT_EN__VCPU_EN_MASK); ++ ++ /* stall UMC and register bus before resetting VCPU */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), ++ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, ++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); ++ mdelay(1); ++ ++ /* put LMI, VCPU, RBC etc... into reset */ ++ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, ++ UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | ++ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); ++ mdelay(5); ++ ++ /* initialize UVD memory controller */ ++ WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL, ++ (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | ++ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | ++ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | ++ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | ++ UVD_LMI_CTRL__REQ_MODE_MASK | ++ 0x00100000L); + + #ifdef __BIG_ENDIAN +- /* swap (8 in 32) RB and IB */ +- lmi_swap_cntl = 0xa; +- mp_swap_cntl = 0; ++ /* swap (8 in 32) RB and IB */ ++ lmi_swap_cntl = 0xa; ++ mp_swap_cntl = 0; + #endif +- WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); +- WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); +- +- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040); +- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0); +- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040); +- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0); +- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0); +- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88); +- +- /* take all subblocks out of reset, except VCPU */ +- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, +- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); +- mdelay(5); ++ WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); ++ WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); + +- /* enable VCPU clock */ +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, +- UVD_VCPU_CNTL__CLK_EN_MASK); ++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040); ++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0); ++ 
WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040); ++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0); ++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0); ++ WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88); + +- /* enable UMC */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, +- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); ++ /* take all subblocks out of reset, except VCPU */ ++ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, ++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); ++ mdelay(5); + +- /* boot up the VCPU */ +- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0); +- mdelay(10); ++ /* enable VCPU clock */ ++ WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL, ++ UVD_VCPU_CNTL__CLK_EN_MASK); + +- for (i = 0; i < 10; ++i) { +- uint32_t status; ++ /* enable UMC */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0, ++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + +- for (j = 0; j < 100; ++j) { +- status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); ++ /* boot up the VCPU */ ++ WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0); ++ mdelay(10); ++ ++ for (i = 0; i < 10; ++i) { ++ uint32_t status; ++ ++ for (j = 0; j < 100; ++j) { ++ status = RREG32_SOC15(UVD, k, mmUVD_STATUS); ++ if (status & 2) ++ break; ++ mdelay(10); ++ } ++ r = 0; + if (status & 2) + break; ++ ++ DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k); ++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), ++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, ++ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); ++ mdelay(10); ++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0, ++ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); ++ r = -1; + } +- r = 0; +- if (status & 2) +- break; + +- DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), +- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, +- ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); +- mdelay(10); +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, +- ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); +- mdelay(10); +- r = -1; +- } +- +- if (r) { +- DRM_ERROR("UVD not responding, giving up!!!\n"); +- return r; +- } +- /* enable master interrupt */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), +- (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), +- ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); +- +- /* clear the bit 4 of UVD_STATUS */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, +- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); +- +- /* force RBC into idle state */ +- rb_bufsz = order_base_2(ring->ring_size); +- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); +- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); +- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); +- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); +- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); +- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); +- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); +- +- /* set the write pointer delay */ +- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); +- +- /* set the wb address */ +- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, +- (upper_32_bits(ring->gpu_addr) >> 2)); +- +- /* programm the RB_BASE for ring buffer */ +- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, +- lower_32_bits(ring->gpu_addr)); +- WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, +- upper_32_bits(ring->gpu_addr)); +- +- /* Initialize the ring buffer's read and write pointers */ +- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); +- +- ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); +- 
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, +- lower_32_bits(ring->wptr)); +- +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, +- ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); +- +- ring = &adev->uvd.inst->ring_enc[0]; +- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); +- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); +- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); +- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); +- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); ++ if (r) { ++ DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k); ++ return r; ++ } ++ /* enable master interrupt */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), ++ (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), ++ ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); + +- ring = &adev->uvd.inst->ring_enc[1]; +- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); +- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); +- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); +- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); +- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); ++ /* clear the bit 4 of UVD_STATUS */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0, ++ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + ++ /* force RBC into idle state */ ++ rb_bufsz = order_base_2(ring->ring_size); ++ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); ++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); ++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); ++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); ++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); ++ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); ++ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp); ++ ++ /* set the write pointer delay */ ++ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0); ++ ++ /* set the wb address */ ++ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR, ++ (upper_32_bits(ring->gpu_addr) >> 2)); ++ ++ /* programm the RB_BASE for ring buffer */ ++ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, ++ lower_32_bits(ring->gpu_addr)); ++ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, ++ upper_32_bits(ring->gpu_addr)); ++ ++ /* Initialize the ring buffer's read and write pointers */ ++ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0); ++ ++ ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR); ++ WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR, ++ lower_32_bits(ring->wptr)); ++ ++ WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0, ++ ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); ++ ++ ring = &adev->uvd.inst[k].ring_enc[0]; ++ WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); ++ WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); ++ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr); ++ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); ++ WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4); ++ ++ ring = &adev->uvd.inst[k].ring_enc[1]; ++ WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); ++ WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); ++ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr); ++ WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); ++ WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4); ++ } + return 0; + } + +@@ -1054,26 +1077,30 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) + */ + static void 
uvd_v7_0_stop(struct amdgpu_device *adev) + { +- /* force RBC into idle state */ +- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101); +- +- /* Stall UMC and register bus before resetting VCPU */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), +- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, +- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); +- mdelay(1); +- +- /* put VCPU into reset */ +- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, +- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); +- mdelay(5); ++ uint8_t i = 0; ++ ++ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { ++ /* force RBC into idle state */ ++ WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101); + +- /* disable VCPU clock */ +- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0); ++ /* Stall UMC and register bus before resetting VCPU */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), ++ UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, ++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); ++ mdelay(1); + +- /* Unstall UMC and register bus */ +- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, +- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); ++ /* put VCPU into reset */ ++ WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET, ++ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); ++ mdelay(5); ++ ++ /* disable VCPU clock */ ++ WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0); ++ ++ /* Unstall UMC and register bus */ ++ WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, ++ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); ++ } + } + + /** +@@ -1092,26 +1119,26 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, addr & 0xffffffff); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, 2); + } + +@@ -1181,7 +1208,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) + unsigned i; + int r; + +- WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD); ++ WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) { + DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n", +@@ -1189,11 +1216,11 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) + return r; + } + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); + 
amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + for (i = 0; i < adev->usec_timeout; i++) { +- tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID); ++ tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); +@@ -1225,17 +1252,17 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0)); + amdgpu_ring_write(ring, vmid); + + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0)); + amdgpu_ring_write(ring, ib->length_dw); + } + +@@ -1263,13 +1290,13 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring, + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, val); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, 8); + } + +@@ -1279,16 +1306,16 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, val); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0)); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); ++ PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, 12); + } + +@@ -1313,7 +1340,7 @@ static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) + struct amdgpu_device *adev = ring->adev; + + for (i = 0; i < count; i++) +- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); ++ amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0)); + + } + +@@ -1381,16 +1408,16 @@ static bool uvd_v7_0_check_soft_reset(void *handle) + + if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || + REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || +- (RREG32_SOC15(UVD, 0, mmUVD_STATUS) & ++ (RREG32_SOC15(UVD, ring->me, 
mmUVD_STATUS) & + AMDGPU_UVD_STATUS_BUSY_MASK)) + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, + SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); + + if (srbm_soft_reset) { +- adev->uvd.inst->srbm_soft_reset = srbm_soft_reset; ++ adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset; + return true; + } else { +- adev->uvd.inst->srbm_soft_reset = 0; ++ adev->uvd.inst[ring->me].srbm_soft_reset = 0; + return false; + } + } +@@ -1399,7 +1426,7 @@ static int uvd_v7_0_pre_soft_reset(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +- if (!adev->uvd.inst->srbm_soft_reset) ++ if (!adev->uvd.inst[ring->me].srbm_soft_reset) + return 0; + + uvd_v7_0_stop(adev); +@@ -1411,9 +1438,9 @@ static int uvd_v7_0_soft_reset(void *handle) + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset; + +- if (!adev->uvd.inst->srbm_soft_reset) ++ if (!adev->uvd.inst[ring->me].srbm_soft_reset) + return 0; +- srbm_soft_reset = adev->uvd.inst->srbm_soft_reset; ++ srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset; + + if (srbm_soft_reset) { + u32 tmp; +@@ -1441,7 +1468,7 @@ static int uvd_v7_0_post_soft_reset(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +- if (!adev->uvd.inst->srbm_soft_reset) ++ if (!adev->uvd.inst[ring->me].srbm_soft_reset) + return 0; + + mdelay(5); +@@ -1463,17 +1490,29 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) + { ++ uint32_t ip_instance; ++ ++ switch (entry->client_id) { ++ case SOC15_IH_CLIENTID_UVD: ++ ip_instance = 0; ++ break; ++ default: ++ DRM_ERROR("Unhandled client id: %d\n", entry->client_id); ++ return 0; ++ } ++ + DRM_DEBUG("IH: UVD TRAP\n"); ++ + switch (entry->src_id) { + case 124: +- amdgpu_fence_process(&adev->uvd.inst->ring); ++ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring); + break; + case 119: +- amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]); ++ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]); + break; + case 120: + if (!amdgpu_sriov_vf(adev)) +- amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]); ++ amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", +@@ -1489,9 +1528,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) + { + uint32_t data, data1, data2, suvd_flags; + +- data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL); +- data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); +- data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL); ++ data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL); ++ data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE); ++ data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL); + + data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | + UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); +@@ -1535,18 +1574,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) + UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); + data1 |= suvd_flags; + +- WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data); +- WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0); +- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); +- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2); ++ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data); ++ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0); ++ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1); ++ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2); + } + + static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) + { + uint32_t data, data1, 
cgc_flags, suvd_flags; + +- data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE); +- data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); ++ data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE); ++ data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE); + + cgc_flags = UVD_CGC_GATE__SYS_MASK | + UVD_CGC_GATE__UDEC_MASK | +@@ -1578,8 +1617,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) + data |= cgc_flags; + data1 |= suvd_flags; + +- WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data); +- WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); ++ WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data); ++ WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1); + } + + static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) +@@ -1638,7 +1677,7 @@ static int uvd_v7_0_set_powergating_state(void *handle, + if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) + return 0; + +- WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); ++ WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); + + if (state == AMD_PG_STATE_GATE) { + uvd_v7_0_stop(adev); +@@ -1742,18 +1781,27 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { + + static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) + { +- adev->uvd.inst->ring.funcs = &uvd_v7_0_ring_vm_funcs; +- DRM_INFO("UVD is enabled in VM mode\n"); ++ int i; ++ ++ for (i = 0; i < adev->uvd.num_uvd_inst; i++) { ++ adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs; ++ adev->uvd.inst[i].ring.me = i; ++ DRM_INFO("UVD(%d) is enabled in VM mode\n", i); ++ } + } + + static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev) + { +- int i; ++ int i, j; + +- for (i = 0; i < adev->uvd.num_enc_rings; ++i) +- adev->uvd.inst->ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs; ++ for (j = 0; j < adev->uvd.num_uvd_inst; j++) { ++ for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ++ adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs; ++ adev->uvd.inst[j].ring_enc[i].me = j; ++ } + +- DRM_INFO("UVD ENC is enabled in VM mode\n"); ++ DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j); ++ } + } + + static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { +@@ -1763,8 +1811,12 @@ static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { + + static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev) + { +- adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1; +- adev->uvd.inst->irq.funcs = &uvd_v7_0_irq_funcs; ++ int i; ++ ++ for (i = 0; i < adev->uvd.num_uvd_inst; i++) { ++ adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1; ++ adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs; ++ } + } + + const struct amdgpu_ip_block_version uvd_v7_0_ip_block = +-- +2.7.4 + |
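The bulk of the hunks above are one mechanical transformation: every `adev->uvd.inst->field` access becomes `adev->uvd.inst[i].field` inside a loop over `adev->uvd.num_uvd_inst`, as in the reworked `uvd_v7_0_stop()`, `uvd_v7_0_set_ring_funcs()` and `uvd_v7_0_set_irq_funcs()`. A minimal, self-contained sketch of that pattern follows; the `uvd_inst`/`uvd_device` types and the register fields are simplified stand-ins for illustration, not the driver's actual structures or register map.

#include <stdint.h>
#include <stdio.h>

#define MAX_UVD_INST 2

/* Simplified stand-in for one UVD instance's state. */
struct uvd_inst {
	uint32_t rb_cntl;     /* mimics mmUVD_RBC_RB_CNTL */
	uint32_t vcpu_cntl;   /* mimics mmUVD_VCPU_CNTL */
};

/* Simplified stand-in for adev->uvd after the restructuring:
 * a count plus an array, instead of a single implicit instance. */
struct uvd_device {
	unsigned num_uvd_inst;            /* 2 on Vega20, 1 elsewhere */
	struct uvd_inst inst[MAX_UVD_INST];
};

/* Before the patch the stop path touched instance 0 only
 * (uvd.inst->...); after it, every hardware poke is repeated
 * once per instance, in order. */
static void uvd_stop_all(struct uvd_device *uvd)
{
	for (unsigned i = 0; i < uvd->num_uvd_inst; ++i) {
		uvd->inst[i].rb_cntl = 0x11010101;  /* force RBC idle */
		uvd->inst[i].vcpu_cntl = 0x0;       /* disable VCPU clock */
		printf("stopped UVD instance %u\n", i);
	}
}

int main(void)
{
	struct uvd_device uvd = { .num_uvd_inst = 2 };
	uvd_stop_all(&uvd);
	return 0;
}

As the commit message says, there is no logical change on single-instance parts: with `num_uvd_inst == 1` the loop body runs exactly once against `inst[0]`, which is what `inst->` resolved to before.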
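The ring-level hunks follow a second pattern: `amdgpu_ring::me` is repurposed to carry the UVD instance index, so every `SOC15_REG_OFFSET(UVD, 0, reg)` in the emit/test paths becomes `SOC15_REG_OFFSET(UVD, ring->me, reg)`. In the SOC15 macros that middle argument selects a per-instance register-base entry, so the same source line addresses whichever instance's aperture the ring belongs to. A rough model of the idea is below; the fixed 0x200 stride is invented for illustration, where the real driver looks the base up in a per-instance offset table.

#include <stdio.h>

/* Hypothetical aperture model: each UVD instance exposes the
 * same registers at base + me * stride. Stride and register
 * values here are illustrative, not Vega20's real layout. */
#define UVD_INST_STRIDE 0x200u
#define mmUVD_CONTEXT_ID 0x1Cu

struct ring {
	unsigned me;   /* instance index, as repurposed by the patch */
};

static unsigned reg_offset(unsigned inst, unsigned reg)
{
	return inst * UVD_INST_STRIDE + reg;
}

int main(void)
{
	struct ring r0 = { .me = 0 }, r1 = { .me = 1 };

	/* The same emit code now targets a different aperture
	 * depending on which instance's ring is being driven. */
	printf("inst0 CONTEXT_ID at 0x%x\n", reg_offset(r0.me, mmUVD_CONTEXT_ID));
	printf("inst1 CONTEXT_ID at 0x%x\n", reg_offset(r1.me, mmUVD_CONTEXT_ID));
	return 0;
}

This is why the `.me = i` / `.me = j` assignments in `uvd_v7_0_set_ring_funcs()` and `uvd_v7_0_set_enc_ring_funcs()` matter: the index is seeded once at ring setup, and every later fence, IB, wreg and reg-wait emit picks up the right instance for free.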
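Interrupt delivery gets the inverse mapping: `uvd_v7_0_process_interrupt()` now derives an `ip_instance` from the IH client id before fanning out on `src_id` (124 for the decode ring, 119 and 120 for the two encode rings, per the hunk above). In this patch only `SOC15_IH_CLIENTID_UVD` is handled, mapping to instance 0; unknown client ids are logged and dropped. A compressed sketch of the dispatch, with the enum value and ring plumbing mocked out for a standalone build:

#include <stdio.h>

/* Mocked IH client id; the real SOC15_IH_CLIENTID_UVD value
 * lives in the amdgpu headers. src_id values mirror the hunk. */
enum { CLIENTID_UVD = 0 };

struct iv_entry {
	int client_id;
	int src_id;
};

static void fence_process(const char *ring, int inst)
{
	printf("fence on %s of UVD%d\n", ring, inst);
}

static int process_interrupt(const struct iv_entry *e)
{
	int inst;

	switch (e->client_id) {
	case CLIENTID_UVD:
		inst = 0;
		break;
	default:
		fprintf(stderr, "Unhandled client id: %d\n", e->client_id);
		return 0;
	}

	switch (e->src_id) {
	case 124:
		fence_process("ring", inst);
		break;
	case 119:
		fence_process("ring_enc[0]", inst);
		break;
	case 120:
		fence_process("ring_enc[1]", inst);
		break;
	default:
		fprintf(stderr, "Unhandled interrupt: %d\n", e->src_id);
	}
	return 0;
}

int main(void)
{
	struct iv_entry e = { .client_id = CLIENTID_UVD, .src_id = 124 };
	return process_interrupt(&e);
}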