diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2855-drm-amdkfd-Hardware-DWORD-size-is-4-bytes.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2855-drm-amdkfd-Hardware-DWORD-size-is-4-bytes.patch | 216 |
1 files changed, 216 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2855-drm-amdkfd-Hardware-DWORD-size-is-4-bytes.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2855-drm-amdkfd-Hardware-DWORD-size-is-4-bytes.patch new file mode 100644 index 00000000..bb9b2f59 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2855-drm-amdkfd-Hardware-DWORD-size-is-4-bytes.patch @@ -0,0 +1,216 @@ +From 7787d21b7ec2cc40a0849a6b8d58ec3fe845e2b1 Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Thu, 2 Nov 2017 16:43:36 -0400 +Subject: [PATCH 2855/4131] drm/amdkfd: Hardware DWORD size is 4 bytes + +Don't use sizeof(uint32_t) or similar types for hardware or firmware +DWORD size. The hardware and firmware don't care about Linux types. + +Change-Id: Idd093ba19276c9207dc9e9083d8d0dcd93f4a2c3 +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 14 +++++--------- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 10 ++++------ + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 7 +++---- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 9 ++++----- + 6 files changed, 18 insertions(+), 26 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +index af6d736..df9b346 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +@@ -97,7 +97,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + ib_packet->bitfields3.ib_base_hi = largep->u.high_part; + + ib_packet->control = (1 << 23) | (1 << 31) | +- ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); ++ ((size_in_bytes / 4) & 0xfffff); + + ib_packet->bitfields5.pasid = pasid; + +@@ -133,8 +133,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + + rm_packet->header.opcode = IT_RELEASE_MEM; + rm_packet->header.type = PM4_TYPE_3; +- rm_packet->header.count = sizeof(struct pm4_mec_release_mem) / +- sizeof(unsigned int) - 2; ++ rm_packet->header.count = sizeof(struct pm4_mec_release_mem) / 4 - 2; + + rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + rm_packet->bitfields2.event_index = +@@ -667,8 +666,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, + packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; + packets_vec[0].header.type = PM4_TYPE_3; + packets_vec[0].bitfields2.reg_offset = +- GRBM_GFX_INDEX / (sizeof(uint32_t)) - +- USERCONFIG_REG_BASE; ++ GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; + + packets_vec[0].bitfields2.insert_vmid = 0; + packets_vec[0].reg_data[0] = reg_gfx_index.u32All; +@@ -676,8 +674,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, + packets_vec[1].header.count = 1; + packets_vec[1].header.opcode = IT_SET_CONFIG_REG; + packets_vec[1].header.type = PM4_TYPE_3; +- packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - +- AMD_CONFIG_REG_BASE; ++ packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE; + + packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; + packets_vec[1].bitfields2.insert_vmid = 1; +@@ -693,8 +690,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, + + packets_vec[2].ordinal1 = packets_vec[0].ordinal1; + packets_vec[2].bitfields2.reg_offset = +- GRBM_GFX_INDEX / (sizeof(uint32_t)) - +- USERCONFIG_REG_BASE; ++ GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; + + packets_vec[2].bitfields2.insert_vmid = 0; + packets_vec[2].reg_data[0] = reg_gfx_index.u32All; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +index 047b048..8cf9d44 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +@@ -221,7 +221,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, + wptr = kq->pending_wptr; + wptr64 = kq->pending_wptr64; + queue_address = (unsigned int *)kq->pq_kernel_addr; +- queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t); ++ queue_size_dwords = kq->queue->properties.queue_size / 4; + + pr_debug("rptr: %d\n", rptr); + pr_debug("wptr: %d\n", wptr); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +index ecf4a33..9022ecb 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +@@ -106,7 +106,7 @@ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) + + header.u32All = 0; + header.opcode = opcode; +- header.count = packet_size/sizeof(uint32_t) - 2; ++ header.count = packet_size / 4 - 2; + header.type = PM4_TYPE_3; + + return header.u32All; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +index 5398705..a628e28 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +@@ -217,7 +217,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, + { + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); +- uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); ++ uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); + + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + (uint32_t __user *)p->write_ptr, +@@ -251,8 +251,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + * Calculating queue size which is log base 2 of actual queue size -1 + * dwords and another -1 for ffs + */ +- m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) +- - 1 - 1; ++ m->cp_hqd_pq_control |= ffs(q->queue_size / 4) - 1 - 1; + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); +@@ -293,7 +292,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct cik_sdma_rlc_registers *m; + + m = get_sdma_mqd(mqd); +- m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) ++ m->sdma_rlc_rb_cntl = (ffs(q->queue_size / 4) - 1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | + 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | +@@ -434,8 +433,7 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, + * Calculating queue size which is log base 2 of actual queue + * size -1 dwords + */ +- m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) +- - 1 - 1; ++ m->cp_hqd_pq_control |= ffs(q->queue_size / 4) - 1 - 1; + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +index 30edcda..e65725b 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +@@ -203,8 +203,7 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, + m = get_mqd(mqd); + + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; +- m->cp_hqd_pq_control |= +- ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; ++ m->cp_hqd_pq_control |= ffs(q->queue_size / 4) - 1 - 1; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); +@@ -231,7 +230,7 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, + * is safe, giving a maximum field value of 0xA. + */ + m->cp_hqd_eop_control = min(0xA, +- ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); ++ ffs(q->eop_ring_buffer_size / 4) - 1 - 1); + m->cp_hqd_eop_base_addr_lo = + lower_32_bits(q->eop_ring_buffer_address >> 8); + m->cp_hqd_eop_base_addr_hi = +@@ -402,7 +401,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct v9_sdma_mqd *m; + + m = get_sdma_mqd(mqd); +- m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) ++ m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / 4) - 1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | + 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +index 973ea94..2e6fc62 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +@@ -191,7 +191,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, + { + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); +- uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); ++ uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); + + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + (uint32_t __user *)p->write_ptr, +@@ -209,8 +209,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT | + atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT | + mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT; +- m->cp_hqd_pq_control |= +- ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; ++ m->cp_hqd_pq_control |= ffs(q->queue_size / 4) - 1 - 1; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); +@@ -242,7 +241,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + * is safe, giving a maximum field value of 0xA. + */ + m->cp_hqd_eop_control |= min(0xA, +- ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); ++ ffs(q->eop_ring_buffer_size / 4) - 1 - 1); + m->cp_hqd_eop_base_addr_lo = + lower_32_bits(q->eop_ring_buffer_address >> 8); + m->cp_hqd_eop_base_addr_hi = +@@ -414,7 +413,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct vi_sdma_mqd *m; + + m = get_sdma_mqd(mqd); +- m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) ++ m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / 4) - 1) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | + 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | +-- +2.7.4 + |