diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1502-drm-amdkfd-Fix-CU-masking-for-more-than-32-CUs.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1502-drm-amdkfd-Fix-CU-masking-for-more-than-32-CUs.patch | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1502-drm-amdkfd-Fix-CU-masking-for-more-than-32-CUs.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1502-drm-amdkfd-Fix-CU-masking-for-more-than-32-CUs.patch new file mode 100644 index 00000000..1e4795d1 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1502-drm-amdkfd-Fix-CU-masking-for-more-than-32-CUs.patch @@ -0,0 +1,286 @@ +From c95a7ced9bd72a1f224ed09dac2d5dbe2a75a1c6 Mon Sep 17 00:00:00 2001 +From: Kent Russell <kent.russell@amd.com> +Date: Thu, 18 Aug 2016 13:15:39 -0400 +Subject: [PATCH 1502/4131] drm/amdkfd Fix CU masking for more than 32 CUs + +The initial implementation of CU masking used a single uint32 instead of +an array, which limited the number of CUs that could be masked to 32. +Match the thunk/kernel spec and pass in the cu_mask_count (number of +bits total) and a uint32 array to mask properly. +BUG:KFD-277 + +Change-Id: I61d17685809d9beb62fdc9a47a1c19d8a2107a54 +Signed-off-by: Kent Russell <kent.russell@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 36 +++++++++++-- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 1 + + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 59 +++++++++++----------- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 59 +++++++++++----------- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +- + .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 7 +++ + 6 files changed, 101 insertions(+), 64 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index 831f63f..c144752 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -391,14 +391,44 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, + void *data) + { + int retval; ++ const int max_num_cus = 1024; + struct kfd_ioctl_set_cu_mask_args *args = data; + struct queue_properties properties; + uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr; ++ size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32); + +- if (get_user(properties.cu_mask, cu_mask_ptr)) ++ if ((args->num_cu_mask % 32) != 0) { ++ pr_debug("kfd: num_cu_mask (0x%x) must be a multiple of 32", ++ args->num_cu_mask); ++ return -EINVAL; ++ } ++ ++ properties.cu_mask_count = args->num_cu_mask; ++ if (properties.cu_mask_count == 0) { ++ pr_debug("kfd: CU Mask cannot be 0"); ++ return -EINVAL; ++ } ++ ++ /* To prevent an unreasonably large CU mask size, set an arbitrary ++ * limit of max_num_cus bits. We can then just drop any CU mask bits ++ * past max_num_cus bits and just use the first max_num_cus bits. ++ */ ++ if (properties.cu_mask_count > max_num_cus) { ++ pr_debug("kfd: CU mask cannot be greater than 1024 bits"); ++ properties.cu_mask_count = max_num_cus; ++ cu_mask_size = sizeof(uint32_t) * (max_num_cus/32); ++ } ++ ++ properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL); ++ if (!properties.cu_mask) ++ return -ENOMEM; ++ ++ retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size); ++ if (retval) { ++ pr_debug("kfd: Could not copy cu mask from userspace"); ++ kfree(properties.cu_mask); + return -EFAULT; +- if (properties.cu_mask == 0) +- return 0; ++ } + + down_write(&p->lock); + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +index 162a83f..f19f2b3 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +@@ -123,6 +123,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; + prop.eop_ring_buffer_address = kq->eop_gpu_addr; + prop.eop_ring_buffer_size = PAGE_SIZE; ++ prop.cu_mask = NULL; + + if (init_queue(&kq->queue, &prop) != 0) + goto err_init_queue; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +index 1badce1..959a7f1 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +@@ -48,12 +48,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, + { + struct cik_mqd *m; + struct kfd_cu_info cu_info; +- uint32_t mgmt_se_mask; +- uint32_t cu_sh_mask, cu_sh_shift; +- uint32_t cu_mask; +- int se, sh; ++ uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ ++ uint32_t cu_mask_count = q->cu_mask_count; ++ const uint32_t *cu_mask = q->cu_mask; ++ int se, cu_per_sh, cu_index, i; + +- if (q->cu_mask == 0) ++ if (WARN_ON(cu_mask_count == 0)) + return; + + m = get_mqd(mqd); +@@ -63,32 +63,31 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, + m->compute_static_thread_mgmt_se3 = 0; + + mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info); +- cu_mask = q->cu_mask; +- for (se = 0; se < cu_info.num_shader_engines && cu_mask; se++) { +- mgmt_se_mask = 0; +- for (sh = 0; sh < 2 && cu_mask; sh++) { +- cu_sh_shift = hweight32(cu_info.cu_bitmap[se][sh]); +- cu_sh_mask = (1 << cu_sh_shift) - 1; +- mgmt_se_mask |= (cu_mask & cu_sh_mask) << (sh * 16); +- cu_mask >>= cu_sh_shift; +- } +- switch (se) { +- case 0: +- m->compute_static_thread_mgmt_se0 = mgmt_se_mask; +- break; +- case 1: +- m->compute_static_thread_mgmt_se1 = mgmt_se_mask; +- break; +- case 2: +- m->compute_static_thread_mgmt_se2 = mgmt_se_mask; +- break; +- case 3: +- m->compute_static_thread_mgmt_se3 = mgmt_se_mask; +- break; +- default: +- break; +- } ++ ++ /* If # CU mask bits > # CUs, set it to the # of CUs */ ++ if (cu_mask_count > cu_info.cu_active_number) ++ cu_mask_count = cu_info.cu_active_number; ++ ++ cu_index = 0; ++ for (se = 0; se < cu_info.num_shader_engines; se++) { ++ cu_per_sh = 0; ++ ++ /* Get the number of CUs on this Shader Engine */ ++ for (i = 0; i < 4; i++) ++ cu_per_sh += hweight32(cu_info.cu_bitmap[se][i]); ++ ++ se_mask[se] = cu_mask[cu_index / 32] >> (cu_index % 32); ++ if ((cu_per_sh + (cu_index % 32)) > 32) ++ se_mask[se] |= cu_mask[(cu_index / 32) + 1] ++ << (32 - (cu_index % 32)); ++ se_mask[se] &= (1 << cu_per_sh) - 1; ++ cu_index += cu_per_sh; + } ++ m->compute_static_thread_mgmt_se0 = se_mask[0]; ++ m->compute_static_thread_mgmt_se1 = se_mask[1]; ++ m->compute_static_thread_mgmt_se2 = se_mask[2]; ++ m->compute_static_thread_mgmt_se3 = se_mask[3]; ++ + pr_debug("kfd: update cu mask to %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +index d78964c..59bc27e 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +@@ -48,12 +48,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, + { + struct vi_mqd *m; + struct kfd_cu_info cu_info; +- uint32_t mgmt_se_mask; +- uint32_t cu_sh_mask, cu_sh_shift; +- uint32_t cu_mask; +- int se, sh; ++ uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ ++ uint32_t cu_mask_count = q->cu_mask_count; ++ const uint32_t *cu_mask = q->cu_mask; ++ int se, cu_per_sh, cu_index, i; + +- if (q->cu_mask == 0) ++ if (WARN_ON(cu_mask_count == 0)) + return; + + m = get_mqd(mqd); +@@ -63,32 +63,31 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, + m->compute_static_thread_mgmt_se3 = 0; + + mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info); +- cu_mask = q->cu_mask; +- for (se = 0; se < cu_info.num_shader_engines && cu_mask; se++) { +- mgmt_se_mask = 0; +- for (sh = 0; sh < 2 && cu_mask; sh++) { +- cu_sh_shift = hweight32(cu_info.cu_bitmap[se][sh]); +- cu_sh_mask = (1 << cu_sh_shift) - 1; +- mgmt_se_mask |= (cu_mask & cu_sh_mask) << (sh * 16); +- cu_mask >>= cu_sh_shift; +- } +- switch (se) { +- case 0: +- m->compute_static_thread_mgmt_se0 = mgmt_se_mask; +- break; +- case 1: +- m->compute_static_thread_mgmt_se1 = mgmt_se_mask; +- break; +- case 2: +- m->compute_static_thread_mgmt_se2 = mgmt_se_mask; +- break; +- case 3: +- m->compute_static_thread_mgmt_se3 = mgmt_se_mask; +- break; +- default: +- break; +- } ++ ++ /* If # CU mask bits > # CUs, set it to the # of CUs */ ++ if (cu_mask_count > cu_info.cu_active_number) ++ cu_mask_count = cu_info.cu_active_number; ++ ++ cu_index = 0; ++ for (se = 0; se < cu_info.num_shader_engines; se++) { ++ cu_per_sh = 0; ++ ++ /* Get the number of CUs on this Shader Engine */ ++ for (i = 0; i < 4; i++) ++ cu_per_sh += hweight32(cu_info.cu_bitmap[se][i]); ++ ++ se_mask[se] = cu_mask[cu_index / 32] >> (cu_index % 32); ++ if ((cu_per_sh + (cu_index % 32)) > 32) ++ se_mask[se] |= cu_mask[(cu_index / 32) + 1] ++ << (32 - (cu_index % 32)); ++ se_mask[se] &= (1 << cu_per_sh) - 1; ++ cu_index += cu_per_sh; + } ++ m->compute_static_thread_mgmt_se0 = se_mask[0]; ++ m->compute_static_thread_mgmt_se1 = se_mask[1]; ++ m->compute_static_thread_mgmt_se2 = se_mask[2]; ++ m->compute_static_thread_mgmt_se3 = se_mask[3]; ++ + pr_debug("kfd: update cu mask to %#x %#x %#x %#x\n", + m->compute_static_thread_mgmt_se0, + m->compute_static_thread_mgmt_se1, +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index 2bfe761..0a2afa7 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -425,7 +425,8 @@ struct queue_properties { + uint64_t tba_addr; + uint64_t tma_addr; + /* Relevant for CU */ +- uint32_t cu_mask; ++ uint32_t cu_mask_count; /* Must be a multiple of 32 */ ++ uint32_t *cu_mask; + }; + + /** +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +index cf08e824..b68776e 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +@@ -337,6 +337,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) + + if (pqn->q) { + dqm = pqn->q->device->dqm; ++ kfree(pqn->q->properties.cu_mask); + retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); + if (retval != 0) { + if (retval == -ETIME) +@@ -400,6 +401,12 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, + return -EFAULT; + } + ++ /* Free the old CU mask memory if it is already allocated, then ++ * allocate memory for the new CU mask. ++ */ ++ kfree(pqn->q->properties.cu_mask); ++ ++ pqn->q->properties.cu_mask_count = p->cu_mask_count; + pqn->q->properties.cu_mask = p->cu_mask; + + retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, +-- +2.7.4 + |