From c95a7ced9bd72a1f224ed09dac2d5dbe2a75a1c6 Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Thu, 18 Aug 2016 13:15:39 -0400 Subject: [PATCH 1502/4131] drm/amdkfd Fix CU masking for more than 32 CUs The initial implementation of CU masking used a single uint32 instead of an array, which limited the number of CUs that could be masked to 32. Match the thunk/kernel spec and pass in the cu_mask_count (number of bits total) and a uint32 array to mask properly. BUG:KFD-277 Change-Id: I61d17685809d9beb62fdc9a47a1c19d8a2107a54 Signed-off-by: Kent Russell --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 36 +++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 59 +++++++++++----------- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 59 +++++++++++----------- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 7 +++ 6 files changed, 101 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 831f63f..c144752 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -391,14 +391,44 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, void *data) { int retval; + const int max_num_cus = 1024; struct kfd_ioctl_set_cu_mask_args *args = data; struct queue_properties properties; uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr; + size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32); - if (get_user(properties.cu_mask, cu_mask_ptr)) + if ((args->num_cu_mask % 32) != 0) { + pr_debug("kfd: num_cu_mask (0x%x) must be a multiple of 32", + args->num_cu_mask); + return -EINVAL; + } + + properties.cu_mask_count = args->num_cu_mask; + if (properties.cu_mask_count == 0) { + pr_debug("kfd: CU Mask cannot be 0"); + return -EINVAL; + } + + /* To prevent an unreasonably large CU mask size, set an arbitrary + * 
limit of max_num_cus bits. We can then just drop any CU mask bits + * past max_num_cus bits and just use the first max_num_cus bits. + */ + if (properties.cu_mask_count > max_num_cus) { + pr_debug("kfd: CU mask cannot be greater than 1024 bits"); + properties.cu_mask_count = max_num_cus; + cu_mask_size = sizeof(uint32_t) * (max_num_cus/32); + } + + properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL); + if (!properties.cu_mask) + return -ENOMEM; + + retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size); + if (retval) { + pr_debug("kfd: Could not copy cu mask from userspace"); + kfree(properties.cu_mask); return -EFAULT; - if (properties.cu_mask == 0) - return 0; + } down_write(&p->lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 162a83f..f19f2b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -123,6 +123,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; prop.eop_ring_buffer_address = kq->eop_gpu_addr; prop.eop_ring_buffer_size = PAGE_SIZE; + prop.cu_mask = NULL; if (init_queue(&kq->queue, &prop) != 0) goto err_init_queue; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 1badce1..959a7f1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -48,12 +48,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, { struct cik_mqd *m; struct kfd_cu_info cu_info; - uint32_t mgmt_se_mask; - uint32_t cu_sh_mask, cu_sh_shift; - uint32_t cu_mask; - int se, sh; + uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ + uint32_t cu_mask_count = q->cu_mask_count; + const uint32_t *cu_mask = q->cu_mask; + int se, cu_per_sh, cu_index, i; - if (q->cu_mask == 0) + if (WARN_ON(cu_mask_count == 0)) return; m = 
get_mqd(mqd); @@ -63,32 +63,31 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3 = 0; mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info); - cu_mask = q->cu_mask; - for (se = 0; se < cu_info.num_shader_engines && cu_mask; se++) { - mgmt_se_mask = 0; - for (sh = 0; sh < 2 && cu_mask; sh++) { - cu_sh_shift = hweight32(cu_info.cu_bitmap[se][sh]); - cu_sh_mask = (1 << cu_sh_shift) - 1; - mgmt_se_mask |= (cu_mask & cu_sh_mask) << (sh * 16); - cu_mask >>= cu_sh_shift; - } - switch (se) { - case 0: - m->compute_static_thread_mgmt_se0 = mgmt_se_mask; - break; - case 1: - m->compute_static_thread_mgmt_se1 = mgmt_se_mask; - break; - case 2: - m->compute_static_thread_mgmt_se2 = mgmt_se_mask; - break; - case 3: - m->compute_static_thread_mgmt_se3 = mgmt_se_mask; - break; - default: - break; - } + + /* If # CU mask bits > # CUs, set it to the # of CUs */ + if (cu_mask_count > cu_info.cu_active_number) + cu_mask_count = cu_info.cu_active_number; + + cu_index = 0; + for (se = 0; se < cu_info.num_shader_engines; se++) { + cu_per_sh = 0; + + /* Get the number of CUs on this Shader Engine */ + for (i = 0; i < 4; i++) + cu_per_sh += hweight32(cu_info.cu_bitmap[se][i]); + + se_mask[se] = cu_mask[cu_index / 32] >> (cu_index % 32); + if ((cu_per_sh + (cu_index % 32)) > 32) + se_mask[se] |= cu_mask[(cu_index / 32) + 1] + << (32 - (cu_index % 32)); + se_mask[se] &= (1 << cu_per_sh) - 1; + cu_index += cu_per_sh; } + m->compute_static_thread_mgmt_se0 = se_mask[0]; + m->compute_static_thread_mgmt_se1 = se_mask[1]; + m->compute_static_thread_mgmt_se2 = se_mask[2]; + m->compute_static_thread_mgmt_se3 = se_mask[3]; + pr_debug("kfd: update cu mask to %#x %#x %#x %#x\n", m->compute_static_thread_mgmt_se0, m->compute_static_thread_mgmt_se1, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index d78964c..59bc27e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -48,12 +48,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, { struct vi_mqd *m; struct kfd_cu_info cu_info; - uint32_t mgmt_se_mask; - uint32_t cu_sh_mask, cu_sh_shift; - uint32_t cu_mask; - int se, sh; + uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ + uint32_t cu_mask_count = q->cu_mask_count; + const uint32_t *cu_mask = q->cu_mask; + int se, cu_per_sh, cu_index, i; - if (q->cu_mask == 0) + if (WARN_ON(cu_mask_count == 0)) return; m = get_mqd(mqd); @@ -63,32 +63,31 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3 = 0; mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info); - cu_mask = q->cu_mask; - for (se = 0; se < cu_info.num_shader_engines && cu_mask; se++) { - mgmt_se_mask = 0; - for (sh = 0; sh < 2 && cu_mask; sh++) { - cu_sh_shift = hweight32(cu_info.cu_bitmap[se][sh]); - cu_sh_mask = (1 << cu_sh_shift) - 1; - mgmt_se_mask |= (cu_mask & cu_sh_mask) << (sh * 16); - cu_mask >>= cu_sh_shift; - } - switch (se) { - case 0: - m->compute_static_thread_mgmt_se0 = mgmt_se_mask; - break; - case 1: - m->compute_static_thread_mgmt_se1 = mgmt_se_mask; - break; - case 2: - m->compute_static_thread_mgmt_se2 = mgmt_se_mask; - break; - case 3: - m->compute_static_thread_mgmt_se3 = mgmt_se_mask; - break; - default: - break; - } + + /* If # CU mask bits > # CUs, set it to the # of CUs */ + if (cu_mask_count > cu_info.cu_active_number) + cu_mask_count = cu_info.cu_active_number; + + cu_index = 0; + for (se = 0; se < cu_info.num_shader_engines; se++) { + cu_per_sh = 0; + + /* Get the number of CUs on this Shader Engine */ + for (i = 0; i < 4; i++) + cu_per_sh += hweight32(cu_info.cu_bitmap[se][i]); + + se_mask[se] = cu_mask[cu_index / 32] >> (cu_index % 32); + if ((cu_per_sh + (cu_index % 32)) > 32) + se_mask[se] |= cu_mask[(cu_index / 32) + 1] + << (32 - (cu_index % 32)); + se_mask[se] &= (1 << cu_per_sh) - 1; + cu_index += cu_per_sh; } + 
m->compute_static_thread_mgmt_se0 = se_mask[0]; + m->compute_static_thread_mgmt_se1 = se_mask[1]; + m->compute_static_thread_mgmt_se2 = se_mask[2]; + m->compute_static_thread_mgmt_se3 = se_mask[3]; + pr_debug("kfd: update cu mask to %#x %#x %#x %#x\n", m->compute_static_thread_mgmt_se0, m->compute_static_thread_mgmt_se1, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2bfe761..0a2afa7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -425,7 +425,8 @@ struct queue_properties { uint64_t tba_addr; uint64_t tma_addr; /* Relevant for CU */ - uint32_t cu_mask; + uint32_t cu_mask_count; /* Must be a multiple of 32 */ + uint32_t *cu_mask; }; /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index cf08e824..b68776e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -337,6 +337,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (pqn->q) { dqm = pqn->q->device->dqm; + kfree(pqn->q->properties.cu_mask); retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval != 0) { if (retval == -ETIME) @@ -400,6 +401,12 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, return -EFAULT; } + /* Free the old CU mask memory if it is already allocated, then + * take over the new CU mask buffer passed in by the caller. + */ + kfree(pqn->q->properties.cu_mask); + + pqn->q->properties.cu_mask_count = p->cu_mask_count; pqn->q->properties.cu_mask = p->cu_mask; retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, -- 2.7.4