From f4b68e1a33b68b090254cf5dc4648fa622877085 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 25 Jul 2017 16:38:03 -0400 Subject: [PATCH 1328/4131] drm/amdkfd: Cosmetic changes to match upstream Reduce cosmetic differences with upstream to increase the signal to noise ratio for upstreaming. Change-Id: I83fe855f59df25994f7ed035723130207b9f2818 Signed-off-by: Felix Kuehling Conflicts[4.12]: drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c Conflicts: drivers/gpu/drm/amd/amdkfd/kfd_device.c --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 203 ++++------ drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 423 +++++++++------------ drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 32 +- drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 114 ++---- drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 58 ++- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 26 +- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 55 ++- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 228 +++++------ drivers/gpu/drm/amd/amdkfd/kfd_events.h | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 5 +- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 31 +- drivers/gpu/drm/amd/amdkfd/kfd_pasid.c | 5 +- drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 23 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 21 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 9 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 8 +- 24 files changed, 540 insertions(+), 738 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index e22dde3..29fca8d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -34,11 +34,7 @@ #include #include #include -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) -#include -#else -#include -#endif +#include #include #include @@ -46,7 +42,6 @@ #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" #include "kfd_ipc.h" -#include "cik_regs.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -546,26 +541,25 @@ static int kfd_ioctl_set_trap_handler(struct file *filep, return err; } -static int -kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) +static int kfd_ioctl_dbg_register(struct file *filep, + struct kfd_process *p, void *data) { - long status = -EFAULT; struct kfd_ioctl_dbg_register_args *args = data; struct kfd_dev *dev; struct kfd_dbgmgr *dbgmgr_ptr; struct kfd_process_device *pdd; - bool create_ok = false; + bool create_ok; + long status = 0; dev = kfd_device_by_id(args->gpu_id); - if (!dev) { - pr_err("Getting device by id failed in %s\n", __func__); - return status; - } + if (!dev) + return -EINVAL; mutex_lock(&p->mutex); - mutex_lock(get_dbgmgr_mutex()); + mutex_lock(kfd_get_dbgmgr_mutex()); - /* make sure that we have pdd, if this the first queue created for + /* + * make sure that we have pdd, if this the first queue created for * this process */ pdd = kfd_bind_process_to_device(dev, p); @@ -576,7 +570,6 @@ kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) if (!dev->dbgmgr) { /* In case of a legal 
call, we have no dbgmgr yet */ - create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev); if (create_ok) { status = kfd_dbgmgr_register(dbgmgr_ptr, p); @@ -585,10 +578,13 @@ kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) else dev->dbgmgr = dbgmgr_ptr; } + } else { + pr_debug("debugger already registered\n"); + status = -EINVAL; } out: - mutex_unlock(get_dbgmgr_mutex()); + mutex_unlock(kfd_get_dbgmgr_mutex()); mutex_unlock(&p->mutex); return status; @@ -597,25 +593,28 @@ kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) static int kfd_ioctl_dbg_unregister(struct file *filep, struct kfd_process *p, void *data) { - long status = -EFAULT; struct kfd_ioctl_dbg_unregister_args *args = data; struct kfd_dev *dev; + long status; dev = kfd_device_by_id(args->gpu_id); - if (!dev) { - pr_err("Getting device by id failed in %s\n", __func__); - return status; + if (!dev) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { + pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n"); + return -EINVAL; } - mutex_lock(get_dbgmgr_mutex()); + mutex_lock(kfd_get_dbgmgr_mutex()); status = kfd_dbgmgr_unregister(dev->dbgmgr, p); - if (status == 0) { + if (!status) { kfd_dbgmgr_destroy(dev->dbgmgr); dev->dbgmgr = NULL; } - mutex_unlock(get_dbgmgr_mutex()); + mutex_unlock(kfd_get_dbgmgr_mutex()); return status; } @@ -624,33 +623,28 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, * Parse and generate variable size data structure for address watch. * Total size of the buffer and # watch points is limited in order * to prevent kernel abuse. (no bearing to the much smaller HW limitation - * which is enforced by dbgdev module. + * which is enforced by dbgdev module) * please also note that the watch address itself are not "copied from user", * since it be set into the HW in user mode values. 
* */ - -static int -kfd_ioctl_dbg_address_watch(struct file *filep, - struct kfd_process *p, - void *data) +static int kfd_ioctl_dbg_address_watch(struct file *filep, + struct kfd_process *p, void *data) { - long status = -EFAULT; struct kfd_ioctl_dbg_address_watch_args *args = data; struct kfd_dev *dev; struct dbg_address_watch_info aw_info; - unsigned char *args_buff = NULL; - unsigned int args_idx = 0; + unsigned char *args_buff; + long status; void __user *cmd_from_user; uint64_t watch_mask_value = 0; + unsigned int args_idx = 0; memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); dev = kfd_device_by_id(args->gpu_id); - if (!dev) { - pr_err("Getting device by id failed in %s\n", __func__); - return -EFAULT; - } + if (!dev) + return -EINVAL; cmd_from_user = (void __user *) args->content_ptr; @@ -660,31 +654,26 @@ kfd_ioctl_dbg_address_watch(struct file *filep, /* this is the actual buffer to work with */ args_buff = memdup_user(cmd_from_user, - args->buf_size_in_bytes - sizeof(*args)); - + args->buf_size_in_bytes - sizeof(*args)); if (IS_ERR(args_buff)) return PTR_ERR(args_buff); aw_info.process = p; - aw_info.num_watch_points = - *((uint32_t *)(&args_buff[args_idx])); + aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx])); args_idx += sizeof(aw_info.num_watch_points); - aw_info.watch_mode = - (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; - args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * - aw_info.num_watch_points; + aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; + args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points; - /* set watch address base pointer to point on the array base + /* + * set watch address base pointer to point on the array base * within args_buff */ - aw_info.watch_address = (uint64_t *) &args_buff[args_idx]; /* skip over the addresses buffer */ - args_idx += sizeof(aw_info.watch_address) * - aw_info.num_watch_points; + args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; if (args_idx >= args->buf_size_in_bytes) { status = -EINVAL; @@ -694,21 +683,18 @@ kfd_ioctl_dbg_address_watch(struct file *filep, watch_mask_value = (uint64_t) args_buff[args_idx]; if (watch_mask_value > 0) { - /* there is an array of masks */ - - /* set watch mask base pointer to point on the array - * base within args_buff + /* + * There is an array of masks. + * set watch mask base pointer to point on the array base + * within args_buff */ aw_info.watch_mask = (uint64_t *) &args_buff[args_idx]; /* skip over the masks buffer */ args_idx += sizeof(aw_info.watch_mask) * - aw_info.num_watch_points; - } - - else + aw_info.num_watch_points; + } else { /* just the NULL mask, set to NULL and skip over it */ - { aw_info.watch_mask = NULL; args_idx += sizeof(aw_info.watch_mask); } @@ -721,11 +707,11 @@ kfd_ioctl_dbg_address_watch(struct file *filep, /* Currently HSA Event is not supported for DBG */ aw_info.watch_event = NULL; - mutex_lock(get_dbgmgr_mutex()); + mutex_lock(kfd_get_dbgmgr_mutex()); status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info); - mutex_unlock(get_dbgmgr_mutex()); + mutex_unlock(kfd_get_dbgmgr_mutex()); out: kfree(args_buff); @@ -733,29 +719,22 @@ kfd_ioctl_dbg_address_watch(struct file *filep, return status; } -/* - * Parse and generate fixed size data structure for wave control. - * Buffer is generated in a "packed" form, for avoiding structure - * packing/pending dependencies. 
- */ - -static int -kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, - void *data) +/* Parse and generate fixed size data structure for wave control */ +static int kfd_ioctl_dbg_wave_control(struct file *filep, + struct kfd_process *p, void *data) { - long status = -EFAULT; struct kfd_ioctl_dbg_wave_control_args *args = data; struct kfd_dev *dev; struct dbg_wave_control_info wac_info; - unsigned char *args_buff = NULL; - unsigned int args_idx = 0; - void __user *cmd_from_user; + unsigned char *args_buff; uint32_t computed_buff_size; + long status; + void __user *cmd_from_user; + unsigned int args_idx = 0; memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info)); /* we use compact form, independent of the packing attribute value */ - computed_buff_size = sizeof(*args) + sizeof(wac_info.mode) + sizeof(wac_info.operand) + @@ -763,71 +742,58 @@ kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, sizeof(wac_info.dbgWave_msg.MemoryVA) + sizeof(wac_info.trapId); - dev = kfd_device_by_id(args->gpu_id); - if (!dev) { - pr_err("Getting device by id failed in %s\n", __func__); - return -EFAULT; - } + if (!dev) + return -EINVAL; /* input size must match the computed "compact" size */ - if (args->buf_size_in_bytes != computed_buff_size) { - pr_err("Size mismatch, computed %u : actual %u\n", + pr_debug("size mismatch, computed : actual %u : %u\n", args->buf_size_in_bytes, computed_buff_size); return -EINVAL; } cmd_from_user = (void __user *) args->content_ptr; + if (cmd_from_user == NULL) + return -EINVAL; + /* copy the entire buffer from user */ args_buff = memdup_user(cmd_from_user, - args->buf_size_in_bytes - sizeof(*args)); + args->buf_size_in_bytes - sizeof(*args)); if (IS_ERR(args_buff)) return PTR_ERR(args_buff); - if (copy_from_user(args_buff, - (void __user *) args->content_ptr, - args->buf_size_in_bytes - sizeof(*args))) { - pr_err("copy_from_user failed\n"); - status = -EFAULT; - goto out; - } - /* move ptr to the start of the "pay-load" area */ - wac_info.process = p; - wac_info.operand = - *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx])); + wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx])); args_idx += sizeof(wac_info.operand); - wac_info.mode = - *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); + wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); args_idx += sizeof(wac_info.mode); wac_info.trapId = *((uint32_t *)(&args_buff[args_idx])); args_idx += sizeof(wac_info.trapId); wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = - *((uint32_t *)(&args_buff[args_idx])); + *((uint32_t *)(&args_buff[args_idx])); wac_info.dbgWave_msg.MemoryVA = NULL; - mutex_lock(get_dbgmgr_mutex()); + mutex_lock(kfd_get_dbgmgr_mutex()); - pr_info("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n", - wac_info.process, wac_info.operand, wac_info.mode, - wac_info.trapId, + pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n", + wac_info.process, wac_info.operand, + wac_info.mode, wac_info.trapId, wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info); - pr_info("Returned status of dbg manager is %ld\n", status); + pr_debug("Returned status of dbg manager is %ld\n", status); - mutex_unlock(get_dbgmgr_mutex()); + mutex_unlock(kfd_get_dbgmgr_mutex()); -out: kfree(args_buff); return status; @@ -1024,8 +990,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, return 0; } -static int 
-kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, void *data) +static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, + void *data) { struct kfd_ioctl_create_event_args *args = data; struct kfd_dev *kfd; @@ -1079,35 +1045,34 @@ kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, void *data) out_upwrite: mutex_unlock(&p->mutex); return err; - } -static int -kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p, void *data) +static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p, + void *data) { struct kfd_ioctl_destroy_event_args *args = data; return kfd_event_destroy(p, args->event_id); } -static int -kfd_ioctl_set_event(struct file *filp, struct kfd_process *p, void *data) +static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p, + void *data) { struct kfd_ioctl_set_event_args *args = data; return kfd_set_event(p, args->event_id); } -static int -kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p, void *data) +static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p, + void *data) { struct kfd_ioctl_reset_event_args *args = data; return kfd_reset_event(p, args->event_id); } -static int -kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, void *data) +static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, + void *data) { struct kfd_ioctl_wait_events_args *args = data; enum kfd_event_wait_result wait_result; @@ -2117,8 +2082,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) } else goto err_i1; - dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx\n", - nr, arg); + dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg); process = kfd_get_process(current); if (IS_ERR(process)) { @@ -2173,8 +2137,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) kfree(kdata); if (retcode) - dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, failed %d\n", - nr, arg, retcode); + dev_dbg(kfd_device, "ret = %d\n", retcode); return retcode; } @@ -2217,5 +2180,3 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) return -EFAULT; } - - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 4c267a9..af6d736 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -50,45 +50,42 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, uint32_t *packet_buff, size_t size_in_bytes, bool sync) { - int status = 0; - unsigned int *ib_packet_buff = NULL; struct pm4_mec_release_mem *rm_packet; struct pm4__indirect_buffer_pasid *ib_packet; - struct kernel_queue *kq = dbgdev->kq; - size_t pq_packets_size_in_bytes = - sizeof(struct pm4__indirect_buffer_pasid); struct kfd_mem_obj *mem_obj; - - uint64_t *rm_state = NULL; - + size_t pq_packets_size_in_bytes; union ULARGE_INTEGER *largep; union ULARGE_INTEGER addr; + struct kernel_queue *kq; + uint64_t *rm_state; + unsigned int *ib_packet_buff; + int status; - if (!kq || !packet_buff || (size_in_bytes == 0)) { - pr_err("Illegal packet parameters\n"); + if (WARN_ON(!size_in_bytes)) return -EINVAL; - } - /* todo - enter proper locking to be multithreaded safe */ - /* We acquire a buffer from DIQ - * The receive packet buff will be sitting on the Indirect - * Buffer and in the PQ we put the IB packet + sync packet(s). 
- */ + kq = dbgdev->kq; + + pq_packets_size_in_bytes = sizeof(struct pm4__indirect_buffer_pasid); if (sync) - pq_packets_size_in_bytes += - sizeof(struct pm4_mec_release_mem); + pq_packets_size_in_bytes += sizeof(struct pm4_mec_release_mem); + + /* + * We acquire a buffer from DIQ + * The receive packet buff will be sitting on the Indirect Buffer + * and in the PQ we put the IB packet + sync packet(s). + */ status = kq->ops.acquire_packet_buffer(kq, - pq_packets_size_in_bytes / sizeof(uint32_t), - &ib_packet_buff); - if (status != 0) { + pq_packets_size_in_bytes / sizeof(uint32_t), + &ib_packet_buff); + if (status) { pr_err("acquire_packet_buffer failed\n"); return status; } memset(ib_packet_buff, 0, pq_packets_size_in_bytes); - ib_packet = - (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); + ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); ib_packet->header.count = 3; ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; @@ -119,12 +116,12 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, */ rm_packet = (struct pm4_mec_release_mem *) (ib_packet_buff + (sizeof(struct pm4__indirect_buffer_pasid) / - sizeof(unsigned int))); + sizeof(unsigned int))); status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), &mem_obj); - if (status != 0) { + if (status) { pr_err("Failed to allocate GART memory\n"); kq->ops.rollback_packet(kq); return status; @@ -137,11 +134,12 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, rm_packet->header.opcode = IT_RELEASE_MEM; rm_packet->header.type = PM4_TYPE_3; rm_packet->header.count = sizeof(struct pm4_mec_release_mem) / - sizeof(unsigned int) - 2; + sizeof(unsigned int) - 2; rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; rm_packet->bitfields2.event_index = - event_index___release_mem__end_of_pipe; + event_index___release_mem__end_of_pipe; + rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; rm_packet->bitfields2.atc = 0; rm_packet->bitfields2.tc_wb_action_ena = 1; @@ -152,31 +150,33 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, rm_packet->address_hi = addr.u.high_part; rm_packet->bitfields3.data_sel = - data_sel___release_mem__send_64_bit_data; + data_sel___release_mem__send_64_bit_data; + rm_packet->bitfields3.int_sel = - int_sel___release_mem__send_data_after_write_confirm; + int_sel___release_mem__send_data_after_write_confirm; + rm_packet->bitfields3.dst_sel = - dst_sel___release_mem__memory_controller; + dst_sel___release_mem__memory_controller; rm_packet->data_lo = QUEUESTATE__ACTIVE; kq->ops.submit_packet(kq); /* Wait till CP writes sync code: */ + status = amdkfd_fence_wait_timeout( + (unsigned int *) rm_state, + QUEUESTATE__ACTIVE, 1500); - status = amdkfd_fence_wait_timeout((unsigned int *) rm_state, - QUEUESTATE__ACTIVE, 1500); - - if (rm_state) - kfd_gtt_sa_free(dbgdev->dev, mem_obj); + kfd_gtt_sa_free(dbgdev->dev, mem_obj); return status; } static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) { - /* no action is needed in this case, just make sure diq will not - * be used + /* + * no action is needed in this case, + * just make sure diq will not be used */ dbgdev->kq = NULL; @@ -186,25 +186,17 @@ static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) { - - int status = 0; - struct kernel_queue *kq = NULL; struct queue_properties properties; unsigned int qid; - struct process_queue_manager *pqm = dbgdev->pqm; - - if (!pqm) { - pr_err("No PQM\n"); - return -EFAULT; - } + struct 
kernel_queue *kq = NULL; + int status; properties.type = KFD_QUEUE_TYPE_DIQ; - status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, - &properties, &qid); + &properties, &qid); - if (status != 0) { - pr_err("Create Queue failed\n"); + if (status) { + pr_err("Failed to create DIQ\n"); return status; } @@ -213,10 +205,11 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) kq = pqm_get_kernel_queue(dbgdev->pqm, qid); if (!kq) { - pr_err("Error getting Kernel Queue\n"); + pr_err("Error getting DIQ\n"); pqm_destroy_queue(dbgdev->pqm, qid); - return -ENOMEM; + return -EFAULT; } + dbgdev->kq = kq; return status; @@ -225,24 +218,19 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) { /* disable watch address */ - dbgdev_address_watch_disable_nodiq(dbgdev->dev); return 0; } static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) { - /* todo - if needed, kill wavefronts and disable watch */ - int status = 0; + /* todo - disable address watch */ + int status; + + status = pqm_destroy_queue(dbgdev->pqm, + dbgdev->kq->queue->properties.queue_id); + dbgdev->kq = NULL; - if (!dbgdev || !dbgdev->pqm || !dbgdev->kq) { - pr_err("Can't destroy diq\n"); - status = -EFAULT; - } else { - pqm_destroy_queue(dbgdev->pqm, - dbgdev->kq->queue->properties.queue_id); - dbgdev->kq = NULL; - } return status; } @@ -277,7 +265,7 @@ static void dbgdev_address_watch_set_registers( cntl->bitfields.mode = adw_info->watch_mode[index]; cntl->bitfields.vmid = (uint32_t) vmid; - /* for APU assume it is an ATC address. */ + /* for APU assume it is an ATC address */ if (is_apu) cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); @@ -285,29 +273,22 @@ static void dbgdev_address_watch_set_registers( addrHi->bitfields.addr); pr_debug("\t\t%20s %08x\n", "set reg add low :", addrLo->bitfields.addr); - } static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, struct dbg_address_watch_info *adw_info) { - union TCP_WATCH_ADDR_H_BITS addrHi; union TCP_WATCH_ADDR_L_BITS addrLo; union TCP_WATCH_CNTL_BITS cntl; - - unsigned int vmid; - unsigned int i; - struct kfd_process_device *pdd; + unsigned int i; - /* taking the vmid for that process on the safe way - * using pdd - */ + /* taking the vmid for that process on the safe way using pdd */ pdd = kfd_get_process_device_data(dbgdev->dev, adw_info->process); if (!pdd) { - pr_err("No PDD available\n"); + pr_err("Failed to get pdd for wave control no DIQ\n"); return -EFAULT; } @@ -315,27 +296,25 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, addrLo.u32All = 0; cntl.u32All = 0; - vmid = pdd->qpd.vmid; - if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || - (adw_info->num_watch_points == 0) || !adw_info->watch_mode) + (adw_info->num_watch_points == 0)) { + pr_err("num_watch_points is invalid\n"); return -EINVAL; + } - for (i = 0; i < adw_info->num_watch_points; i++) { + if (!adw_info->watch_mode || !adw_info->watch_address) { + pr_err("adw_info fields are not valid\n"); + return -EINVAL; + } - dbgdev_address_watch_set_registers( - adw_info, - &addrHi, - &addrLo, - &cntl, - i, - vmid, - dbgdev->dev->device_info->is_need_iommu_device - ); + for (i = 0; i < adw_info->num_watch_points; i++) { + dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, + &cntl, i, pdd->qpd.vmid, + dbgdev->dev->device_info->is_need_iommu_device); pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); pr_debug("\t\t%20s 
%08x\n", "register index :", i); - pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); + pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); pr_debug("\t\t%20s %08x\n", "Address Low is :", addrLo.bitfields.addr); pr_debug("\t\t%20s %08x\n", "Address high is :", @@ -366,39 +345,41 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, struct dbg_address_watch_info *adw_info) { - - int status = 0; - unsigned int i = 0; + struct pm4__set_config_reg *packets_vec; union TCP_WATCH_ADDR_H_BITS addrHi; union TCP_WATCH_ADDR_L_BITS addrLo; union TCP_WATCH_CNTL_BITS cntl; - + unsigned int aw_reg_add_dword; + uint32_t *packet_buff_uint; + uint64_t packet_buff_gpu_addr; + unsigned int i; + int status; + size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; /* we do not control the vmid in DIQ mode, just a place holder */ unsigned int vmid = 0; - uint32_t *packet_buff_uint = NULL; - uint64_t packet_buff_gpu_addr = 0; - - struct pm4__set_config_reg *packets_vec = NULL; - - size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; - - unsigned int aw_reg_add_dword; - addrHi.u32All = 0; addrLo.u32All = 0; cntl.u32All = 0; if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || - (adw_info->num_watch_points == 0) || - !adw_info->watch_mode || !adw_info->watch_address) + (adw_info->num_watch_points == 0)) { + pr_err("num_watch_points is invalid\n"); return -EINVAL; + } + + if (!adw_info->watch_mode || !adw_info->watch_address) { + pr_err("adw_info fields are not valid\n"); + return -EINVAL; + } status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, ib_size/sizeof(uint32_t), &packet_buff_uint, &packet_buff_gpu_addr); - if (status != 0) + if (status) { + pr_err("Failed to allocate IB from DIQ ring\n"); return status; + } memset(packet_buff_uint, 0, ib_size); packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); @@ -406,29 +387,20 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, packets_vec[0].header.count = 1; packets_vec[0].header.opcode = IT_SET_CONFIG_REG; packets_vec[0].header.type = PM4_TYPE_3; - packets_vec[0].bitfields2.vmid_shift = - ADDRESS_WATCH_CNTL_OFFSET; + packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; packets_vec[0].bitfields2.insert_vmid = 1; packets_vec[1].ordinal1 = packets_vec[0].ordinal1; packets_vec[1].bitfields2.insert_vmid = 0; packets_vec[2].ordinal1 = packets_vec[0].ordinal1; packets_vec[2].bitfields2.insert_vmid = 0; packets_vec[3].ordinal1 = packets_vec[0].ordinal1; - packets_vec[3].bitfields2.vmid_shift = - ADDRESS_WATCH_CNTL_OFFSET; + packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; packets_vec[3].bitfields2.insert_vmid = 1; for (i = 0; i < adw_info->num_watch_points; i++) { - - dbgdev_address_watch_set_registers( - adw_info, - &addrHi, - &addrLo, - &cntl, - i, - vmid, - dbgdev->dev->device_info->is_need_iommu_device - ); + dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, + &cntl, i, vmid, + dbgdev->dev->device_info->is_need_iommu_device); pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); pr_debug("\t\t%20s %08x\n", "register index :", i); @@ -452,38 +424,34 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); aw_reg_add_dword = - dbgdev->dev->kfd2kgd - ->address_watch_get_offset( + dbgdev->dev->kfd2kgd->address_watch_get_offset( dbgdev->dev->kgd, i, ADDRESS_WATCH_REG_CNTL); packets_vec[0].bitfields2.reg_offset = - aw_reg_add_dword - 
CONFIG_REG_BASE; + aw_reg_add_dword - AMD_CONFIG_REG_BASE; + packets_vec[0].reg_data[0] = cntl.u32All; aw_reg_add_dword = - dbgdev->dev->kfd2kgd - ->address_watch_get_offset( + dbgdev->dev->kfd2kgd->address_watch_get_offset( dbgdev->dev->kgd, i, ADDRESS_WATCH_REG_ADDR_HI); - packets_vec[1].bitfields2.reg_offset = - aw_reg_add_dword - CONFIG_REG_BASE; + aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[1].reg_data[0] = addrHi.u32All; aw_reg_add_dword = - dbgdev->dev->kfd2kgd - ->address_watch_get_offset( + dbgdev->dev->kfd2kgd->address_watch_get_offset( dbgdev->dev->kgd, i, ADDRESS_WATCH_REG_ADDR_LO); - packets_vec[2].bitfields2.reg_offset = - aw_reg_add_dword - CONFIG_REG_BASE; + aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[2].reg_data[0] = addrLo.u32All; /* enable watch flag if address is not zero*/ @@ -493,15 +461,13 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, cntl.bitfields.valid = 0; aw_reg_add_dword = - dbgdev->dev->kfd2kgd - ->address_watch_get_offset( + dbgdev->dev->kfd2kgd->address_watch_get_offset( dbgdev->dev->kgd, i, ADDRESS_WATCH_REG_CNTL); - packets_vec[3].bitfields2.reg_offset = - aw_reg_add_dword - CONFIG_REG_BASE; + aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[3].reg_data[0] = cntl.u32All; status = dbgdev_diq_submit_ib( @@ -511,15 +477,13 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, packet_buff_uint, ib_size, true); - if (status != 0) { - pr_err("Failed to submit DIQ packet\n"); + if (status) { + pr_err("Failed to submit IB to DIQ\n"); return status; } - } return status; - } static int dbgdev_wave_control_set_registers( @@ -538,9 +502,11 @@ static int dbgdev_wave_control_set_registers( pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; switch (wac_info->mode) { - /* Send command to single wave */ + /* Send command to single wave */ case HSA_DBG_WAVEMODE_SINGLE: - /* limit access to the process waves only,by setting vmid check + /* + * Limit access to the process waves only, + * by setting vmid check */ reg_sq_cmd.bits.check_vmid = 1; reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; @@ -553,18 +519,18 @@ static int dbgdev_wave_control_set_registers( break; - /* Send command to all waves with matching VMID */ + /* Send command to all waves with matching VMID */ case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: - reg_gfx_index.bits.sh_broadcast_writes = 1; reg_gfx_index.bits.se_broadcast_writes = 1; reg_gfx_index.bits.instance_broadcast_writes = 1; reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; + break; - /* Send command to all CU waves with matching VMID */ + /* Send command to all CU waves with matching VMID */ case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: reg_sq_cmd.bits.check_vmid = 1; @@ -626,38 +592,37 @@ static int dbgdev_wave_control_set_registers( } if (status == 0) { - *in_reg_sq_cmd = reg_sq_cmd; + *in_reg_sq_cmd = reg_sq_cmd; *in_reg_gfx_index = reg_gfx_index; } - return status; + return status; } static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, struct dbg_wave_control_info *wac_info) { - int status = 0; + int status; union SQ_CMD_BITS reg_sq_cmd; union GRBM_GFX_INDEX_BITS reg_gfx_index; - uint32_t *packet_buff_uint = NULL; - uint64_t packet_buff_gpu_addr = 0; - struct pm4__set_config_reg *packets_vec = NULL; + uint32_t *packet_buff_uint; + uint64_t packet_buff_gpu_addr; + struct pm4__set_config_reg *packets_vec; size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; reg_sq_cmd.u32All = 0; - status = dbgdev_wave_control_set_registers(wac_info, - ®_sq_cmd, - ®_gfx_index, - 
dbgdev->dev->device_info->asic_family); + status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, + ®_gfx_index, dbgdev->dev->device_info->asic_family); + if (status) { + pr_err("Failed to set wave control registers\n"); + return status; + } - /* we do not control the VMID in DIQ, so reset it to a - * known value - */ + /* we do not control the VMID in DIQ, so reset it to a known value */ reg_sq_cmd.bits.vm_id = 0; - if (status != 0) - return status; + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); pr_debug("\t\t mode is: %u\n", wac_info->mode); @@ -681,8 +646,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, reg_gfx_index.bitfields.instance_index); pr_debug("\t\t sebw is : %u\n", reg_gfx_index.bitfields.se_broadcast_writes); - pr_debug("\t\t se_ind is : %u\n", - reg_gfx_index.bitfields.se_index); + pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); pr_debug("\t\t sbw is : %u\n", reg_gfx_index.bitfields.sh_broadcast_writes); @@ -692,10 +656,10 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq, ib_size / sizeof(uint32_t), &packet_buff_uint, &packet_buff_gpu_addr); - - if (status != 0) + if (status) { + pr_err("Failed to allocate IB from DIQ ring\n"); return status; - + } memset(packet_buff_uint, 0, ib_size); packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; @@ -703,15 +667,18 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; packets_vec[0].header.type = PM4_TYPE_3; packets_vec[0].bitfields2.reg_offset = - GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE; + GRBM_GFX_INDEX / (sizeof(uint32_t)) - + USERCONFIG_REG_BASE; + packets_vec[0].bitfields2.insert_vmid = 0; packets_vec[0].reg_data[0] = reg_gfx_index.u32All; packets_vec[1].header.count = 1; packets_vec[1].header.opcode = IT_SET_CONFIG_REG; packets_vec[1].header.type = PM4_TYPE_3; - packets_vec[1].bitfields2.reg_offset = - SQ_CMD / (sizeof(uint32_t)) - CONFIG_REG_BASE; + packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - + AMD_CONFIG_REG_BASE; + packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; packets_vec[1].bitfields2.insert_vmid = 1; packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; @@ -726,7 +693,9 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packets_vec[2].ordinal1 = packets_vec[0].ordinal1; packets_vec[2].bitfields2.reg_offset = - GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE; + GRBM_GFX_INDEX / (sizeof(uint32_t)) - + USERCONFIG_REG_BASE; + packets_vec[2].bitfields2.insert_vmid = 0; packets_vec[2].reg_data[0] = reg_gfx_index.u32All; @@ -737,8 +706,8 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packet_buff_uint, ib_size, false); - if (status != 0) - pr_debug("%s\n", " Critical Error ! 
Submit diq packet failed "); + if (status) + pr_err("Failed to submit IB to DIQ\n"); return status; } @@ -746,85 +715,64 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, struct dbg_wave_control_info *wac_info) { - int status = 0; - unsigned int vmid = 0xffff; + int status; union SQ_CMD_BITS reg_sq_cmd; union GRBM_GFX_INDEX_BITS reg_gfx_index; - - struct kfd_process_device *pdd = NULL; + struct kfd_process_device *pdd; reg_sq_cmd.u32All = 0; - status = 0; /* taking the VMID for that process on the safe way using PDD */ pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); - if (pdd) { - status = dbgdev_wave_control_set_registers(wac_info, - ®_sq_cmd, - ®_gfx_index, - dbgdev->dev->device_info->asic_family); - if (status == 0) { - - /* for non DIQ we need to patch the VMID: */ - - vmid = pdd->qpd.vmid; - reg_sq_cmd.bits.vm_id = vmid; - - pr_debug("\t\t %30s\n", - "* * * * * * * * * * * * * * * * * *"); - - pr_debug("\t\t mode is: %u\n", wac_info->mode); - pr_debug("\t\t operand is: %u\n", wac_info->operand); - pr_debug("\t\t trap id is: %u\n", wac_info->trapId); - pr_debug("\t\t msg value is: %u\n", - wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); - pr_debug("\t\t vmid is: %u\n", vmid); - - pr_debug("\t\t chk_vmid is : %u\n", - reg_sq_cmd.bitfields.check_vmid); - pr_debug("\t\t command is : %u\n", - reg_sq_cmd.bitfields.cmd); - pr_debug("\t\t queue id is : %u\n", - reg_sq_cmd.bitfields.queue_id); - pr_debug("\t\t simd id is : %u\n", - reg_sq_cmd.bitfields.simd_id); - pr_debug("\t\t mode is : %u\n", - reg_sq_cmd.bitfields.mode); - pr_debug("\t\t vm_id is : %u\n", - reg_sq_cmd.bitfields.vm_id); - pr_debug("\t\t wave_id is : %u\n", - reg_sq_cmd.bitfields.wave_id); - - pr_debug("\t\t ibw is : %u\n", - reg_gfx_index.bitfields.instance_broadcast_writes); - pr_debug("\t\t ii is : %u\n", - reg_gfx_index.bitfields.instance_index); - pr_debug("\t\t sebw is : %u\n", - reg_gfx_index.bitfields.se_broadcast_writes); - pr_debug("\t\t se_ind is : %u\n", - reg_gfx_index.bitfields.se_index); - pr_debug("\t\t sh_ind is : %u\n", - reg_gfx_index.bitfields.sh_index); - pr_debug("\t\t sbw is : %u\n", - reg_gfx_index.bitfields.sh_broadcast_writes); - - pr_debug("\t\t %30s\n", - "* * * * * * * * * * * * * * * * * *"); - - dbgdev->dev->kfd2kgd - ->wave_control_execute(dbgdev->dev->kgd, - reg_gfx_index.u32All, - reg_sq_cmd.u32All); - } else { - status = -EINVAL; - } - } else { - status = -EFAULT; + if (!pdd) { + pr_err("Failed to get pdd for wave control no DIQ\n"); + return -EFAULT; + } + status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, + ®_gfx_index, dbgdev->dev->device_info->asic_family); + if (status) { + pr_err("Failed to set wave control registers\n"); + return status; } - return status; + /* for non DIQ we need to patch the VMID: */ + reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + pr_debug("\t\t mode is: %u\n", wac_info->mode); + pr_debug("\t\t operand is: %u\n", wac_info->operand); + pr_debug("\t\t trap id is: %u\n", wac_info->trapId); + pr_debug("\t\t msg value is: %u\n", + wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); + + pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); + pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); + pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); + pr_debug("\t\t simd id is : %u\n", 
reg_sq_cmd.bitfields.simd_id); + pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); + pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); + pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); + + pr_debug("\t\t ibw is : %u\n", + reg_gfx_index.bitfields.instance_broadcast_writes); + pr_debug("\t\t ii is : %u\n", + reg_gfx_index.bitfields.instance_index); + pr_debug("\t\t sebw is : %u\n", + reg_gfx_index.bitfields.se_broadcast_writes); + pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); + pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); + pr_debug("\t\t sbw is : %u\n", + reg_gfx_index.bitfields.sh_broadcast_writes); + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, + reg_gfx_index.u32All, + reg_sq_cmd.u32All); } int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) @@ -895,6 +843,7 @@ void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, pdbgdev->kq = NULL; pdbgdev->type = type; pdbgdev->pqm = NULL; + switch (type) { case DBGDEV_TYPE_NODIQ: pdbgdev->dbgdev_register = dbgdev_register_nodiq; @@ -904,12 +853,10 @@ void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, break; case DBGDEV_TYPE_DIQ: default: - pdbgdev->dbgdev_register = dbgdev_register_diq; pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; - break; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h index 75883e0..583aaa9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h @@ -23,10 +23,6 @@ #ifndef KFD_DBGDEV_H_ #define KFD_DBGDEV_H_ -/* - * SQ_IND_CMD_CMD enum - */ - enum { SQ_CMD_VMID_OFFSET = 28, ADDRESS_WATCH_CNTL_OFFSET = 24 @@ -52,9 +48,9 @@ enum { /* CONFIG reg space definition */ enum { - CONFIG_REG_BASE = 0x2000, /* in dwords */ - CONFIG_REG_END = 0x2B00, - CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE + AMD_CONFIG_REG_BASE = 0x2000, /* in dwords */ + AMD_CONFIG_REG_END = 0x2B00, + AMD_CONFIG_REG_SIZE = AMD_CONFIG_REG_END - AMD_CONFIG_REG_BASE }; /* SH reg space definition */ @@ -88,7 +84,7 @@ enum SQ_IND_CMD_CMD { SQ_IND_CMD_CMD_RESUME = 0x00000002, SQ_IND_CMD_CMD_KILL = 0x00000003, SQ_IND_CMD_CMD_DEBUG = 0x00000004, - SQ_IND_CMD_CMD_TRAP = 0x00000005 + SQ_IND_CMD_CMD_TRAP = 0x00000005, }; enum SQ_IND_CMD_MODE { @@ -128,30 +124,30 @@ union SQ_IND_CMD_BITS { union SQ_CMD_BITS { struct { uint32_t cmd:3; - uint32_t:1; + uint32_t:1; uint32_t mode:3; uint32_t check_vmid:1; uint32_t trap_id:3; - uint32_t:5; + uint32_t:5; uint32_t wave_id:4; uint32_t simd_id:2; - uint32_t:2; + uint32_t:2; uint32_t queue_id:3; - uint32_t:1; + uint32_t:1; uint32_t vm_id:4; } bitfields, bits; struct { uint32_t cmd:3; - uint32_t:1; + uint32_t:1; uint32_t mode:3; uint32_t check_vmid:1; uint32_t data:3; - uint32_t:5; + uint32_t:5; uint32_t wave_id:4; uint32_t simd_id:2; - uint32_t:2; + uint32_t:2; uint32_t queue_id:3; - uint32_t:1; + uint32_t:1; uint32_t vm_id:4; } bitfields_sethalt, bits_sethalt; uint32_t u32All; @@ -205,7 +201,7 @@ union TCP_WATCH_ADDR_L_BITS { }; enum { - QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */ + QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */ QUEUESTATE__ACTIVE_COMPLETION_PENDING, QUEUESTATE__ACTIVE }; @@ -226,4 +222,4 @@ union ULARGE_INTEGER { void 
kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, enum DBGDEV_TYPE type); -#endif /* KFD_DBGDEV_H_ */ +#endif /* KFD_DBGDEV_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index d1157ff..9d4af96 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -37,14 +37,16 @@ static DEFINE_MUTEX(kfd_dbgmgr_mutex); -struct mutex *get_dbgmgr_mutex(void) +struct mutex *kfd_get_dbgmgr_mutex(void) { return &kfd_dbgmgr_mutex; } + static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) { kfree(pmgr->dbgdev); + pmgr->dbgdev = NULL; pmgr->pasid = 0; pmgr->dev = NULL; @@ -55,7 +57,6 @@ void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) if (pmgr) { kfd_dbgmgr_uninitialize(pmgr); kfree(pmgr); - pmgr = NULL; } } @@ -64,6 +65,9 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; struct kfd_dbgmgr *new_buff; + if (WARN_ON(!pdev->init_complete)) + return false; + new_buff = kfd_alloc_struct(new_buff); if (!new_buff) { pr_err("Failed to allocate dbgmgr instance\n"); @@ -74,7 +78,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) new_buff->dev = pdev; new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev); if (!new_buff->dbgdev) { - pr_err("Failed to allocate dbgdev\n"); + pr_err("Failed to allocate dbgdev instance\n"); kfree(new_buff); return false; } @@ -91,138 +95,64 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { - if (!pmgr || !pmgr->dev || !pmgr->dbgdev) - return -EINVAL; - if (pmgr->pasid != 0) { - /* HW debugger is already active. */ + pr_debug("H/W debugger is already active using pasid %d\n", + pmgr->pasid); return -EBUSY; } /* remember pasid */ - pmgr->pasid = p->pasid; /* provide the pqm for diq generation */ - pmgr->dbgdev->pqm = &p->pqm; /* activate the actual registering */ - /* todo: you should lock with the process mutex here */ pmgr->dbgdev->dbgdev_register(pmgr->dbgdev); - /* todo: you should unlock with the process mutex here */ return 0; } long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { - - if (!pmgr || !pmgr->dev || !pmgr->dbgdev || !p) - return -EINVAL; - + /* Is the requests coming from the already registered process? */ if (pmgr->pasid != p->pasid) { - /* Is the requests coming from the already registered - * process? - */ + pr_debug("H/W debugger is not registered by calling pasid %d\n", + p->pasid); return -EINVAL; } - /* todo: you should lock with the process mutex here */ - pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev); - /* todo: you should unlock with the process mutex here */ - pmgr->pasid = 0; return 0; } long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, - struct dbg_wave_control_info *wac_info) + struct dbg_wave_control_info *wac_info) { - if (!pmgr || !pmgr->dev || !pmgr->dbgdev || !wac_info || - !wac_info->process) - return -EINVAL; - - /* Is the requests coming from the already registered - * process? - */ + /* Is the requests coming from the already registered process? 
*/ if (pmgr->pasid != wac_info->process->pasid) { - /* HW debugger support was not registered for - * requester process - */ + pr_debug("H/W debugger support was not registered for requester pasid %d\n", + wac_info->process->pasid); return -EINVAL; } - return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, - wac_info); + return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info); } long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, - struct dbg_address_watch_info *adw_info) + struct dbg_address_watch_info *adw_info) { - if (!pmgr || !pmgr->dev || !pmgr->dbgdev || !adw_info || - !adw_info->process) - return -EINVAL; - - /* Is the requests coming from the already registered - * process? - */ + /* Is the requests coming from the already registered process? */ if (pmgr->pasid != adw_info->process->pasid) { - /* HW debugger support was not registered for - * requester process - */ + pr_debug("H/W debugger support was not registered for requester pasid %d\n", + adw_info->process->pasid); return -EINVAL; } return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, - adw_info); + adw_info); } - -/* - * Handle abnormal process termination - * if we are in the midst of a debug session, we should kill all pending waves - * of the debugged process and unregister the process from the Debugger. - */ -long kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, - struct kfd_process *process) -{ - long status = 0; - struct dbg_wave_control_info wac_info; - - if (!pmgr || !pmgr->dev || !pmgr->dbgdev) - return -EINVAL; - - /* first, we kill all the wavefronts of this process */ - wac_info.process = process; - wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; - wac_info.operand = HSA_DBG_WAVEOP_KILL; - - /* not used for KILL */ - wac_info.trapId = 0x0; - wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = 0; - wac_info.dbgWave_msg.MemoryVA = NULL; - - status = (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, - &wac_info); - - if (status != 0) { - pr_err("wave control failed, status is: %ld\n", status); - return status; - } - if (pmgr->pasid == wac_info.process->pasid) { - /* if terminated process was registered for debug, - * then unregister it - */ - status = kfd_dbgmgr_unregister(pmgr, process); - pmgr->pasid = 0; - } - if (status != 0) - pr_err("unregister failed, status is: %ld debugger can not be reused\n", - status); - - return status; -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h index b9a769a..a04a1fe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h @@ -26,7 +26,7 @@ #include "kfd_priv.h" -/* must align with hsakmttypes definition. 
*/ +/* must align with hsakmttypes definition */ #pragma pack(push, 4) enum HSA_DBG_WAVEOP { @@ -42,13 +42,14 @@ enum HSA_DBG_WAVEOP { enum HSA_DBG_WAVEMODE { /* send command to a single wave */ HSA_DBG_WAVEMODE_SINGLE = 0, - /* Broadcast to all wavefronts of all processes is not supported for - * HSA user mode + /* + * Broadcast to all wavefronts of all processes is not + * supported for HSA user mode */ /* send to waves within current process */ HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, - /* send to waves within current process on CU */ + /* send to waves within current process on CU */ HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, HSA_DBG_NUM_WAVEMODE = 3, HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF @@ -74,7 +75,7 @@ enum HSA_DBG_WATCH_MODE { /* This structure is hardware specific and may change in the future */ struct HsaDbgWaveMsgAMDGen2 { union { - struct { + struct ui32 { uint32_t UserData:8; /* user data */ uint32_t ShaderArray:1; /* Shader array */ uint32_t Priv:1; /* Privileged */ @@ -88,9 +89,7 @@ struct HsaDbgWaveMsgAMDGen2 { } ui32; uint32_t Value; }; - uint32_t Reserved2; - }; union HsaDbgWaveMessageAMD { @@ -103,7 +102,8 @@ struct HsaDbgWaveMessage { union HsaDbgWaveMessageAMD DbgWaveMsg; }; -/* TODO: This definitions to be MOVED to kfd_event, once it is implemented. +/* + * TODO: This definitions to be MOVED to kfd_event, once it is implemented. * * HSA sync primitive, Event and HW Exception notification API definitions. * The API functions allow the runtime to define a so-called sync-primitive, @@ -122,33 +122,34 @@ struct HsaDbgWaveMessage { /* these are the new definitions for events */ enum HSA_EVENTTYPE { - HSA_EVENTTYPE_SIGNAL = 0, /* User-mode generated GPU signal */ - HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ + HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */ + HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change - * ( start/stop ) + * (start/stop) */ HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */ HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state - *(EOP pm4) + * (EOP pm4) */ - /* ... */ + /* ... 
*/ HSA_EVENTTYPE_MAXID, HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF }; -/* Subdefinitions for various event types: Syncvar */ +/* Sub-definitions for various event types: Syncvar */ struct HsaSyncVar { - union { - void *UserData; /* pointer to user mode data */ + union SyncVar { + void *UserData; /* pointer to user mode data */ uint64_t UserDataPtrValue; /* 64bit compatibility of value */ } SyncVar; uint64_t SyncVarSize; }; -/* Subdefinitions for various event types: NodeChange */ +/* Sub-definitions for various event types: NodeChange */ + enum HSA_EVENTTYPE_NODECHANGE_FLAGS { HSA_EVENTTYPE_NODECHANGE_ADD = 0, HSA_EVENTTYPE_NODECHANGE_REMOVE = 1, @@ -161,7 +162,6 @@ struct HsaNodeChange { }; /* Sub-definitions for various event types: DeviceStateChange */ - enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS { /* device started (and available) */ HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, @@ -183,10 +183,11 @@ struct HsaDeviceStateChange { }; struct HsaEventData { - enum HSA_EVENTTYPE EventType; /* event type */ - union { - /* return data associated with HSA_EVENTTYPE_SIGNAL and other - * events + enum HSA_EVENTTYPE EventType; /* event type */ + union EventData { + /* + * return data associated with HSA_EVENTTYPE_SIGNAL + * and other events */ struct HsaSyncVar SyncVar; @@ -223,7 +224,6 @@ struct HsaEvent { struct HsaEventData EventData; }; - #pragma pack(pop) enum DBGDEV_TYPE { @@ -268,9 +268,9 @@ struct kfd_dbgdev { int (*dbgdev_register)(struct kfd_dbgdev *dbgdev); int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev); int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, - struct dbg_address_watch_info *adw_info); + struct dbg_address_watch_info *adw_info); int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, - struct dbg_wave_control_info *wac_info); + struct dbg_wave_control_info *wac_info); }; @@ -281,15 +281,13 @@ struct kfd_dbgmgr { }; /* prototypes for debug manager functions */ -struct mutex *get_dbgmgr_mutex(void); +struct mutex *kfd_get_dbgmgr_mutex(void); void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr); bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev); long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p); long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p); long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, - struct dbg_wave_control_info *wac_info); + struct dbg_wave_control_info *wac_info); long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, - struct dbg_address_watch_info *adw_info); -long kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, - struct kfd_process *process); + struct dbg_address_watch_info *adw_info); #endif /* KFD_DBGMGR_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index d8b6489..5e6c989 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -210,7 +210,6 @@ static const struct kfd_device_info vega10_vf_device_info = { .needs_pci_atomics = false, }; - static const struct kfd_device_info raven_device_info = { .asic_family = CHIP_RAVEN, .max_pasid_bits = 16, @@ -230,19 +229,6 @@ struct kfd_deviceid { const struct kfd_device_info *device_info; }; -/* - * // -// TONGA/AMETHYST device IDs (performance segment) -// -#define DEVICE_ID_VI_TONGA_P_6920 0x6920 // unfused -#define DEVICE_ID_VI_TONGA_P_6921 0x6921 // Amethyst XT -#define DEVICE_ID_VI_TONGA_P_6928 0x6928 // Tonga GL XT -#define DEVICE_ID_VI_TONGA_P_692B 0x692B // Tonga GL PRO -#define DEVICE_ID_VI_TONGA_P_692F 0x692F // Tonga GL PRO VF 
-#define DEVICE_ID_VI_TONGA_P_6938 0x6938 // Tonga XT -#define DEVICE_ID_VI_TONGA_P_6939 0x6939 // Tonga PRO - * - */ /* Please keep this sorted by increasing device id. */ static const struct kfd_deviceid supported_devices[] = { #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) @@ -349,8 +335,7 @@ static const struct kfd_device_info *lookup_device_info(unsigned short did) for (i = 0; i < ARRAY_SIZE(supported_devices); i++) { if (supported_devices[i].did == did) { - WARN(!supported_devices[i].device_info, - "Cannot look up device info, Device Info is NULL"); + WARN_ON(!supported_devices[i].device_info); return supported_devices[i].device_info; } } @@ -469,10 +454,8 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, flags); dev = kfd_device_by_pci_dev(pdev); - if (WARN_ON(!dev)) - return -ENODEV; - - kfd_signal_iommu_event(dev, pasid, address, + if (!WARN_ON(!dev)) + kfd_signal_iommu_event(dev, pasid, address, flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); return AMD_IOMMU_INV_PRI_RSP_INVALID; @@ -617,7 +600,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_doorbell_error; } - if (kfd_topology_add_device(kfd) != 0) { + if (kfd_topology_add_device(kfd)) { dev_err(kfd_device, "Error adding device %x:%x to topology\n", kfd->pdev->vendor, kfd->pdev->device); @@ -738,7 +721,6 @@ int kgd2kfd_resume(struct kfd_dev *kfd) return 0; return kfd_resume(kfd); - } static int kfd_resume(struct kfd_dev *kfd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8dbbbeb..020aa9b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -90,13 +90,11 @@ unsigned int get_queues_num(struct device_queue_manager *dqm) unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) { - BUG_ON(!dqm || !dqm->dev); return dqm->dev->shared_resources.num_queue_per_pipe; } unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) { - BUG_ON(!dqm || !dqm->dev); return dqm->dev->shared_resources.num_pipe_per_mec; } @@ -248,7 +246,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (list_empty(&qpd->queues_list)) { retval = allocate_vmid(dqm, qpd, q); - if (retval != 0) + if (retval) goto out_unlock; } *allocated_vmid = qpd->vmid; @@ -270,7 +268,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) retval = create_sdma_queue_nocpsch(dqm, q, qpd); - if (retval != 0) { + if (retval) { if (list_empty(&qpd->queues_list)) { deallocate_vmid(dqm, qpd, q); *allocated_vmid = 0; @@ -350,7 +348,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, return -ENOMEM; retval = allocate_hqd(dqm, q); - if (retval != 0) + if (retval) return retval; retval = allocate_doorbell(qpd, q); @@ -359,7 +357,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - if (retval != 0) + if (retval) goto out_deallocate_doorbell; pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", @@ -373,7 +371,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, q->process->mm); - if (retval != 0) + if (retval) goto out_uninit_mqd; return 0; @@ -505,7 +503,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = 
unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); - if (retval != 0) { + if (retval) { pr_err("unmap queue failed"); goto out_unlock; } @@ -513,7 +511,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); - if (retval != 0) { + if (retval) { pr_err("destroy mqd failed"); goto out_unlock; } @@ -527,6 +525,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) &q->properties, q->process->mm); } /* + * * check active state vs. the previous state * and modify counter accordingly */ @@ -862,7 +861,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, return -ENOMEM; retval = allocate_sdma_queue(dqm, &q->sdma_id); - if (retval != 0) + if (retval) return retval; q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; @@ -879,11 +878,11 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, dqm->asic_ops.init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - if (retval != 0) + if (retval) goto out_deallocate_doorbell; retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); - if (retval != 0) + if (retval) goto out_uninit_mqd; return 0; @@ -956,13 +955,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->active_runlist = false; dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; retval = dqm->asic_ops.init_cpsch(dqm); - if (retval != 0) - goto fail_init_pipelines; - - return 0; + if (retval) + mutex_destroy(&dqm->lock); -fail_init_pipelines: - mutex_destroy(&dqm->lock); return retval; } @@ -973,11 +968,11 @@ static int start_cpsch(struct device_queue_manager *dqm) retval = 0; retval = pm_init(&dqm->packets, dqm, dqm->dev->mec_fw_version); - if (retval != 0) + if (retval) goto fail_packet_manager_init; retval = set_sched_resources(dqm); - if (retval != 0) + if (retval) goto fail_set_sched_resources; pr_debug("Allocating fence memory\n"); @@ -986,7 +981,7 @@ static int start_cpsch(struct device_queue_manager *dqm) retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), &dqm->fence_mem); - if (retval != 0) + if (retval) goto fail_allocate_vidmem; dqm->fence_addr = dqm->fence_mem->cpu_ptr; @@ -1091,7 +1086,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { retval = allocate_sdma_queue(dqm, &q->sdma_id); - if (retval != 0) + if (retval) goto out_unlock; q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; @@ -1125,7 +1120,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.tma_addr = qpd->tma_addr; retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - if (retval != 0) + if (retval) goto out_deallocate_doorbell; list_add(&q->list, &qpd->queues_list); @@ -1203,7 +1198,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) } retval = pm_send_runlist(&dqm->packets, &dqm->queues); - if (retval != 0) { + if (retval) { pr_err("failed to execute runlist"); return retval; } @@ -1234,7 +1229,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, filter, filter_param, reset, 0); - if (retval != 0) + if (retval) return retval; *dqm->fence_addr = KFD_FENCE_INIT; @@ -1243,7 +1238,7 @@ static int 
@@ -1243,7 +1238,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
     /* should be timed out */
     retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
                 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
-    if (retval != 0) {
+    if (retval) {
         pr_err("%s queues failed.", reset ? "Resetting" : "Unmapping");
         return retval;
     }
@@ -1267,7 +1262,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
             KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
 
     retval = unmap_queues_cpsch(dqm, filter, 0, reset);
-    if (retval != 0) {
+    if (retval) {
         pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption");
         return retval;
     }
@@ -1632,7 +1627,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
         dqm->ops.get_wave_state = get_wave_state;
         break;
     default:
-        pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
+        WARN(1, "Invalid scheduling policy %d", dqm->sched_policy);
         goto out_free;
     }
 
@@ -1727,7 +1722,7 @@ int device_queue_manager_debugfs_hqds(struct seq_file *m, void *data)
         for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
             r = dqm->dev->kfd2kgd->hqd_dump(
                 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
-            if (r != 0)
+            if (r)
                 break;
 
             seq_printf(m, " CP Pipe %d, Queue %d\n",
@@ -1742,7 +1737,7 @@ int device_queue_manager_debugfs_hqds(struct seq_file *m, void *data)
         for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) {
             r = dqm->dev->kfd2kgd->hqd_sdma_dump(
                 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
-            if (r != 0)
+            if (r)
                 break;
 
             seq_printf(m, " SDMA Engine %d, RLC %d\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 55a0fda..008d258 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -145,7 +145,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
 
     vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-    pr_debug("Mapping doorbell page in kfd_doorbell_mmap\n"
+    pr_debug("Mapping doorbell page\n"
          "     target user address == 0x%08llX\n"
          "     physical address    == 0x%08llX\n"
          "     vm_flags            == 0x%04lX\n"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 21712fa..c345c50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -34,10 +34,11 @@
 #include "kfd_events.h"
 #include 
 
-/* A task can only be on a single wait_queue at a time, but we need to support
+/*
+ * A task can only be on a single wait_queue at a time, but we need to support
  * waiting on multiple events (any/all).
- * Instead of each event simply having a wait_queue with sleeping tasks, it has
- * a singly-linked list of tasks.
+ * Instead of each event simply having a wait_queue with sleeping tasks, it
+ * has a singly-linked list of tasks.
  * A thread that wants to sleep creates an array of these, one for each event
  * and adds one to each event's waiter chain.
  */
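The comment block just reformatted describes the waiter design this file keeps throughout: one waiter record per awaited event, chained into that event's own list, so a single task can sleep on many events at once. A toy rendering with plain pointers standing in for the kernel's list_head and task pointer:

#include <stdbool.h>
#include <stdio.h>

struct waiter {
        struct waiter *next;    /* link in the event's waiter chain */
        bool activated;
};

struct event {
        struct waiter *waiters; /* singly-linked chain of sleepers */
};

static void add_waiter(struct event *ev, struct waiter *w)
{
        w->next = ev->waiters;
        ev->waiters = w;
}

static void signal_event(struct event *ev)
{
        /* in the driver this would also wake each sleeping task */
        for (struct waiter *w = ev->waiters; w; w = w->next)
                w->activated = true;
}

int main(void)
{
        struct event ev = { 0 };
        struct waiter w[2] = { 0 };     /* one record per awaited event */

        add_waiter(&ev, &w[0]);
        add_waiter(&ev, &w[1]);
        signal_event(&ev);
        printf("%d %d\n", w[0].activated, w[1].activated);
        return 0;
}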
@@ -56,11 +57,12 @@ struct kfd_event_waiter {
 #define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
 #define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)
 
-/* Over-complicated pooled allocator for event notification slots.
+/*
+ * Over-complicated pooled allocator for event notification slots.
  *
- * Each signal event needs a 64-bit signal slot where the signaler will write a
- * 1 before sending an interrupt.l (This is needed because some interrupts do
- * not contain enough spare data bits to identify an event.)
+ * Each signal event needs a 64-bit signal slot where the signaler will write
+ * a 1 before sending an interrupt. (This is needed because some interrupts
+ * do not contain enough spare data bits to identify an event.)
  * We get whole pages from vmalloc and map them to the process VA.
  * Individual signal events are then allocated a slot in a page.
  */
@@ -94,10 +96,9 @@ static uint64_t *page_slots(struct signal_page *page)
     return page->kernel_address;
 }
 
-static bool
-allocate_free_slot(struct kfd_process *process,
-           struct signal_page **out_page,
-           unsigned int *out_slot_index)
+static bool allocate_free_slot(struct kfd_process *process,
+                struct signal_page **out_page,
+                unsigned int *out_slot_index)
 {
     struct signal_page *page;
 
@@ -151,7 +152,8 @@ static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
 
     /* prevent user-mode info leaks */
     memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
-        KFD_SIGNAL_EVENT_LIMIT * 8);
+           KFD_SIGNAL_EVENT_LIMIT * 8);
+
     page->kernel_address = backing_store;
 
     /* Set bits of debug events to prevent allocation */
@@ -183,10 +185,10 @@ static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
     return false;
 }
 
-static bool
-allocate_event_notification_slot(struct file *devkfd, struct kfd_process *p,
-                 struct signal_page **page,
-                 unsigned int *signal_slot_index)
+static bool allocate_event_notification_slot(struct file *devkfd,
+                    struct kfd_process *p,
+                    struct signal_page **page,
+                    unsigned int *signal_slot_index)
 {
     bool ret;
 
@@ -200,9 +202,8 @@ allocate_event_notification_slot(struct file *devkfd, struct kfd_process *p,
     return ret;
 }
 
-static bool
-allocate_signal_page_dgpu(struct kfd_process *p,
-            uint64_t *kernel_address, uint64_t handle)
+static bool allocate_signal_page_dgpu(struct kfd_process *p,
+                uint64_t *kernel_address, uint64_t handle)
 {
     struct signal_page *my_page;
 
@@ -248,11 +249,10 @@ void kfd_free_signal_page_dgpu(struct kfd_process *p, uint64_t handle)
     }
 }
 
-static bool
-allocate_debug_event_notification_slot(struct file *devkfd,
-                    struct kfd_process *p,
-                    struct signal_page **out_page,
-                    unsigned int *out_slot_index)
+static bool allocate_debug_event_notification_slot(struct file *devkfd,
+                    struct kfd_process *p,
+                    struct signal_page **out_page,
+                    unsigned int *out_slot_index)
 {
     struct signal_page *page;
     unsigned int slot;
@@ -299,8 +299,9 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
 {
     struct signal_page *page;
 
-    /* This is safe because we don't delete signal pages until the process
-     * exits.
+    /*
+     * This is safe because we don't delete signal pages until the
+     * process exits.
      */
     list_for_each_entry(page, &p->signal_event_pages, event_pages)
         if (page->page_index == page_index)
@@ -309,8 +310,9 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
     return NULL;
 }
 
-/* Assumes that p->event_mutex is held and of course that p is not going away
- * (current or locked).
+/*
+ * Assumes that p->event_mutex is held and of course that p is not going
+ * away (current or locked).
  */
 static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
 {
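For readers skimming the allocator being reformatted here: each process owns pages of 64-bit signal slots plus a per-page used-slot bitmap, and slot allocation is a linear bitmap scan. A cut-down sketch with a single 64-entry page and a plain uint64_t bitmap standing in for SLOTS_PER_PAGE and find_first_zero_bit():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SLOTS_PER_PAGE 64

struct signal_page {
        uint64_t slots[SLOTS_PER_PAGE]; /* one 64-bit slot per event */
        uint64_t used;                  /* one bit per slot */
};

static bool allocate_free_slot(struct signal_page *page, unsigned int *out)
{
        for (unsigned int i = 0; i < SLOTS_PER_PAGE; i++) {
                if (!(page->used & (1ULL << i))) {
                        page->used |= 1ULL << i;
                        *out = i;
                        return true;
                }
        }
        return false;                   /* page full: allocate a new page */
}

int main(void)
{
        struct signal_page page;
        unsigned int slot;

        memset(&page, 0, sizeof(page));
        while (allocate_free_slot(&page, &slot))
                ;
        printf("page exhausted after %u slots\n", SLOTS_PER_PAGE);
        return 0;
}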
@@ -333,26 +335,28 @@ static u32 make_signal_event_id(struct signal_page *page,
          unsigned int signal_slot_index)
 {
     return page->page_index |
-        (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
+           (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
 }
 
-/* Produce a kfd event id for a nonsignal event.
- * These are arbitrary numbers, so we do a sequential search through the hash
- * table for an unused number.
+/*
+ * Produce a kfd event id for a nonsignal event.
+ * These are arbitrary numbers, so we do a sequential search through
+ * the hash table for an unused number.
  */
 static u32 make_nonsignal_event_id(struct kfd_process *p)
 {
     u32 id;
 
     for (id = p->next_nonsignal_event_id;
-        id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-        lookup_event_by_id(p, id);
-        id++)
+         id < KFD_LAST_NONSIGNAL_EVENT_ID &&
+         lookup_event_by_id(p, id);
+         id++)
         ;
 
     if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
-        /* What if id == LAST_NONSIGNAL_EVENT_ID - 1?
+        /*
+         * What if id == LAST_NONSIGNAL_EVENT_ID - 1?
          * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
          * the first loop fails immediately and we proceed with the
          * wraparound loop below.
@@ -363,9 +367,9 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)
     }
 
     for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
-        id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-        lookup_event_by_id(p, id);
-        id++)
+         id < KFD_LAST_NONSIGNAL_EVENT_ID &&
+         lookup_event_by_id(p, id);
+         id++)
         ;
 
 
@@ -373,20 +377,21 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)
         p->next_nonsignal_event_id = id + 1;
         return id;
     }
+
     p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
     return 0;
 }
 
-static struct kfd_event *
-lookup_event_by_page_slot(struct kfd_process *p,
-              struct signal_page *page, unsigned int signal_slot)
+static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
+                    struct signal_page *page,
+                    unsigned int signal_slot)
 {
     return lookup_event_by_id(p, make_signal_event_id(page, signal_slot));
 }
 
-static int
-create_signal_event(struct file *devkfd, struct kfd_process *p,
-            struct kfd_event *ev)
+static int create_signal_event(struct file *devkfd,
+                struct kfd_process *p,
+                struct kfd_event *ev)
 {
     if ((ev->type == KFD_EVENT_TYPE_SIGNAL) &&
             (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT)) {
@@ -430,10 +435,10 @@ create_signal_event(struct file *devkfd, struct kfd_process *p,
     }
 
     ev->user_signal_address =
-            &ev->signal_page->user_address[ev->signal_slot_index];
+        &ev->signal_page->user_address[ev->signal_slot_index];
 
-    ev->event_id =
-        make_signal_event_id(ev->signal_page, ev->signal_slot_index);
+    ev->event_id = make_signal_event_id(ev->signal_page,
+                    ev->signal_slot_index);
 
     pr_debug("Signal event number %zu created with id %d, address %p\n",
             p->signal_event_count, ev->event_id,
@@ -442,12 +447,12 @@ create_signal_event(struct file *devkfd, struct kfd_process *p,
     return 0;
 }
 
-/* No non-signal events are supported yet.
- * We create them as events that never signal. Set event calls from user-mode
- * are failed.
+/*
+ * No non-signal events are supported yet.
+ * We create them as events that never signal.
+ * Set event calls from user-mode are failed.
  */
-static int
-create_other_event(struct kfd_process *p, struct kfd_event *ev)
+static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
 {
     ev->event_id = make_nonsignal_event_id(p);
     if (ev->event_id == 0)
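make_nonsignal_event_id() above is a classic cursor-plus-wraparound ID search: scan forward from the last handout, and if the tail of the namespace is exhausted, retry once from the first valid ID. A standalone sketch with toy bounds and an in_use() predicate in place of the hash-table lookup:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FIRST_ID 8
#define LAST_ID  16

static bool in_use(uint32_t id) { return id < 12; }     /* toy predicate */

static uint32_t alloc_id(uint32_t *next)
{
        uint32_t id;

        for (id = *next; id < LAST_ID && in_use(id); id++)
                ;
        if (id >= LAST_ID)                      /* wrap around once */
                for (id = FIRST_ID; id < LAST_ID && in_use(id); id++)
                        ;
        if (id >= LAST_ID)
                return 0;                       /* namespace exhausted */
        *next = id + 1;                         /* remember the cursor */
        return id;
}

int main(void)
{
        uint32_t next = FIRST_ID;

        printf("%u %u\n", alloc_id(&next), alloc_id(&next));   /* 12 13 */
        return 0;
}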
@@ -481,8 +486,9 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
         }
     }
 
-    /* Abandon the list of waiters. Individual waiting threads will clean
-     * up their own data.
+    /*
+     * Abandon the list of waiters. Individual waiting threads will
+     * clean up their own data.
      */
     list_del(&ev->waiters);
 
@@ -506,8 +512,9 @@ static void destroy_events(struct kfd_process *p)
         destroy_event(p, ev);
 }
 
-/* We assume that the process is being destroyed and there is no need to unmap
- * the pages or keep bookkeeping data in order.
+/*
+ * We assume that the process is being destroyed and there is no need to
+ * unmap the pages or keep bookkeeping data in order.
  */
 static void shutdown_signal_pages(struct kfd_process *p)
 {
@@ -515,10 +522,9 @@ static void shutdown_signal_pages(struct kfd_process *p)
 
     list_for_each_entry_safe(page, tmp, &p->signal_event_pages,
                 event_pages) {
-        if (page->user_address) {
+        if (page->user_address)
             free_pages((unsigned long)page->kernel_address,
                 get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
-        }
         kfree(page);
     }
 }
@@ -547,7 +553,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
         void *kern_addr)
 {
     int ret = 0;
-
     struct kfd_event *ev = kzalloc(sizeof(*ev), GFP_KERNEL);
 
     if (!ev)
@@ -689,7 +694,7 @@ static bool is_slot_signaled(struct signal_page *page, unsigned int index)
 }
 
 static void set_event_from_interrupt(struct kfd_process *p,
-                    struct kfd_event *ev)
+                     struct kfd_event *ev)
 {
     if (ev && event_can_be_gpu_signaled(ev)) {
         acknowledge_signal(p, ev);
@@ -702,7 +707,8 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 {
     struct kfd_event *ev;
 
-    /* Because we are called from arbitrary context (workqueue) as opposed
+    /*
+     * Because we are called from arbitrary context (workqueue) as opposed
      * to process context, kfd_process could attempt to exit while we are
      * running so the lookup function increments the process ref count.
      */
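The hunk that follows reformats the fallback path of kfd_signal_event_interrupt(): when the interrupt payload does not identify the event, the driver scans every slot of every signal page and fires the events whose slots the GPU set. A compact sketch of that fallback; SIGNALED and the fixed page count are illustrative stand-ins:

#include <stdint.h>
#include <stdio.h>

#define SLOTS_PER_PAGE 8
#define SIGNALED 1ULL

struct signal_page { uint64_t slots[SLOTS_PER_PAGE]; };

static void set_event(unsigned int page, unsigned int slot)
{
        printf("event at page %u slot %u signaled\n", page, slot);
}

/* Brute-force scan used when the interrupt carries no usable event ID. */
static void scan_all_slots(struct signal_page *pages, unsigned int npages)
{
        for (unsigned int p = 0; p < npages; p++)
                for (unsigned int s = 0; s < SLOTS_PER_PAGE; s++)
                        if (pages[p].slots[s] == SIGNALED)
                                set_event(p, s);
}

int main(void)
{
        struct signal_page pages[2] = { 0 };

        pages[1].slots[3] = SIGNALED;   /* pretend the GPU wrote this */
        scan_all_slots(pages, 2);
        return 0;
}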
@@ -720,9 +726,10 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
         ev = lookup_event_by_id(p, partial_id);
         set_event_from_interrupt(p, ev);
     } else {
-        /* Partial ID is in fact partial. For now we completely ignore
-         * it, but we could use any bits we did receive to search
-         * faster.
+        /*
+         * Partial ID is in fact partial. For now we completely
+         * ignore it, but we could use any bits we did receive to
+         * search faster.
          */
         struct signal_page *page;
         unsigned int i;
@@ -746,14 +753,13 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
     uint32_t i;
 
     event_waiters = kmalloc_array(num_events,
-                    sizeof(struct kfd_event_waiter), GFP_KERNEL);
+                    sizeof(struct kfd_event_waiter),
+                    GFP_KERNEL);
 
-    if (event_waiters) {
-        for (i = 0; i < num_events; i++) {
-            INIT_LIST_HEAD(&event_waiters[i].waiters);
-            event_waiters[i].sleeping_task = current;
-            event_waiters[i].activated = false;
-        }
+    for (i = 0; (event_waiters) && (i < num_events) ; i++) {
+        INIT_LIST_HEAD(&event_waiters[i].waiters);
+        event_waiters[i].sleeping_task = current;
+        event_waiters[i].activated = false;
     }
 
     return event_waiters;
@@ -789,7 +795,7 @@ static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
 }
 
 static bool test_event_condition(bool all, uint32_t num_events,
-                struct kfd_event_waiter *event_waiters)
+                 struct kfd_event_waiter *event_waiters)
 {
     uint32_t i;
     uint32_t activated_count = 0;
@@ -814,15 +820,23 @@ static bool copy_signaled_event_data(uint32_t num_events,
         struct kfd_event_waiter *event_waiters,
         struct kfd_event_data __user *data)
 {
+    struct kfd_hsa_memory_exception_data *src;
+    struct kfd_hsa_memory_exception_data __user *dst;
+    struct kfd_event_waiter *waiter;
+    struct kfd_event *event;
     uint32_t i;
 
-    for (i = 0; i < num_events; i++)
-        if (event_waiters[i].activated &&
-            event_waiters[i].event->type == KFD_EVENT_TYPE_MEMORY)
-            if (copy_to_user(&data[event_waiters[i].input_index].memory_exception_data,
-                &event_waiters[i].event->memory_exception_data,
+    for (i = 0; i < num_events; i++) {
+        waiter = &event_waiters[i];
+        event = waiter->event;
+        if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
+            dst = &data[waiter->input_index].memory_exception_data;
+            src = &event->memory_exception_data;
+            if (copy_to_user(dst, src,
                 sizeof(struct kfd_hsa_memory_exception_data)))
                 return false;
+        }
+    }
 
     return true;
 
@@ -838,7 +852,8 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
     if (user_timeout_ms == KFD_EVENT_TIMEOUT_INFINITE)
         return MAX_SCHEDULE_TIMEOUT;
 
-    /* msecs_to_jiffies interprets all values above 2^31-1 as infinite,
+    /*
+     * msecs_to_jiffies interprets all values above 2^31-1 as infinite,
      * but we consider them finite.
      * This hack is wrong, but nobody is likely to notice.
      */
@@ -866,7 +881,6 @@ int kfd_wait_on_events(struct kfd_process *p,
             (struct kfd_event_data __user *) data;
     uint32_t i;
     int ret = 0;
-
     struct kfd_event_waiter *event_waiters = NULL;
     long timeout = user_timeout_to_jiffies(user_timeout_ms);
 
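user_timeout_to_jiffies() above encodes two rules: a sentinel value means wait forever, and any other value is clamped so the millisecond-to-ticks conversion stays finite. A userspace rendering of just that clamping logic, with made-up constants and the actual tick conversion elided:

#include <stdint.h>
#include <stdio.h>

#define TIMEOUT_INFINITE 0xFFFFFFFFu    /* sentinel: wait forever */
#define MAX_FINITE_MS    0x7FFFFFFFu    /* largest finite value the
                                           ms-to-ticks helper accepts */
#define WAIT_FOREVER     (-1L)

static long user_timeout_to_ticks(uint32_t user_timeout_ms)
{
        if (user_timeout_ms == TIMEOUT_INFINITE)
                return WAIT_FOREVER;
        if (user_timeout_ms > MAX_FINITE_MS)    /* keep it finite */
                user_timeout_ms = MAX_FINITE_MS;
        return (long)user_timeout_ms;           /* ms-to-ticks elided */
}

int main(void)
{
        printf("%ld %ld\n", user_timeout_to_ticks(TIMEOUT_INFINITE),
               user_timeout_to_ticks(0xFFFFFFFEu));
        return 0;
}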
@@ -931,15 +945,14 @@ int kfd_wait_on_events(struct kfd_process *p,
              * This is wrong when a nonzero, non-infinite timeout
              * is specified. We need to use
              * ERESTARTSYS_RESTARTBLOCK, but struct restart_block
-             * contains a union with data for each user and it's in
-             * generic kernel code that I don't want to touch yet.
+             * contains a union with data for each user and it's
+             * in generic kernel code that I don't want to
+             * touch yet.
              */
             ret = -ERESTARTSYS;
             break;
         }
 
-        set_current_state(TASK_INTERRUPTIBLE);
-
         if (test_event_condition(all, num_events, event_waiters)) {
             if (copy_signaled_event_data(num_events,
                     event_waiters, events))
@@ -954,7 +967,7 @@ int kfd_wait_on_events(struct kfd_process *p,
             break;
         }
 
-        timeout = schedule_timeout(timeout);
+        timeout = schedule_timeout_interruptible(timeout);
     }
     __set_current_state(TASK_RUNNING);
 
@@ -1027,16 +1040,20 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
 static void lookup_events_by_type_and_signal(struct kfd_process *p,
         int type, void *event_data)
 {
+    struct kfd_hsa_memory_exception_data *ev_data;
     struct kfd_event *ev;
     int bkt;
     bool send_signal = true;
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
     struct hlist_node *node;
+    ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
 
-    hash_for_each(p->events, bkt, node, ev, events) {
+    hash_for_each(p->events, bkt, node, ev, events)
 #else
-    hash_for_each(p->events, bkt, ev, events) {
+    ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
+
+    hash_for_each(p->events, bkt, ev, events)
 #endif
         if (ev->type == type) {
             send_signal = false;
             dev_dbg(kfd_device,
                 "Event found: id %X type %d",
                 ev->event_id, ev->type);
             set_event(ev);
-            if (ev->type == KFD_EVENT_TYPE_MEMORY && event_data)
-                ev->memory_exception_data =
-                    *(struct kfd_hsa_memory_exception_data *)event_data;
+            if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
+                ev->memory_exception_data = *ev_data;
         }
-    }
 
     if (type == KFD_EVENT_TYPE_MEMORY) {
         dev_warn(kfd_device,
@@ -1108,24 +1123,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
     memory_exception_data.gpu_id = dev->id;
     memory_exception_data.va = address;
     /* Set failure reason */
-    memory_exception_data.failure.NotPresent = true;
-    memory_exception_data.failure.NoExecute = false;
-    memory_exception_data.failure.ReadOnly = false;
+    memory_exception_data.failure.NotPresent = 1;
+    memory_exception_data.failure.NoExecute = 0;
+    memory_exception_data.failure.ReadOnly = 0;
     if (vma) {
         if (vma->vm_start > address) {
-            memory_exception_data.failure.NotPresent = true;
-            memory_exception_data.failure.NoExecute = false;
-            memory_exception_data.failure.ReadOnly = false;
+            memory_exception_data.failure.NotPresent = 1;
+            memory_exception_data.failure.NoExecute = 0;
+            memory_exception_data.failure.ReadOnly = 0;
         } else {
-            memory_exception_data.failure.NotPresent = false;
+            memory_exception_data.failure.NotPresent = 0;
             if (is_write_requested && !(vma->vm_flags & VM_WRITE))
-                memory_exception_data.failure.ReadOnly = true;
+                memory_exception_data.failure.ReadOnly = 1;
             else
-                memory_exception_data.failure.ReadOnly = false;
+                memory_exception_data.failure.ReadOnly = 0;
             if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
-                memory_exception_data.failure.NoExecute = true;
+                memory_exception_data.failure.NoExecute = 1;
             else
-                memory_exception_data.failure.NoExecute = false;
+                memory_exception_data.failure.NoExecute = 0;
         }
     }
 
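The true/false to 1/0 switch in this hunk reads as noise until you know the failure flags are one-bit bitfields, which upstream amdkfd writes as plain integers. A hypothetical cut-down version of the structure for illustration; the real layout lives in the KFD ioctl headers:

#include <stdio.h>

struct kfd_memory_exception_failure {
        unsigned int NotPresent : 1;    /* no page at the faulting VA */
        unsigned int ReadOnly   : 1;    /* write to a read-only mapping */
        unsigned int NoExecute  : 1;    /* instruction fetch from NX page */
};

int main(void)
{
        struct kfd_memory_exception_failure failure = { 0 };

        failure.NotPresent = 1;         /* same bit pattern as 'true' */
        failure.ReadOnly = 0;
        printf("NotPresent=%u ReadOnly=%u NoExecute=%u\n",
               failure.NotPresent, failure.ReadOnly, failure.NoExecute);
        return 0;
}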
@@ -1185,12 +1200,12 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
     if (info) {
         memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
         memory_exception_data.failure.NotPresent =
-            info->prot_valid ? true : false;
+            info->prot_valid ? 1 : 0;
         memory_exception_data.failure.NoExecute =
-            info->prot_exec ? true : false;
+            info->prot_exec ? 1 : 0;
         memory_exception_data.failure.ReadOnly =
-            info->prot_write ? true : false;
-        memory_exception_data.failure.imprecise = false;
+            info->prot_write ? 1 : 0;
+        memory_exception_data.failure.imprecise = 0;
     }
 
     mutex_lock(&p->event_mutex);
@@ -1208,4 +1223,3 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
     mutex_unlock(&p->event_mutex);
     kfd_unref_process(p);
 }
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index d7987eb..28f6838 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -34,7 +34,8 @@
 #define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
 #define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
 
-/* Written into kfd_signal_slot_t to indicate that the event is not signaled.
+/*
+ * Written into kfd_signal_slot_t to indicate that the event is not signaled.
  * Since the event protocol may need to write the event ID into memory, this
  * must not be a valid event ID.
  * For the sake of easy memset-ing, this must be a byte pattern.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index fcab9c6..09595a9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -33,11 +33,7 @@
 #include 
 #include "kfd_priv.h"
 #include 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
-#include 
-#else
-#include 
-#endif
+#include 
 #include 
 
 /*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 6dc7e36..903ef25 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -220,8 +220,9 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
     queue_address = (unsigned int *)kq->pq_kernel_addr;
     queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
 
-    pr_debug("rptr: %d\n wptr: %d\n queue_address 0x%p\n", rptr, wptr,
-        queue_address);
+    pr_debug("rptr: %d\n", rptr);
+    pr_debug("wptr: %d\n", wptr);
+    pr_debug("queue_address 0x%p\n", queue_address);
 
     available_size = (rptr + queue_size_dwords - 1 - wptr) %
                             queue_size_dwords;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index beb8732..367cbdf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -188,7 +188,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
         use_static = false; /* no static queues under SDMA */
         break;
     default:
-        WARN(1, "queue type %d\n", q->properties.type);
+        WARN(1, "queue type %d", q->properties.type);
         return -EINVAL;
     }
     packet->bitfields3.doorbell_offset =
@@ -233,7 +233,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
             engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
         break;
     default:
-        WARN(1, "queue type %d\n", type);
+        WARN(1, "queue type %d", type);
         return -EINVAL;
     }
 
@@ -266,7 +266,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
             queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
         break;
     default:
-        WARN(1, "filter %d\n", filter);
+        WARN(1, "filter %d", filter);
         return -EINVAL;
     }
 
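The available_size computation in the acquire_packet_buffer() context above is the standard one-slot-reserved ring formula: with wptr == rptr meaning empty, one dword is sacrificed so a full ring is never mistaken for an empty one. A tiny demo of the arithmetic, not driver code:

#include <stdio.h>

/* Free space, in dwords, of a ring of size_dwords entries. */
static unsigned int ring_free_dwords(unsigned int rptr, unsigned int wptr,
                                     unsigned int size_dwords)
{
        return (rptr + size_dwords - 1 - wptr) % size_dwords;
}

int main(void)
{
        /* empty ring: everything but the reserved slot is free */
        printf("%u\n", ring_free_dwords(0, 0, 256));    /* 255 */
        /* writer one slot behind the reader: ring is full */
        printf("%u\n", ring_free_dwords(8, 7, 256));    /* 0 */
        return 0;
}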
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
index 5fbc5a0..b123c4a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -193,7 +193,7 @@ int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
         use_static = false; /* no static queues under SDMA */
         break;
     default:
-        WARN(1, "queue type %d\n", q->properties.type);
+        WARN(1, "queue type %d", q->properties.type);
         return -EINVAL;
     }
     packet->bitfields3.doorbell_offset =
@@ -266,7 +266,7 @@ int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
             engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
         break;
     default:
-        WARN(1, "queue type %d\n", type);
+        WARN(1, "queue type %d", type);
         return -EINVAL;
     }
 
@@ -299,7 +299,7 @@ int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
             queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
         break;
     default:
-        WARN(1, "filter %d\n", filter);
+        WARN(1, "filter %d", filter);
         return -EINVAL;
     }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 24587d7..aba3e9d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -33,7 +33,6 @@
 #define KFD_DRIVER_MAJOR    2
 #define KFD_DRIVER_MINOR    0
 #define KFD_DRIVER_PATCHLEVEL   0
-#define KFD_DRIVER_RC_LEVEL ""
 
 static const struct kgd2kfd_calls kgd2kfd = {
     .exit = kgd2kfd_exit,
@@ -188,5 +187,4 @@ MODULE_DESCRIPTION(KFD_DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
 MODULE_VERSION(__stringify(KFD_DRIVER_MAJOR) "."
            __stringify(KFD_DRIVER_MINOR) "."
-           __stringify(KFD_DRIVER_PATCHLEVEL)
-           KFD_DRIVER_RC_LEVEL);
+           __stringify(KFD_DRIVER_PATCHLEVEL));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 71e7521..b96f684 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -30,7 +30,7 @@
 #include "cik_regs.h"
 #include "cik_structs.h"
 #include "oss/oss_2_4_sh_mask.h"
-#include "asic_reg/gca/gfx_7_2_sh_mask.h"
+#include "gca/gfx_7_2_sh_mask.h"
 
 static inline struct cik_mqd *get_mqd(void *mqd)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index e698fc1..a01e703 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -28,8 +28,8 @@
 #include "kfd_priv.h"
 #include "kfd_mqd_manager.h"
 #include "vi_structs.h"
-#include "asic_reg/gca/gfx_8_0_sh_mask.h"
-#include "asic_reg/gca/gfx_8_0_enum.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_enum.h"
 #include "oss/oss_3_0_sh_mask.h"
 
 #define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 0206d54..9fcb6fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -33,8 +33,8 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
 {
     unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);
 
-    WARN_ON((temp * sizeof(uint32_t)) > buffer_size_bytes);
-
+    WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
+         "Runlist IB overflow");
     *wptr = temp;
 }
 
@@ -102,7 +102,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
 
     retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
                     &pm->ib_buffer_obj);
-    if (retval != 0) {
+    if (retval) {
         pr_err("Failed to allocate runlist IB\n");
         goto out;
     }
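inc_wptr() above now pairs the overflow check with a message. A userspace sketch of the helper, with fprintf standing in for the kernel's WARN():

#include <stdint.h>
#include <stdio.h>

/* Advance a dword write pointer; complain if it would run past the IB. */
static void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
                     unsigned int buffer_size_bytes)
{
        unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

        if (temp * sizeof(uint32_t) > buffer_size_bytes)
                fprintf(stderr, "Runlist IB overflow\n");
        *wptr = temp;
}

int main(void)
{
        unsigned int wptr = 0;

        inc_wptr(&wptr, 32, 64);        /* fits: wptr becomes 8 dwords */
        inc_wptr(&wptr, 64, 64);        /* would overflow: warns */
        printf("wptr = %u dwords\n", wptr);
        return 0;
}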
@@ -124,20 +124,20 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
             uint64_t *rl_gpu_addr,
             size_t *rl_size_bytes)
 {
-    unsigned int alloc_size_bytes = 0;
+    unsigned int alloc_size_bytes;
     unsigned int *rl_buffer, rl_wptr, i;
     int retval, proccesses_mapped;
     struct device_process_node *cur;
     struct qcm_process_device *qpd;
     struct queue *q;
     struct kernel_queue *kq;
-    bool is_over_subscription = false;
+    bool is_over_subscription;
 
     rl_wptr = retval = proccesses_mapped = 0;
 
     retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
                 &alloc_size_bytes, &is_over_subscription);
-    if (retval != 0)
+    if (retval)
         return retval;
 
     *rl_size_bytes = alloc_size_bytes;
@@ -157,7 +157,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
         }
 
         retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
-        if (retval != 0)
+        if (retval)
             return retval;
 
         proccesses_mapped++;
@@ -175,7 +175,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
                         &rl_buffer[rl_wptr],
                         kq->queue,
                         qpd->is_debug);
-            if (retval != 0)
+            if (retval)
                 return retval;
 
             inc_wptr(&rl_wptr,
@@ -194,7 +194,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
                         &rl_buffer[rl_wptr],
                         q,
                         qpd->is_debug);
-            if (retval != 0)
+            if (retval)
                 return retval;
 
             inc_wptr(&rl_wptr,
@@ -268,8 +268,8 @@ int pm_send_set_resources(struct packet_manager *pm,
     size = pm->pmf->get_set_resources_packet_size();
     mutex_lock(&pm->lock);
     pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
-                    size / sizeof(uint32_t),
-                    (unsigned int **)&buffer);
+                size / sizeof(uint32_t),
+                (unsigned int **)&buffer);
     if (!buffer) {
         pr_err("Failed to allocate buffer on kernel queue\n");
         retval = -ENOMEM;
@@ -297,7 +297,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 
     retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
                     &rl_ib_size);
-    if (retval != 0)
+    if (retval)
         goto fail_create_runlist_ib;
 
     pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
@@ -308,12 +308,12 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 
     retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
                     packet_size_dwords, &rl_buffer);
-    if (retval != 0)
+    if (retval)
         goto fail_acquire_packet_buffer;
 
     retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
                     rl_ib_size / sizeof(uint32_t), false);
-    if (retval != 0)
+    if (retval)
         goto fail_create_runlist;
 
     pm->priv_queue->ops.submit_packet(pm->priv_queue);
@@ -327,8 +327,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 fail_acquire_packet_buffer:
     mutex_unlock(&pm->lock);
 fail_create_runlist_ib:
-    if (pm->allocated)
-        pm_release_ib(pm);
+    pm_release_ib(pm);
     return retval;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
index 595d35d..1e06de0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -92,7 +92,6 @@ unsigned int kfd_pasid_alloc(void)
 
 void kfd_pasid_free(unsigned int pasid)
 {
-    if (WARN_ON(pasid == 0 || pasid >= pasid_limit))
-        return;
-    clear_bit(pasid, pasid_bitmap);
+    if (!WARN_ON(pasid == 0 || pasid >= pasid_limit))
+        clear_bit(pasid, pasid_bitmap);
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
index 31cef21..e50f73d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -21,8 +21,8 @@
  *
  */
 
-#ifndef KFD_PM4_HEADERS_CIK_H_
-#define KFD_PM4_HEADERS_CIK_H_
+#ifndef KFD_PM4_HEADERS_H_
+#define KFD_PM4_HEADERS_H_
 
 #ifndef PM4_MES_HEADER_DEFINED
 #define PM4_MES_HEADER_DEFINED
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index bbd5464..4d17b29 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
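The error-path simplification in pm_send_runlist() above works because pm_release_ib() checks pm->allocated itself, which makes the teardown safe to call unconditionally. A toy rendering of that idempotent-release pattern:

#include <stdbool.h>
#include <stdio.h>

struct packet_manager {
        bool allocated;
};

static void pm_release_ib(struct packet_manager *pm)
{
        if (!pm->allocated)     /* nothing to do: safe to call anyway */
                return;
        printf("freeing runlist IB\n");
        pm->allocated = false;
}

int main(void)
{
        struct packet_manager pm = { .allocated = true };

        pm_release_ib(&pm);     /* frees once */
        pm_release_ib(&pm);     /* second call is a no-op */
        return 0;
}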
@@ -765,7 +765,7 @@
 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
 void kfd_unref_process(struct kfd_process *p);
 
 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
-                        struct kfd_process *p);
+                            struct kfd_process *p);
 #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
 int kfd_bind_processes_to_device(struct kfd_dev *dev);
 void kfd_unbind_processes_from_device(struct kfd_dev *dev);
@@ -804,8 +804,8 @@ int kfd_unmap_memory_from_gpu(void *mem, struct kfd_process_device *pdd);
 struct kfd_process_device *kfd_get_first_process_device_data(
                         struct kfd_process *p);
 struct kfd_process_device *kfd_get_next_process_device_data(
-                        struct kfd_process *p,
-                        struct kfd_process_device *pdd);
+                struct kfd_process *p,
+                struct kfd_process_device *pdd);
 bool kfd_has_process_device_data(struct kfd_process *p);
 
 /* PASIDs */
@@ -873,9 +873,6 @@ int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd,
             uint64_t base, uint64_t limit);
 
 /* Queue Context Management */
-inline uint32_t lower_32(uint64_t x);
-inline uint32_t upper_32(uint64_t x);
-
 int init_queue(struct queue **q, const struct queue_properties *properties);
 void uninit_queue(struct queue *q);
 void print_queue_properties(struct queue_properties *q);
@@ -995,7 +992,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 
 void pm_release_ib(struct packet_manager *pm);
 
-/* Following PM funcs can be shared among KV and VI */
+/* Following PM funcs can be shared among CIK and VI */
 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
 int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
             uint64_t ib, size_t ib_size_in_dwords, bool chain);
@@ -1054,7 +1051,7 @@ int kfd_wait_on_events(struct kfd_process *p,
         bool all, uint32_t user_timeout_ms,
         enum kfd_event_wait_result *wait_result);
 void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
-                    uint32_t valid_id_bits);
+                uint32_t valid_id_bits);
 #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
 void kfd_signal_iommu_event(struct kfd_dev *dev,
         unsigned int pasid, unsigned long address,
@@ -1064,10 +1061,10 @@ void kfd_signal_hw_exception_event(unsigned int pasid);
 int kfd_set_event(struct kfd_process *p, uint32_t event_id);
 int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
 int kfd_event_create(struct file *devkfd, struct kfd_process *p,
-            uint32_t event_type, bool auto_reset, uint32_t node_id,
-            uint32_t *event_id, uint32_t *event_trigger_data,
-            uint64_t *event_page_offset, uint32_t *event_slot_index,
-            void *kern_addr);
+        uint32_t event_type, bool auto_reset, uint32_t node_id,
+        uint32_t *event_id, uint32_t *event_trigger_data,
+        uint64_t *event_page_offset, uint32_t *event_slot_index,
+        void *kern_addr);
 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 void kfd_free_signal_page_dgpu(struct kfd_process *p, uint64_t handle);
 
@@ -1078,9 +1075,7 @@ void kfd_flush_tlb(struct kfd_dev *dev, uint32_t pasid);
 
 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
 
-#define KFD_SCRATCH_CZ_FW_VER 600
 #define KFD_SCRATCH_KV_FW_VER 413
-#define KFD_CWSR_CZ_FW_VER 625
 
 /* PeerDirect support */
 void kfd_init_peer_direct(void);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a1cad551..3e966a3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
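Dropping the bare `inline uint32_t lower_32(uint64_t)` prototypes above is sound: an inline declaration with no visible body does nothing for other translation units, and the kernel already provides lower_32_bits()/upper_32_bits(). If a header-local helper were still wanted, it would look like this (illustrative, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* static inline definitions are usable by every includer of the header */
static inline uint32_t lower_32(uint64_t x)
{
        return (uint32_t)x;             /* low dword */
}

static inline uint32_t upper_32(uint64_t x)
{
        return (uint32_t)(x >> 32);     /* high dword */
}

int main(void)
{
        uint64_t addr = 0x123456789ABCDEF0ULL;

        printf("hi=0x%08X lo=0x%08X\n", upper_32(addr), lower_32(addr));
        return 0;
}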
@@ -33,14 +33,8 @@
 #endif
 #include 
 #include 
-#include 
-#include 
+#include 
 #include 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
-#include 
-#else
-#include 
-#endif
 #include "kfd_ipc.h"
 
 struct mm_struct;
@@ -77,7 +71,7 @@ static struct workqueue_struct *kfd_process_wq;
 #define MAX_IDR_ID 0 /*0 - for unlimited*/
 
 static struct kfd_process *find_process(const struct task_struct *thread,
                     bool ref);
 static void kfd_process_ref_release(struct kref *ref);
 static struct kfd_process *create_process(const struct task_struct *thread,
                     struct file *filep);
@@ -466,7 +460,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
      */
     list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
         dev = pdd->dev;
-        mutex_lock(get_dbgmgr_mutex());
+        mutex_lock(kfd_get_dbgmgr_mutex());
 
         if (dev && dev->dbgmgr && (dev->dbgmgr->pasid == p->pasid)) {
 
@@ -476,7 +470,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
                 dev->dbgmgr = NULL;
             }
         }
-        mutex_unlock(get_dbgmgr_mutex());
+        mutex_unlock(kfd_get_dbgmgr_mutex());
     }
 
     kfd_process_dequeue_from_all_devices(p);
@@ -642,8 +636,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
 err_process_pqm_init:
     hash_del_rcu(&process->kfd_processes);
     synchronize_rcu();
-    mmu_notifier_unregister_no_release(&process->mmu_notifier,
-                    process->mm);
+    mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
 err_mmu_notifier:
     mutex_destroy(&process->mutex);
     kfd_pasid_free(process->pasid);
@@ -846,7 +839,7 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
 
     pr_debug("Unbinding process %d from IOMMU\n", pasid);
 
-    mutex_lock(get_dbgmgr_mutex());
+    mutex_lock(kfd_get_dbgmgr_mutex());
 
     if (dev->dbgmgr && (dev->dbgmgr->pasid == p->pasid)) {
 
@@ -856,7 +849,7 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
         }
     }
 
-    mutex_unlock(get_dbgmgr_mutex());
+    mutex_unlock(kfd_get_dbgmgr_mutex());
 
     mutex_lock(&p->mutex);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 32e782d..543a3fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -241,8 +241,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
                         kq, &pdd->qpd);
         break;
     default:
-        pr_err("Invalid queue type %d\n", type);
-        return -EINVAL;
+        WARN(1, "Invalid queue type %d", type);
+        retval = -EINVAL;
     }
 
     if (retval != 0) {
@@ -290,6 +290,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
     int retval;
 
     dqm = NULL;
+    retval = 0;
 
     pqn = get_queue_by_qid(pqm, qid);
 
@@ -303,10 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
         dev = pqn->kq->dev;
     if (pqn->q)
         dev = pqn->q->device;
-    if (!dev) {
-        pr_err("Cannot destroy queue, kfd device is NULL\n");
+    if (WARN_ON(!dev))
         return -ENODEV;
-    }
 
     pdd = kfd_get_process_device_data(dev, pqm->process);
     if (!pdd) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index b2dde30..2e53311 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -37,7 +37,7 @@
 #include "kfd_device_queue_manager.h"
 
 /* topology_device_list - Master list of all topology devices */
-struct list_head topology_device_list;
+static struct list_head topology_device_list;
 static struct kfd_system_properties sys_props;
 
 static DECLARE_RWSEM(topology_lock);
@@ -626,10 +626,8 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
     int ret;
     uint32_t i;
 
-    if (dev->kobj_node) {
-        pr_err("Cannot build sysfs node entry, kobj_node is not NULL\n");
-        return -EINVAL;
-    }
+    if (WARN_ON(dev->kobj_node))
+        return -EEXIST;
 
     /*
      * Creating the sysfs folders
-- 
2.7.4