Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1282-drm-amdkfd-Clean-up-KFD-style-errors-and-warnings.patch')
-rw-r--r--  common/recipes-kernel/linux/linux-yocto-4.14.71/1282-drm-amdkfd-Clean-up-KFD-style-errors-and-warnings.patch  3983
1 file changed, 3983 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1282-drm-amdkfd-Clean-up-KFD-style-errors-and-warnings.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1282-drm-amdkfd-Clean-up-KFD-style-errors-and-warnings.patch
new file mode 100644
index 00000000..204bae8b
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1282-drm-amdkfd-Clean-up-KFD-style-errors-and-warnings.patch
@@ -0,0 +1,3983 @@
+From 5dcb370deaa5f73307d2d24ae9bd4fbf3a2aeae1 Mon Sep 17 00:00:00 2001
+From: Kent Russell <kent.russell@amd.com>
+Date: Tue, 6 Dec 2016 13:10:34 -0500
+Subject: [PATCH 1282/4131] drm/amdkfd: Clean up KFD style errors and warnings
+
+Using checkpatch.pl -f <file> showed a number of style issues. This
+patch addresses as many of them as possible. Some long lines have been
+left for readability, but attempts to minimize them have been made.
+Also clean up usage of do..while(0) loops, which are mostly for
+debugging anyway.
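
For illustration, the do..while(0) cleanup mentioned above follows the pattern sketched below. This is a minimal, hypothetical example (foo_setup/foo_use/foo_cleanup are placeholder names, not amdkfd functions), not code taken from the patch itself:

    #include <stdio.h>

    /* Placeholder helpers standing in for the real driver calls. */
    static int foo_setup(void) { return 0; }
    static int foo_use(void) { return 0; }
    static void foo_cleanup(void) { }

    /* Old style: a single-iteration do..while(0) used only so that
     * "break" can bail out early before the common cleanup.
     */
    static int old_style(void)
    {
            int ret = -1;

            do {
                    ret = foo_setup();
                    if (ret)
                            break;
                    ret = foo_use();
            } while (0);

            foo_cleanup();
            return ret;
    }

    /* New style after the cleanup: early exit through a goto label,
     * which is the form checkpatch and kernel style prefer.
     */
    static int new_style(void)
    {
            int ret;

            ret = foo_setup();
            if (ret)
                    goto out;
            ret = foo_use();
    out:
            foo_cleanup();
            return ret;
    }

    int main(void)
    {
            printf("old=%d new=%d\n", old_style(), new_style());
            return 0;
    }

The hunks in kfd_chardev.c and kfd_dbgdev.c below apply this same transformation, replacing while(0) blocks with early returns or a shared "out:" label ahead of kfree()/cleanup.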
+
+Change-Id: Ie8511447981a051f01b16a06833a70d9df0a85df
+Signed-off-by: Kent Russell <kent.russell@amd.com>
+
+ Conflicts:
+ drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+---
+ drivers/gpu/drm/amd/amdkfd/cik_int.h | 3 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 280 +++----
+ drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 187 ++---
+ drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 4 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 803 +++++++++++----------
+ drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 12 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 288 ++++----
+ drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 292 ++++----
+ drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c | 6 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 12 +-
+ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
+ .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 6 +-
+ .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 6 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_events.c | 137 ++--
+ drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 1 +
+ drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 8 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 3 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 2 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_module.c | 5 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 5 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 1 +
+ drivers/gpu/drm/amd/amdkfd/kfd_pasid.c | 3 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c | 1 -
+ drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h | 16 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h | 10 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 58 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 28 +-
+ .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_rdma.c | 2 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 71 +-
+ 30 files changed, 1184 insertions(+), 1071 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h
+index 9054068..ff8255d 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cik_int.h
++++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h
+@@ -34,7 +34,8 @@ struct cik_ih_ring_entry {
+ uint32_t reserved3:4;
+
+ /* pipeid, meid and unused3 are officially called RINGID,
+- * but for our purposes, they always decode into pipe and ME. */
++ * but for our purposes, they always decode into pipe and ME.
++ */
+ uint32_t pipeid:2;
+ uint32_t meid:2;
+ uint32_t reserved4:4;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index 0c4ea11..8b35b70 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -557,14 +557,17 @@ kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data)
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (!dev) {
+- dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", __func__);
++ dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n",
++ __func__);
+ return status;
+ }
+
+ down_write(&p->lock);
+ mutex_lock(get_dbgmgr_mutex());
+
+- /* make sure that we have pdd, if this the first queue created for this process */
++ /* make sure that we have pdd, if this the first queue created for
++ * this process
++ */
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ mutex_unlock(get_dbgmgr_mutex());
+@@ -599,12 +602,10 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,
+ struct kfd_dev *dev;
+
+ dev = kfd_device_by_id(args->gpu_id);
+- if (dev == NULL)
+- return -EINVAL;
+-
+- if (dev->device_info->asic_family == CHIP_CARRIZO) {
+- pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
+- return -EINVAL;
++ if (!dev) {
++ dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n",
++ __func__);
++ return status;
+ }
+
+ mutex_lock(get_dbgmgr_mutex());
+@@ -646,92 +647,90 @@ kfd_ioctl_dbg_address_watch(struct file *filep,
+
+ memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
+
+- do {
+- dev = kfd_device_by_id(args->gpu_id);
+- if (!dev) {
+- dev_info(NULL,
+- "Error! kfd: In func %s >> get device by id failed\n",
+- __func__);
+- break;
+- }
+-
+- cmd_from_user = (void __user *) args->content_ptr;
+-
+- if (args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) {
+- status = -EINVAL;
+- break;
+- }
+-
+- if (args->buf_size_in_bytes <= sizeof(*args)) {
+- status = -EINVAL;
+- break;
+- }
++ dev = kfd_device_by_id(args->gpu_id);
++ if (!dev) {
++ dev_info(NULL,
++ "Error! kfd: In func %s >> get device by id failed\n",
++ __func__);
++ return -EFAULT;
++ }
+
+- /* this is the actual buffer to work with */
++ cmd_from_user = (void __user *) args->content_ptr;
+
+- args_buff = memdup_user(cmd_from_user,
+- args->buf_size_in_bytes - sizeof(*args));
+- if (IS_ERR(args_buff))
+- return PTR_ERR(args_buff);
++ if (args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE ||
++ (args->buf_size_in_bytes <= sizeof(*args)))
++ return -EINVAL;
+
+- aw_info.process = p;
++ /* this is the actual buffer to work with */
++ args_buff = memdup_user(cmd_from_user,
++ args->buf_size_in_bytes - sizeof(*args));
+
+- aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
+- args_idx += sizeof(aw_info.num_watch_points);
++ if (IS_ERR(args_buff))
++ return PTR_ERR(args_buff);
+
+- aw_info.watch_mode = (HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
+- args_idx += sizeof(HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
++ aw_info.process = p;
+
+- /* set watch address base pointer to point on the array base within args_buff */
++ aw_info.num_watch_points =
++ *((uint32_t *)(&args_buff[args_idx]));
++ args_idx += sizeof(aw_info.num_watch_points);
+
+- aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
++ aw_info.watch_mode =
++ (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
++ args_idx += sizeof(enum HSA_DBG_WATCH_MODE) *
++ aw_info.num_watch_points;
+
+- /*skip over the addresses buffer */
+- args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
++ /* set watch address base pointer to point on the array base
++ * within args_buff
++ */
+
+- if (args_idx >= args->buf_size_in_bytes) {
+- status = -EINVAL;
+- break;
+- }
++ aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
+
+- watch_mask_value = (uint64_t) args_buff[args_idx];
++ /* skip over the addresses buffer */
++ args_idx += sizeof(aw_info.watch_address) *
++ aw_info.num_watch_points;
+
+- if (watch_mask_value > 0) {
+- /* there is an array of masks */
++ if (args_idx >= args->buf_size_in_bytes) {
++ status = -EINVAL;
++ goto out;
++ }
+
+- /* set watch mask base pointer to point on the array base within args_buff */
+- aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
++ watch_mask_value = (uint64_t) args_buff[args_idx];
+
+- /*skip over the masks buffer */
+- args_idx += sizeof(aw_info.watch_mask) * aw_info.num_watch_points;
+- }
++ if (watch_mask_value > 0) {
++ /* there is an array of masks */
+
+- else
+- /* just the NULL mask, set to NULL and skip over it */
+- {
+- aw_info.watch_mask = NULL;
+- args_idx += sizeof(aw_info.watch_mask);
+- }
++ /* set watch mask base pointer to point on the array
++ * base within args_buff
++ */
++ aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
+
+- if (args_idx > args->buf_size_in_bytes) {
+- status = -EINVAL;
+- break;
+- }
++ /* skip over the masks buffer */
++ args_idx += sizeof(aw_info.watch_mask) *
++ aw_info.num_watch_points;
++ }
+
+- aw_info.watch_event = NULL; /* Currently HSA Event is not supported for DBG */
+- status = 0;
++ else
++ /* just the NULL mask, set to NULL and skip over it */
++ {
++ aw_info.watch_mask = NULL;
++ args_idx += sizeof(aw_info.watch_mask);
++ }
+
+- } while (0);
++ if (args_idx > args->buf_size_in_bytes) {
++ status = -EINVAL;
++ goto out;
++ }
+
+- if (status == 0) {
+- mutex_lock(get_dbgmgr_mutex());
++ /* Currently HSA Event is not supported for DBG */
++ aw_info.watch_event = NULL;
+
+- status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
++ mutex_lock(get_dbgmgr_mutex());
+
+- mutex_unlock(get_dbgmgr_mutex());
++ status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
+
+- }
++ mutex_unlock(get_dbgmgr_mutex());
+
++out:
+ kfree(args_buff);
+
+ return status;
+@@ -739,11 +738,13 @@ kfd_ioctl_dbg_address_watch(struct file *filep,
+
+ /*
+ * Parse and generate fixed size data structure for wave control.
+- * Buffer is generated in a "packed" form, for avoiding structure packing/pending dependencies.
++ * Buffer is generated in a "packed" form, for avoiding structure
++ * packing/pending dependencies.
+ */
+
+ static int
+-kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, void *data)
++kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p,
++ void *data)
+ {
+ long status = -EFAULT;
+ struct kfd_ioctl_dbg_wave_control_args *args = data;
+@@ -768,78 +769,76 @@ kfd_ioctl_dbg_wave_control(struct file *filep, struct kfd_process *p, void *data
+
+ dev_info(NULL, "kfd: In func %s - start\n", __func__);
+
+- do {
+- dev = kfd_device_by_id(args->gpu_id);
+- if (!dev) {
+- dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n", __func__);
+- break;
+- }
+-
+- /* input size must match the computed "compact" size */
+-
+- if (args->buf_size_in_bytes != computed_buff_size) {
+- dev_info(NULL,
+- "Error! kfd: In func %s >> size mismatch, computed : actual %u : %u\n",
+- __func__, args->buf_size_in_bytes, computed_buff_size);
+- status = -EINVAL;
+- break;
+- }
+-
+- cmd_from_user = (void __user *) args->content_ptr;
+-
+- /* copy the entire buffer from user */
++ dev = kfd_device_by_id(args->gpu_id);
++ if (!dev) {
++ dev_info(NULL, "Error! kfd: In func %s >> getting device by id failed\n",
++ __func__);
++ return -EFAULT;
++ }
+
+- args_buff = memdup_user(cmd_from_user,
+- args->buf_size_in_bytes - sizeof(*args));
+- if (IS_ERR(args_buff))
+- return PTR_ERR(args_buff);
++ /* input size must match the computed "compact" size */
+
+- if (copy_from_user(args_buff,
+- (void __user *) args->content_ptr,
+- args->buf_size_in_bytes - sizeof(*args))) {
+- dev_info(NULL,
+- "Error! kfd: In func %s >> copy_from_user failed\n",
+- __func__);
+- break;
+- }
++ if (args->buf_size_in_bytes != computed_buff_size) {
++ dev_info(NULL,
++ "Error! kfd: In func %s >> size mismatch, computed : actual %u : %u\n",
++ __func__, args->buf_size_in_bytes,
++ computed_buff_size);
++ return -EINVAL;
++ }
+
+- /* move ptr to the start of the "pay-load" area */
++ cmd_from_user = (void __user *) args->content_ptr;
+
++ /* copy the entire buffer from user */
+
+- wac_info.process = p;
++ args_buff = memdup_user(cmd_from_user,
++ args->buf_size_in_bytes - sizeof(*args));
++ if (IS_ERR(args_buff))
++ return PTR_ERR(args_buff);
+
+- wac_info.operand = (HSA_DBG_WAVEOP) *((HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
+- args_idx += sizeof(wac_info.operand);
++ if (copy_from_user(args_buff,
++ (void __user *) args->content_ptr,
++ args->buf_size_in_bytes - sizeof(*args))) {
++ dev_info(NULL,
++ "Error! kfd: In func %s >> copy_from_user failed\n",
++ __func__);
++ goto out;
++ }
+
+- wac_info.mode = (HSA_DBG_WAVEMODE) *((HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
+- args_idx += sizeof(wac_info.mode);
++ /* move ptr to the start of the "pay-load" area */
+
+- wac_info.trapId = (uint32_t) *((uint32_t *)(&args_buff[args_idx]));
+- args_idx += sizeof(wac_info.trapId);
++ wac_info.process = p;
+
+- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = *((uint32_t *)(&args_buff[args_idx]));
+- wac_info.dbgWave_msg.MemoryVA = NULL;
++ wac_info.operand =
++ *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
++ args_idx += sizeof(wac_info.operand);
+
++ wac_info.mode =
++ *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
++ args_idx += sizeof(wac_info.mode);
+
+- status = 0;
++ wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
++ args_idx += sizeof(wac_info.trapId);
+
+- } while (0);
+- if (status == 0) {
+- mutex_lock(get_dbgmgr_mutex());
++ wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
++ *((uint32_t *)(&args_buff[args_idx]));
++ wac_info.dbgWave_msg.MemoryVA = NULL;
+
+- dev_info(NULL,
+- "kfd: In func %s >> calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
+- __func__, wac_info.process, wac_info.operand, wac_info.mode, wac_info.trapId,
+- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
++ mutex_lock(get_dbgmgr_mutex());
+
+- status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
++ dev_info(NULL,
++ "kfd: In func %s >> calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
++ __func__, wac_info.process, wac_info.operand,
++ wac_info.mode, wac_info.trapId,
++ wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+
+- dev_info(NULL, "kfd: In func %s >> returned status of dbg manager is %ld\n", __func__, status);
++ status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
+
+- mutex_unlock(get_dbgmgr_mutex());
++ dev_info(NULL, "kfd: In func %s >> returned status of dbg manager is %ld\n",
++ __func__, status);
+
+- }
++ mutex_unlock(get_dbgmgr_mutex());
+
++out:
+ kfree(args_buff);
+
+ return status;
+@@ -935,7 +934,8 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
+ "scratch_limit %llX\n", pdd->scratch_limit);
+
+ args->num_of_nodes++;
+- } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL &&
++ } while ((pdd = kfd_get_next_process_device_data(p, pdd)) !=
++ NULL &&
+ (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
+ }
+
+@@ -957,7 +957,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+
+ if (args->num_of_nodes == 0) {
+ /* Return number of nodes, so that user space can alloacate
+- * sufficient memory */
++ * sufficient memory
++ */
+ down_write(&p->lock);
+
+ if (!kfd_has_process_device_data(p)) {
+@@ -978,7 +979,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+
+ /* Fill in process-aperture information for all available
+ * nodes, but not more than args->num_of_nodes as that is
+- * the amount of memory allocated by user */
++ * the amount of memory allocated by user
++ */
+ pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+ args->num_of_nodes), GFP_KERNEL);
+ if (!pa)
+@@ -1290,7 +1292,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
+ pdd->vm);
+
+ /* If freeing the buffer failed, leave the handle in place for
+- * clean-up during process tear-down. */
++ * clean-up during process tear-down.
++ */
+ if (ret == 0) {
+ down_write(&p->lock);
+ kfd_process_device_remove_obj_handle(
+@@ -1553,14 +1556,16 @@ static int kfd_ioctl_open_graphic_handle(struct file *filep,
+
+ down_write(&p->lock);
+ /*TODO: When open_graphic_handle is implemented, we need to create
+- * the corresponding interval tree. We need to know the size of
+- * the buffer through open_graphic_handle(). We use 1 for now.*/
++ * the corresponding interval tree. We need to know the size of
++ * the buffer through open_graphic_handle(). We use 1 for now.
++ */
+ idr_handle = kfd_process_device_create_obj_handle(pdd, mem,
+ args->va_addr, 1, NULL);
+ up_write(&p->lock);
+ if (idr_handle < 0) {
+ /* FIXME: destroy_process_gpumem doesn't seem to be
+- * implemented anywhere */
++ * implemented anywhere
++ */
+ dev->kfd2kgd->destroy_process_gpumem(dev->kgd, mem);
+ return -EFAULT;
+ }
+@@ -1606,7 +1611,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
+ struct kgd_dev *dma_buf_kgd;
+ void *metadata_buffer = NULL;
+ uint32_t flags;
+- unsigned i;
++ unsigned int i;
+ int r;
+
+ /* Find a KFD GPU device that supports the get_dmabuf_info query */
+@@ -2003,7 +2008,8 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep,
+ #endif
+
+ #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
+- [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
++ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
++ .cmd_drv = 0, .name = #ioctl}
+
+ /** Ioctl table */
+ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+index ab35190..55c5e4e 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+@@ -12,11 +12,13 @@
+ /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
+ * GPU processor ID are expressed with Bit[31]=1.
+ * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
+- * used in the CRAT. */
++ * used in the CRAT.
++ */
+ static uint32_t gpu_processor_id_low = 0x80001000;
+
+ /* Return the next available gpu_processor_id and increment it for next GPU
+- * @total_cu_count - Total CUs present in the GPU including ones masked off
++ * @total_cu_count - Total CUs present in the GPU including ones
++ * masked off
+ */
+ static inline unsigned int get_and_inc_gpu_processor_id(
+ unsigned int total_cu_count)
+@@ -33,7 +35,8 @@ struct kfd_gpu_cache_info {
+ uint32_t cache_level;
+ uint32_t flags;
+ /* Indicates how many Compute Units share this cache
+- * Value = 1 indicates the cache is not shared */
++ * Value = 1 indicates the cache is not shared
++ */
+ uint32_t num_cu_shared;
+ };
+
+@@ -104,7 +107,8 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
+ };
+
+ /* NOTE: In future if more information is added to struct kfd_gpu_cache_info
+- * the following ASICs may need a separate table. */
++ * the following ASICs may need a separate table.
++ */
+ #define hawaii_cache_info kaveri_cache_info
+ #define tonga_cache_info carrizo_cache_info
+ #define fiji_cache_info carrizo_cache_info
+@@ -151,7 +155,7 @@ static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
+ }
+
+ /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
+- * topology device present in the device_list
++ * topology device present in the device_list
+ */
+ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
+ struct list_head *device_list)
+@@ -177,7 +181,7 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
+ }
+
+ /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
+- * topology device present in the device_list
++ * topology device present in the device_list
+ */
+ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
+ struct list_head *device_list)
+@@ -195,9 +199,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
+ if (props == NULL)
+ return -ENOMEM;
+
+- /*
+- * We're on GPU node
+- */
++ /* We're on GPU node */
+ if (dev->node_props.cpu_cores_count == 0) {
+ /* APU */
+ if (mem->visibility_type == 0)
+@@ -206,8 +208,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
+ /* dGPU */
+ else
+ props->heap_type = mem->visibility_type;
+- }
+- else
++ } else
+ props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
+
+ if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
+@@ -231,7 +232,7 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
+ }
+
+ /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
+- * topology device present in the device_list
++ * topology device present in the device_list
+ */
+ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
+ struct list_head *device_list)
+@@ -254,8 +255,8 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
+ * Compute Unit. So map the cache using CPU core Id or SIMD
+ * (GPU) ID.
+ * TODO: This works because currently we can safely assume that
+- * Compute Units are parsed before caches are parsed. In future
+- * remove this dependency
++ * Compute Units are parsed before caches are parsed. In
++ * future, remove this dependency
+ */
+ if ((id >= dev->node_props.cpu_core_id_base &&
+ id <= dev->node_props.cpu_core_id_base +
+@@ -298,7 +299,7 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
+ }
+
+ /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
+- * topology device present in the device_list
++ * topology device present in the device_list
+ */
+ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
+ struct list_head *device_list)
+@@ -313,7 +314,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
+ id_from = iolink->proximity_domain_from;
+ id_to = iolink->proximity_domain_to;
+
+- pr_debug("Found IO link entry in CRAT table with id_from=%d\n", id_from);
++ pr_debug("Found IO link entry in CRAT table with id_from=%d\n",
++ id_from);
+ list_for_each_entry(dev, device_list, list) {
+ if (id_from == dev->proximity_domain) {
+ props = kfd_alloc_struct(props);
+@@ -368,7 +370,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
+ }
+
+ /* kfd_parse_subtype - parse subtypes and attach it to correct topology device
+- * present in the device_list
++ * present in the device_list
+ * @sub_type_hdr - subtype section of crat_image
+ * @device_list - list of topology devices present in this crat_image
+ */
+@@ -397,15 +399,11 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
+ ret = kfd_parse_subtype_cache(cache, device_list);
+ break;
+ case CRAT_SUBTYPE_TLB_AFFINITY:
+- /*
+- * For now, nothing to do here
+- */
++ /* For now, nothing to do here */
+ pr_debug("Found TLB entry in CRAT table (not processing)\n");
+ break;
+ case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
+- /*
+- * For now, nothing to do here
+- */
++ /* For now, nothing to do here */
+ pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
+ break;
+ case CRAT_SUBTYPE_IOLINK_AFFINITY:
+@@ -421,12 +419,13 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
+ }
+
+ /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
+- * create a kfd_topology_device and add in to device_list. Also parse
+- * CRAT subtypes and attach it to appropriate kfd_topology_device
++ * create a kfd_topology_device and add in to device_list. Also parse
++ * CRAT subtypes and attach it to appropriate kfd_topology_device
+ * @crat_image - input image containing CRAT
+- * @device_list - [OUT] list of kfd_topology_device generated after parsing
+- * crat_image
++ * @device_list - [OUT] list of kfd_topology_device generated after
++ * parsing crat_image
+ * @proximity_domain - Proximity domain of the first device in the table
++ *
+ * Return - 0 if successful else -ve value
+ */
+ int kfd_parse_crat_table(void *crat_image,
+@@ -445,9 +444,8 @@ int kfd_parse_crat_table(void *crat_image,
+ if (!crat_image)
+ return -EINVAL;
+
+- if (!list_empty(device_list)) {
++ if (!list_empty(device_list))
+ pr_warn("Error device list should be empty\n");
+- }
+
+ num_nodes = crat_table->num_domains;
+ image_len = crat_table->length;
+@@ -465,7 +463,8 @@ int kfd_parse_crat_table(void *crat_image,
+ return -ENOMEM;
+
+ memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
+- memcpy(top_dev->oem_table_id, crat_table->oem_table_id, CRAT_OEMTABLEID_LENGTH);
++ memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
++ CRAT_OEMTABLEID_LENGTH);
+ top_dev->oem_revision = crat_table->oem_revision;
+
+ last_header_type = last_header_length = 0;
+@@ -527,7 +526,8 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache,
+
+ /* CU could be inactive. In case of shared cache find the first active
+ * CU. and incase of non-shared cache check if the CU is inactive. If
+- * inactive active skip it*/
++ * inactive active skip it
++ */
+ if (first_active_cu) {
+ memset(pcache, 0, sizeof(struct crat_subtype_cache));
+ pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
+@@ -539,7 +539,8 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache,
+ pcache->cache_size = pcache_info[cache_type].cache_size;
+
+ /* Sibling map is w.r.t processor_id_low, so shift out
+- * inactive CU */
++ * inactive CU
++ */
+ cu_sibling_map_mask =
+ cu_sibling_map_mask >> (first_active_cu - 1);
+
+@@ -555,9 +556,12 @@ static int fill_in_pcache(struct crat_subtype_cache *pcache,
+ return 1;
+ }
+
+-/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info tables
++/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
++ * tables
++ *
+ * @kdev - [IN] GPU device
+- * @gpu_processor_id - [IN] GPU processor ID to which these caches associate
++ * @gpu_processor_id - [IN] GPU processor ID to which these caches
++ * associate
+ * @available_size - [IN] Amount of memory available in pcache
+ * @cu_info - [IN] Compute Unit info obtained from KGD
+ * @pcache - [OUT] memory into which cache data is to be filled in.
+@@ -674,15 +678,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
+
+ /*
+ * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
+- * copies CRAT from ACPI (if available).
+- *
++ * copies CRAT from ACPI (if available).
+ * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
+ *
+- * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
+- * *crat_image will be NULL
+- * @size: [OUT] size of crat_image
++ * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
++ * crat_image will be NULL
++ * @size: [OUT] size of crat_image
+ *
+- * Return 0 if successful else return -ve value
++ * Return 0 if successful else return -ve value
+ */
+ #ifdef CONFIG_ACPI
+ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
+@@ -696,15 +699,14 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
+
+ *crat_image = NULL;
+
+- /*
+- * Fetch the CRAT table from ACPI
+- */
++ /* Fetch the CRAT table from ACPI */
+ status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
+ if (status == AE_NOT_FOUND) {
+ pr_warn("CRAT table not found\n");
+ return -ENODATA;
+ } else if (ACPI_FAILURE(status)) {
+ const char *err = acpi_format_exception(status);
++
+ pr_err("CRAT table error: %s\n", err);
+ return -EINVAL;
+ }
+@@ -740,11 +742,11 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
+
+ /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
+ *
+- * @numa_node_id: CPU NUMA node id
+- * @avail_size: Available size in the memory
+- * @sub_type_hdr: Memory into which compute info will be filled in
++ * @numa_node_id: CPU NUMA node id
++ * @avail_size: Available size in the memory
++ * @sub_type_hdr: Memory into which compute info will be filled in
+ *
+- * Return 0 if successful else return -ve value
++ * Return 0 if successful else return -ve value
+ */
+ static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
+ int proximity_domain,
+@@ -779,11 +781,11 @@ static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
+
+ /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
+ *
+- * @numa_node_id: CPU NUMA node id
+- * @avail_size: Available size in the memory
+- * @sub_type_hdr: Memory into which compute info will be filled in
++ * @numa_node_id: CPU NUMA node id
++ * @avail_size: Available size in the memory
++ * @sub_type_hdr: Memory into which compute info will be filled in
+ *
+- * Return 0 if successful else return -ve value
++ * Return 0 if successful else return -ve value
+ */
+ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
+ int proximity_domain,
+@@ -808,7 +810,8 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
+
+ /* Unlike si_meminfo, si_meminfo_node is not exported. So
+ * the following lines are duplicated from si_meminfo_node
+- * function */
++ * function
++ */
+ pgdat = NODE_DATA(numa_node_id);
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+@@ -874,7 +877,7 @@ static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
+ *
+ * @pcrat_image: Fill in VCRAT for CPU
+ * @size: [IN] allocated size of crat_image.
+- * [OUT] actual size of data filled in crat_image
++ * [OUT] actual size of data filled in crat_image
+ */
+ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
+ {
+@@ -902,7 +905,8 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
+ return -ENOMEM;
+
+ memset(crat_table, 0, sizeof(struct crat_header));
+- memcpy(&crat_table->signature, CRAT_SIGNATURE, sizeof(crat_table->signature));
++ memcpy(&crat_table->signature, CRAT_SIGNATURE,
++ sizeof(crat_table->signature));
+ crat_table->length = sizeof(struct crat_header);
+
+ #ifdef CONFIG_ACPI
+@@ -911,8 +915,10 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
+ pr_warn("DSDT table not found for OEM information\n");
+ else {
+ crat_table->oem_revision = acpi_table->revision;
+- memcpy(crat_table->oem_id, acpi_table->oem_id, CRAT_OEMID_LENGTH);
+- memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, CRAT_OEMTABLEID_LENGTH);
++ memcpy(crat_table->oem_id, acpi_table->oem_id,
++ CRAT_OEMID_LENGTH);
++ memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
++ CRAT_OEMTABLEID_LENGTH);
+ }
+ #else
+ crat_table->oem_revision = 0;
+@@ -974,8 +980,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
+ /* TODO: Add cache Subtype for CPU.
+ * Currently, CPU cache information is available in function
+ * detect_cache_attributes(cpu) defined in the file
+- * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not exported
+- * and to get the same information the code needs to be duplicated.
++ * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
++ * exported and to get the same information the code needs to be
++ * duplicated.
+ */
+
+ *size = crat_table->length;
+@@ -1014,14 +1021,13 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
+ }
+
+ /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
+- * to its NUMA node
+- *
+- * @avail_size: Available size in the memory
+- * @kdev - [IN] GPU device
+- * @sub_type_hdr: Memory into which io link info will be filled in
+- * @proximity_domain - proximity domain of the GPU node
++ * to its NUMA node
++ * @avail_size: Available size in the memory
++ * @kdev - [IN] GPU device
++ * @sub_type_hdr: Memory into which io link info will be filled in
++ * @proximity_domain - proximity domain of the GPU node
+ *
+- * Return 0 if successful else return -ve value
++ * Return 0 if successful else return -ve value
+ */
+ static int kfd_fill_gpu_direct_io_link(int *avail_size,
+ struct kfd_dev *kdev,
+@@ -1040,7 +1046,8 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size,
+ sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+
+ /* Fill in IOLINK subtype.
+- * TODO: Fill-in other fields of iolink subtype */
++ * TODO: Fill-in other fields of iolink subtype
++ */
+ sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
+ sub_type_hdr->proximity_domain_from = proximity_domain;
+ #ifdef CONFIG_NUMA
+@@ -1076,8 +1083,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
+ struct amd_iommu_device_info iommu_info;
+ const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
+- AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
+- AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
++ AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
++ AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+ #endif
+ struct kfd_local_mem_info local_mem_info;
+
+@@ -1093,8 +1100,10 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+
+ memset(crat_table, 0, sizeof(struct crat_header));
+
+- memcpy(&crat_table->signature, CRAT_SIGNATURE, sizeof(crat_table->signature));
+- crat_table->length = sizeof(struct crat_header); /* Change length as we add more subtypes*/
++ memcpy(&crat_table->signature, CRAT_SIGNATURE,
++ sizeof(crat_table->signature));
++ /* Change length as we add more subtypes*/
++ crat_table->length = sizeof(struct crat_header);
+ crat_table->num_domains = 1;
+ crat_table->total_entries = 0;
+
+@@ -1135,11 +1144,13 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ cu->hsa_capability = 0;
+
+ /* Check if this node supports IOMMU. During parsing this flag will
+- * translate to HSA_CAP_ATS_PRESENT */
++ * translate to HSA_CAP_ATS_PRESENT
++ */
+ #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
+ iommu_info.flags = 0;
+- if (0 == amd_iommu_device_info(kdev->pdev, &iommu_info)) {
+- if ((iommu_info.flags & required_iommu_flags) == required_iommu_flags)
++ if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) {
++ if ((iommu_info.flags & required_iommu_flags) ==
++ required_iommu_flags)
+ cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
+ }
+ #endif
+@@ -1150,7 +1161,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ /* Fill in Subtype: Memory. Only on systems with large BAR (no
+ * private FB), report memory as public. On other systems
+ * report the total FB size (public+private) as a single
+- * private heap. */
++ * private heap.
++ */
+ kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info);
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+ sub_type_hdr->length);
+@@ -1180,7 +1192,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ crat_table->total_entries++;
+
+ /* TODO: Fill in cache information. This information is NOT readily
+- * available in KGD */
++ * available in KGD
++ */
+ sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+ sub_type_hdr->length);
+ ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
+@@ -1224,17 +1237,17 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
+ *
+ * @crat_image: VCRAT image created because ACPI does not have a
+- * CRAT for this device
++ * CRAT for this device
+ * @size: [OUT] size of virtual crat_image
+ * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device
+- * COMPUTE_UNIT_GPU - Create VCRAT for GPU
+- * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
+- * -- this option is not currently implemented. The assumption
+- * is that all AMD APUs will have CRAT
++ * COMPUTE_UNIT_GPU - Create VCRAT for GPU
++ * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
++ * -- this option is not currently implemented.
++ * The assumption is that all AMD APUs will have CRAT
+ * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
+ *
+- * Return 0 if successful else return -ve value
+-*/
++ * Return 0 if successful else return -ve value
++ */
+ int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+ int flags, struct kfd_dev *kdev, uint32_t proximity_domain)
+ {
+@@ -1269,8 +1282,8 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+ ret = kfd_create_vcrat_image_gpu(pcrat_image, size,
+ kdev, proximity_domain);
+ break;
+- case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) :
+- /*TODO:*/
++ case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
++ /* TODO: */
+ ret = -EINVAL;
+ pr_err("VCRAT not implemented for APU\n");
+ break;
+@@ -1287,12 +1300,10 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+
+ /* kfd_destroy_crat_image
+ *
+- * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
++ * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
+ *
+ */
+ void kfd_destroy_crat_image(void *crat_image)
+ {
+- if (crat_image)
+- kfree(crat_image);
+- return;
++ kfree(crat_image);
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+index f01aea2..00de41f 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+@@ -46,8 +46,8 @@
+ #define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
+
+ /* Compute Unit flags */
+-#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */
+-#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */
++#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */
++#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */
+
+ struct crat_header {
+ uint32_t signature;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+index 5fea0d3..0fdc147 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+@@ -64,104 +64,112 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
+ union ULARGE_INTEGER *largep;
+ union ULARGE_INTEGER addr;
+
+- do {
+- if ((kq == NULL) || (packet_buff == NULL) || (size_in_bytes == 0)) {
+- pr_debug("Error! kfd: In func %s >> Illegal packet parameters\n", __func__);
+- status = -EINVAL;
+- break;
+- }
+- /* todo - enter proper locking to be multithreaded safe */
+-
+- /* We acquire a buffer from DIQ
+- * The receive packet buff will be sitting on the Indirect Buffer
+- * and in the PQ we put the IB packet + sync packet(s).
+- */
+- if (sync)
+- pq_packets_size_in_bytes +=
+- sizeof(struct pm4_mec_release_mem);
+- status = kq->ops.acquire_packet_buffer(kq, pq_packets_size_in_bytes / sizeof(uint32_t), &ib_packet_buff);
+- if (status != 0) {
+- pr_debug("Error! kfd: In func %s >> acquire_packet_buffer failed\n", __func__);
+- break;
+- }
+-
+- memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
++ if ((kq == NULL) || (packet_buff == NULL) ||
++ (size_in_bytes == 0)) {
++ pr_debug("Error! kfd: In func %s >> Illegal packet parameters\n",
++ __func__);
++ return -EINVAL;
++ }
++ /* todo - enter proper locking to be multithreaded safe */
++
++ /* We acquire a buffer from DIQ
++ * The receive packet buff will be sitting on the Indirect
++ * Buffer and in the PQ we put the IB packet + sync packet(s).
++ */
++ if (sync)
++ pq_packets_size_in_bytes +=
++ sizeof(struct pm4_mec_release_mem);
++ status = kq->ops.acquire_packet_buffer(kq,
++ pq_packets_size_in_bytes / sizeof(uint32_t),
++ &ib_packet_buff);
++ if (status != 0) {
++ pr_debug("Error! kfd: In func %s >> acquire_packet_buffer failed\n",
++ __func__);
++ return status;
++ }
+
+- ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
++ memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
+
+- ib_packet->header.count = 3;
+- ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
+- ib_packet->header.type = PM4_TYPE_3;
++ ib_packet =
++ (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
+
+- largep = (union ULARGE_INTEGER *) &vmid0_address;
++ ib_packet->header.count = 3;
++ ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
++ ib_packet->header.type = PM4_TYPE_3;
+
+- ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
+- ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
++ largep = (union ULARGE_INTEGER *) &vmid0_address;
+
+- ib_packet->control = (1 << 23) | (1 << 31) |
+- ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
++ ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
++ ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
+
+- ib_packet->bitfields5.pasid = pasid;
++ ib_packet->control = (1 << 23) | (1 << 31) |
++ ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
+
+- if (!sync) {
+- kq->ops.submit_packet(kq);
+- break;
+- }
++ ib_packet->bitfields5.pasid = pasid;
+
+- /*
+- * for now we use release mem for GPU-CPU synchronization
+- * Consider WaitRegMem + WriteData as a better alternative
+- * we get a GART allocations ( gpu/cpu mapping),
+- * for the sync variable, and wait until:
+- * (a) Sync with HW
+- * (b) Sync var is written by CP to mem.
+- */
+- rm_packet = (struct pm4_mec_release_mem *) (ib_packet_buff +
+- (sizeof(struct pm4__indirect_buffer_pasid) / sizeof(unsigned int)));
+-
+- status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
+- &mem_obj);
++ if (!sync) {
++ kq->ops.submit_packet(kq);
++ return status;
++ }
+
+- if (status == 0) {
++ /*
++ * for now we use release mem for GPU-CPU synchronization
++ * Consider WaitRegMem + WriteData as a better alternative
++ * we get a GART allocations ( gpu/cpu mapping),
++ * for the sync variable, and wait until:
++ * (a) Sync with HW
++ * (b) Sync var is written by CP to mem.
++ */
++ rm_packet = (struct pm4_mec_release_mem *) (ib_packet_buff +
++ (sizeof(struct pm4__indirect_buffer_pasid) /
++ sizeof(unsigned int)));
++
++ status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
++ &mem_obj);
++
++ if (status != 0) {
++ pr_debug("Error! kfd: In func %s >> failed to allocate GART memory\n",
++ __func__);
++ return status;
++ }
+
+- rm_state = (uint64_t *) mem_obj->cpu_ptr;
++ rm_state = (uint64_t *) mem_obj->cpu_ptr;
+
+- *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
++ *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
+
+- rm_packet->header.opcode = IT_RELEASE_MEM;
+- rm_packet->header.type = PM4_TYPE_3;
+- rm_packet->header.count = sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int) - 2;
++ rm_packet->header.opcode = IT_RELEASE_MEM;
++ rm_packet->header.type = PM4_TYPE_3;
++ rm_packet->header.count = sizeof(struct pm4_mec_release_mem) /
++ sizeof(unsigned int) - 2;
+
+- rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+- rm_packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+- rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+- rm_packet->bitfields2.atc = 0;
+- rm_packet->bitfields2.tc_wb_action_ena = 1;
++ rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
++ rm_packet->bitfields2.event_index =
++ event_index___release_mem__end_of_pipe;
++ rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
++ rm_packet->bitfields2.atc = 0;
++ rm_packet->bitfields2.tc_wb_action_ena = 1;
+
+- addr.quad_part = mem_obj->gpu_addr;
++ addr.quad_part = mem_obj->gpu_addr;
+
+- rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
+- rm_packet->address_hi = addr.u.high_part;
++ rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
++ rm_packet->address_hi = addr.u.high_part;
+
+- rm_packet->bitfields3.data_sel = data_sel___release_mem__send_64_bit_data;
+- rm_packet->bitfields3.int_sel = int_sel___release_mem__send_data_after_write_confirm;
+- rm_packet->bitfields3.dst_sel = dst_sel___release_mem__memory_controller;
++ rm_packet->bitfields3.data_sel =
++ data_sel___release_mem__send_64_bit_data;
++ rm_packet->bitfields3.int_sel =
++ int_sel___release_mem__send_data_after_write_confirm;
++ rm_packet->bitfields3.dst_sel =
++ dst_sel___release_mem__memory_controller;
+
+- rm_packet->data_lo = QUEUESTATE__ACTIVE;
++ rm_packet->data_lo = QUEUESTATE__ACTIVE;
+
+- kq->ops.submit_packet(kq);
++ kq->ops.submit_packet(kq);
+
+- /* Wait till CP writes sync code: */
++ /* Wait till CP writes sync code: */
+
+- status = amdkfd_fence_wait_timeout(
+- (unsigned int *) rm_state,
++ status = amdkfd_fence_wait_timeout((unsigned int *) rm_state,
+ QUEUESTATE__ACTIVE, 1500);
+
+- } else {
+- pr_debug("Error! kfd: In func %s >> failed to allocate GART memory\n", __func__);
+- }
+- } while (false);
+-
+ if (rm_state != NULL)
+ kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+
+@@ -170,7 +178,9 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
+
+ static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
+ {
+- /* no action is needed in this case, just make sure diq will not be used */
++ /* no action is needed in this case, just make sure diq will not
++ * be used
++ */
+
+ dbgdev->kq = NULL;
+
+@@ -186,37 +196,33 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
+ unsigned int qid;
+ struct process_queue_manager *pqm = dbgdev->pqm;
+
+- do {
+-
+- if (!pqm) {
+- pr_debug("Error! kfd: In func %s >> No PQM\n", __func__);
+- status = -EFAULT;
+- break;
+- }
+-
+- properties.type = KFD_QUEUE_TYPE_DIQ;
++ if (!pqm) {
++ pr_debug("Error! kfd: In func %s >> No PQM\n",
++ __func__);
++ return -EFAULT;
++ }
+
+- status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
+- &properties, &qid);
++ properties.type = KFD_QUEUE_TYPE_DIQ;
+
+- if (status != 0) {
+- pr_debug("Error! kfd: In func %s >> Create Queue failed\n", __func__);
+- break;
+- }
++ status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
++ &properties, &qid);
+
+- pr_debug("kfd: DIQ Created with queue id: %d\n", qid);
++ if (status != 0) {
++ pr_debug("Error! kfd: In func %s >> Create Queue failed\n",
++ __func__);
++ return status;
++ }
+
+- kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
++ pr_debug("kfd: DIQ Created with queue id: %d\n", qid);
+
+- if (kq == NULL) {
+- pr_debug("Error! kfd: In func %s >> Error getting Kernel Queue\n", __func__);
+- status = -ENOMEM;
+- break;
+- }
++ kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
+
+- dbgdev->kq = kq;
+-
+- } while (false);
++ if (kq == NULL) {
++ pr_debug("Error! kfd: In func %s >> Error getting Kernel Queue\n",
++ __func__);
++ return -ENOMEM;
++ }
++ dbgdev->kq = kq;
+
+ return status;
+ }
+@@ -233,7 +239,9 @@ static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
+ {
+ /* todo - if needed, kill wavefronts and disable watch */
+ int status = 0;
+- if ((dbgdev == NULL) || (dbgdev->pqm == NULL) || (dbgdev->kq == NULL)) {
++
++ if ((dbgdev == NULL) || (dbgdev->pqm == NULL) ||
++ (dbgdev->kq == NULL)) {
+ pr_debug("kfd Err:In func %s >> can't destroy diq\n", __func__);
+ status = -EFAULT;
+ } else {
+@@ -260,13 +268,16 @@ static void dbgdev_address_watch_set_registers(
+ cntl->u32All = 0;
+
+ if (adw_info->watch_mask != NULL)
+- cntl->bitfields.mask = (uint32_t) (adw_info->watch_mask[index] & ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
++ cntl->bitfields.mask =
++ (uint32_t) (adw_info->watch_mask[index] &
++ ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
+ else
+ cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
+
+ addr.quad_part = (unsigned long long) adw_info->watch_address[index];
+
+- addrHi->bitfields.addr = addr.u.high_part & ADDRESS_WATCH_REG_ADDHIGH_MASK;
++ addrHi->bitfields.addr = addr.u.high_part &
++ ADDRESS_WATCH_REG_ADDHIGH_MASK;
+ addrLo->bitfields.addr =
+ (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
+
+@@ -276,8 +287,10 @@ static void dbgdev_address_watch_set_registers(
+ if (KFD_IS_DGPU(asic_family) == false)
+ cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
+ pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
+- pr_debug("\t\t%20s %08x\n", "set reg add high :", addrHi->bitfields.addr);
+- pr_debug("\t\t%20s %08x\n", "set reg add low :", addrLo->bitfields.addr);
++ pr_debug("\t\t%20s %08x\n", "set reg add high :",
++ addrHi->bitfields.addr);
++ pr_debug("\t\t%20s %08x\n", "set reg add low :",
++ addrLo->bitfields.addr);
+
+ }
+
+@@ -285,8 +298,6 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
+ struct dbg_address_watch_info *adw_info)
+ {
+
+- int status = 0;
+-
+ union TCP_WATCH_ADDR_H_BITS addrHi;
+ union TCP_WATCH_ADDR_L_BITS addrLo;
+ union TCP_WATCH_CNTL_BITS cntl;
+@@ -296,68 +307,67 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
+
+ struct kfd_process_device *pdd;
+
+- do {
+- /* taking the vmid for that process on the safe way using pdd */
+- pdd = kfd_get_process_device_data(dbgdev->dev,
+- adw_info->process);
+- if (!pdd) {
+- pr_debug("Error! kfd: In func %s >> no PDD available\n", __func__);
+- status = -EFAULT;
+- break;
+- }
+-
+- addrHi.u32All = 0;
+- addrLo.u32All = 0;
+- cntl.u32All = 0;
+-
+- vmid = pdd->qpd.vmid;
+-
+- if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES)
+- || (adw_info->num_watch_points == 0)) {
+- status = -EINVAL;
+- break;
+- }
+-
+- if ((adw_info->watch_mode == NULL) || (adw_info->watch_address == NULL)) {
+- status = -EINVAL;
+- break;
+- }
++ /* taking the vmid for that process on the safe way
++ * using pdd
++ */
++ pdd = kfd_get_process_device_data(dbgdev->dev,
++ adw_info->process);
++ if (!pdd) {
++ pr_debug("Error! kfd: In func %s >> no PDD available\n",
++ __func__);
++ return -EFAULT;
++ }
+
+- for (i = 0; i < adw_info->num_watch_points; i++) {
++ addrHi.u32All = 0;
++ addrLo.u32All = 0;
++ cntl.u32All = 0;
+
+- dbgdev_address_watch_set_registers(
+- adw_info,
+- &addrHi,
+- &addrLo,
+- &cntl,
+- i,
+- vmid,
+- dbgdev->dev->device_info->asic_family
+- );
++ vmid = pdd->qpd.vmid;
+
+- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+- pr_debug("\t\t%20s %08x\n", "register index :", i);
+- pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
+- pr_debug("\t\t%20s %08x\n", "Address Low is :", addrLo.bitfields.addr);
+- pr_debug("\t\t%20s %08x\n", "Address high is :", addrHi.bitfields.addr);
+- pr_debug("\t\t%20s %08x\n", "Address high is :", addrHi.bitfields.addr);
+- pr_debug("\t\t%20s %08x\n", "Control Mask is :", cntl.bitfields.mask);
+- pr_debug("\t\t%20s %08x\n", "Control Mode is :", cntl.bitfields.mode);
+- pr_debug("\t\t%20s %08x\n", "Control Vmid is :", cntl.bitfields.vmid);
+- pr_debug("\t\t%20s %08x\n", "Control atc is :", cntl.bitfields.atc);
+- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+-
+- pdd->dev->kfd2kgd->address_watch_execute(
+- dbgdev->dev->kgd,
+- i,
+- cntl.u32All,
+- addrHi.u32All,
+- addrLo.u32All);
+- }
++ if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
++ (adw_info->num_watch_points == 0) || (adw_info->watch_mode == NULL))
++ return -EINVAL;
+
+- } while (false);
++ for (i = 0; i < adw_info->num_watch_points; i++) {
++
++ dbgdev_address_watch_set_registers(
++ adw_info,
++ &addrHi,
++ &addrLo,
++ &cntl,
++ i,
++ vmid,
++ dbgdev->dev->device_info->asic_family
++ );
++
++ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
++ pr_debug("\t\t%20s %08x\n", "register index :", i);
++ pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
++ pr_debug("\t\t%20s %08x\n", "Address Low is :",
++ addrLo.bitfields.addr);
++ pr_debug("\t\t%20s %08x\n", "Address high is :",
++ addrHi.bitfields.addr);
++ pr_debug("\t\t%20s %08x\n", "Address high is :",
++ addrHi.bitfields.addr);
++ pr_debug("\t\t%20s %08x\n", "Control Mask is :",
++ cntl.bitfields.mask);
++ pr_debug("\t\t%20s %08x\n", "Control Mode is :",
++ cntl.bitfields.mode);
++ pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
++ cntl.bitfields.vmid);
++ pr_debug("\t\t%20s %08x\n", "Control atc is :",
++ cntl.bitfields.atc);
++ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
++
++ pdd->dev->kfd2kgd->address_watch_execute(
++ dbgdev->dev->kgd,
++ i,
++ cntl.u32All,
++ addrHi.u32All,
++ addrLo.u32All);
++ }
+
+- return status;
++ return 0;
+ }
+
+ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+@@ -386,131 +396,135 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+ addrLo.u32All = 0;
+ cntl.u32All = 0;
+
+- do {
+-
+- if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || (adw_info->num_watch_points == 0)) {
+- status = -EINVAL;
+- break;
+- }
++ if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
++ (adw_info->num_watch_points == 0) ||
++ !adw_info->watch_mode || !adw_info->watch_address)
++ return -EINVAL;
+
+- if ((NULL == adw_info->watch_mode) || (NULL == adw_info->watch_address)) {
+- status = -EINVAL;
+- break;
+- }
++ status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq,
++ ib_size/sizeof(uint32_t),
++ &packet_buff_uint, &packet_buff_gpu_addr);
++ if (status != 0)
++ return status;
++ memset(packet_buff_uint, 0, ib_size);
++
++ packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
++
++ packets_vec[0].header.count = 1;
++ packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
++ packets_vec[0].header.type = PM4_TYPE_3;
++ packets_vec[0].bitfields2.vmid_shift =
++ ADDRESS_WATCH_CNTL_OFFSET;
++ packets_vec[0].bitfields2.insert_vmid = 1;
++ packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
++ packets_vec[1].bitfields2.insert_vmid = 0;
++ packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
++ packets_vec[2].bitfields2.insert_vmid = 0;
++ packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
++ packets_vec[3].bitfields2.vmid_shift =
++ ADDRESS_WATCH_CNTL_OFFSET;
++ packets_vec[3].bitfields2.insert_vmid = 1;
++
++ for (i = 0; i < adw_info->num_watch_points; i++) {
++
++ dbgdev_address_watch_set_registers(
++ adw_info,
++ &addrHi,
++ &addrLo,
++ &cntl,
++ i,
++ vmid,
++ dbgdev->dev->device_info->asic_family
++ );
+
+- status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq,
+- ib_size/sizeof(uint32_t),
+- &packet_buff_uint, &packet_buff_gpu_addr);
+-
+- if (status != 0)
+- break;
+-
+- memset(packet_buff_uint, 0, ib_size);
+-
+- packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
+-
+- packets_vec[0].header.count = 1;
+- packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
+- packets_vec[0].header.type = PM4_TYPE_3;
+- packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
+- packets_vec[0].bitfields2.insert_vmid = 1;
+- packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
+- packets_vec[1].bitfields2.insert_vmid = 0;
+- packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
+- packets_vec[2].bitfields2.insert_vmid = 0;
+- packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
+- packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
+- packets_vec[3].bitfields2.insert_vmid = 1;
+-
+- for (i = 0; i < adw_info->num_watch_points; i++) {
+-
+- dbgdev_address_watch_set_registers(
+- adw_info,
+- &addrHi,
+- &addrLo,
+- &cntl,
++ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
++ pr_debug("\t\t%20s %08x\n", "register index :", i);
++ pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
++ pr_debug("\t\t%20s %p\n", "Add ptr is :",
++ adw_info->watch_address);
++ pr_debug("\t\t%20s %08llx\n", "Add is :",
++ adw_info->watch_address[i]);
++ pr_debug("\t\t%20s %08x\n", "Address Low is :",
++ addrLo.bitfields.addr);
++ pr_debug("\t\t%20s %08x\n", "Address high is :",
++ addrHi.bitfields.addr);
++ pr_debug("\t\t%20s %08x\n", "Control Mask is :",
++ cntl.bitfields.mask);
++ pr_debug("\t\t%20s %08x\n", "Control Mode is :",
++ cntl.bitfields.mode);
++ pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
++ cntl.bitfields.vmid);
++ pr_debug("\t\t%20s %08x\n", "Control atc is :",
++ cntl.bitfields.atc);
++ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
++
++ aw_reg_add_dword =
++ dbgdev->dev->kfd2kgd
++ ->address_watch_get_offset(
++ dbgdev->dev->kgd,
+ i,
+- vmid,
+- dbgdev->dev->device_info->asic_family
+- );
+-
+- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+- pr_debug("\t\t%20s %08x\n", "register index :", i);
+- pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
+- pr_debug("\t\t%20s %p\n", "Add ptr is :", adw_info->watch_address);
+- pr_debug("\t\t%20s %08llx\n", "Add is :", adw_info->watch_address[i]);
+- pr_debug("\t\t%20s %08x\n", "Address Low is :", addrLo.bitfields.addr);
+- pr_debug("\t\t%20s %08x\n", "Address high is :", addrHi.bitfields.addr);
+- pr_debug("\t\t%20s %08x\n", "Control Mask is :", cntl.bitfields.mask);
+- pr_debug("\t\t%20s %08x\n", "Control Mode is :", cntl.bitfields.mode);
+- pr_debug("\t\t%20s %08x\n", "Control Vmid is :", cntl.bitfields.vmid);
+- pr_debug("\t\t%20s %08x\n", "Control atc is :", cntl.bitfields.atc);
+- pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+-
+- aw_reg_add_dword =
+- dbgdev->dev->kfd2kgd
+- ->address_watch_get_offset(
+- dbgdev->dev->kgd,
+- i,
+- ADDRESS_WATCH_REG_CNTL);
++ ADDRESS_WATCH_REG_CNTL);
+
+- packets_vec[0].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE;
+- packets_vec[0].reg_data[0] = cntl.u32All;
+-
+- aw_reg_add_dword =
+- dbgdev->dev->kfd2kgd
+- ->address_watch_get_offset(
+- dbgdev->dev->kgd,
+- i,
+- ADDRESS_WATCH_REG_ADDR_HI);
++ packets_vec[0].bitfields2.reg_offset =
++ aw_reg_add_dword - CONFIG_REG_BASE;
++ packets_vec[0].reg_data[0] = cntl.u32All;
+
++ aw_reg_add_dword =
++ dbgdev->dev->kfd2kgd
++ ->address_watch_get_offset(
++ dbgdev->dev->kgd,
++ i,
++ ADDRESS_WATCH_REG_ADDR_HI);
+
+- packets_vec[1].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE;
+- packets_vec[1].reg_data[0] = addrHi.u32All;
+
+- aw_reg_add_dword =
+- dbgdev->dev->kfd2kgd
+- ->address_watch_get_offset(
+- dbgdev->dev->kgd,
+- i,
+- ADDRESS_WATCH_REG_ADDR_LO);
++ packets_vec[1].bitfields2.reg_offset =
++ aw_reg_add_dword - CONFIG_REG_BASE;
++ packets_vec[1].reg_data[0] = addrHi.u32All;
+
++ aw_reg_add_dword =
++ dbgdev->dev->kfd2kgd
++ ->address_watch_get_offset(
++ dbgdev->dev->kgd,
++ i,
++ ADDRESS_WATCH_REG_ADDR_LO);
+
+- packets_vec[2].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE;
+- packets_vec[2].reg_data[0] = addrLo.u32All;
+
+- /* enable watch flag if address is not zero*/
+- if (adw_info->watch_address[i] > 0)
+- cntl.bitfields.valid = 1;
+- else
+- cntl.bitfields.valid = 0;
++ packets_vec[2].bitfields2.reg_offset =
++ aw_reg_add_dword - CONFIG_REG_BASE;
++ packets_vec[2].reg_data[0] = addrLo.u32All;
+
+- aw_reg_add_dword =
+- dbgdev->dev->kfd2kgd
+- ->address_watch_get_offset(
+- dbgdev->dev->kgd,
+- i,
+- ADDRESS_WATCH_REG_CNTL);
++ /* enable watch flag if address is not zero*/
++ if (adw_info->watch_address[i] > 0)
++ cntl.bitfields.valid = 1;
++ else
++ cntl.bitfields.valid = 0;
+
++ aw_reg_add_dword =
++ dbgdev->dev->kfd2kgd
++ ->address_watch_get_offset(
++ dbgdev->dev->kgd,
++ i,
++ ADDRESS_WATCH_REG_CNTL);
+
+- packets_vec[3].bitfields2.reg_offset = aw_reg_add_dword - CONFIG_REG_BASE;
+- packets_vec[3].reg_data[0] = cntl.u32All;
+
+- status = dbgdev_diq_submit_ib(
+- dbgdev,
+- adw_info->process->pasid,
+- packet_buff_gpu_addr,
+- packet_buff_uint,
+- ib_size, true);
++ packets_vec[3].bitfields2.reg_offset =
++ aw_reg_add_dword - CONFIG_REG_BASE;
++ packets_vec[3].reg_data[0] = cntl.u32All;
+
+- if (status != 0) {
+- pr_debug("Error! kfd: In func %s >> failed to submit DIQ packet\n", __func__);
+- break;
+- }
++ status = dbgdev_diq_submit_ib(
++ dbgdev,
++ adw_info->process->pasid,
++ packet_buff_gpu_addr,
++ packet_buff_uint,
++ ib_size, true);
+
++ if (status != 0) {
++ pr_debug("Error! kfd: In func %s >> failed to submit DIQ packet\n",
++ __func__);
++ return status;
+ }
+
+- } while (false);
++ }
+
+ return status;
+
+@@ -525,26 +539,30 @@ static int dbgdev_wave_control_set_registers(
+ int status = 0;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
++ struct HsaDbgWaveMsgAMDGen2 *pMsg;
+
+ reg_sq_cmd.u32All = 0;
+-
+ reg_gfx_index.u32All = 0;
++ pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
+
+ switch (wac_info->mode) {
+- case HSA_DBG_WAVEMODE_SINGLE: /* Send command to single wave */
+- /*limit access to the process waves only,by setting vmid check */
++ /* Send command to single wave */
++ case HSA_DBG_WAVEMODE_SINGLE:
++ /* limit access to the process waves only,by setting vmid check
++ */
+ reg_sq_cmd.bits.check_vmid = 1;
+- reg_sq_cmd.bits.simd_id = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.SIMD;
+- reg_sq_cmd.bits.wave_id = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.WaveId;
++ reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
++ reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
+
+- reg_gfx_index.bits.sh_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderArray;
+- reg_gfx_index.bits.se_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderEngine;
+- reg_gfx_index.bits.instance_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.HSACU;
++ reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
++ reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
++ reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
+
+ break;
+
+- case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: /* Send command to all waves with matching VMID */
++ /* Send command to all waves with matching VMID */
++ case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
+
+
+ reg_gfx_index.bits.sh_broadcast_writes = 1;
+@@ -554,14 +572,15 @@ static int dbgdev_wave_control_set_registers(
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+ break;
+
+- case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: /* Send command to all CU waves with matching VMID */
++ /* Send command to all CU waves with matching VMID */
++ case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
+
+ reg_sq_cmd.bits.check_vmid = 1;
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+
+- reg_gfx_index.bits.sh_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderArray;
+- reg_gfx_index.bits.se_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.ShaderEngine;
+- reg_gfx_index.bits.instance_index = wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.ui32.HSACU;
++ reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
++ reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
++ reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
+
+ break;
+
+@@ -636,91 +655,98 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
+ size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
+
+ reg_sq_cmd.u32All = 0;
+- do {
+
+- status = dbgdev_wave_control_set_registers(wac_info,
+- &reg_sq_cmd,
+- &reg_gfx_index,
+- dbgdev->dev->device_info->asic_family);
++ status = dbgdev_wave_control_set_registers(wac_info,
++ &reg_sq_cmd,
++ &reg_gfx_index,
++ dbgdev->dev->device_info->asic_family);
+
+- /* we do not control the VMID in DIQ,so reset it to a known value */
+- reg_sq_cmd.bits.vm_id = 0;
+- if (status != 0)
+- break;
+- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+-
+- pr_debug("\t\t mode is: %u\n", wac_info->mode);
+- pr_debug("\t\t operand is: %u\n", wac_info->operand);
+- pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
+- pr_debug("\t\t msg value is: %u\n", wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+- pr_debug("\t\t vmid is: N/A\n");
+-
+- pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
+- pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
+- pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
+- pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
+- pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
+- pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
+- pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
+-
+- pr_debug("\t\t ibw is : %u\n", reg_gfx_index.bitfields.instance_broadcast_writes);
+- pr_debug("\t\t ii is : %u\n", reg_gfx_index.bitfields.instance_index);
+- pr_debug("\t\t sebw is : %u\n", reg_gfx_index.bitfields.se_broadcast_writes);
+- pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
+- pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
+- pr_debug("\t\t sbw is : %u\n", reg_gfx_index.bitfields.sh_broadcast_writes);
+-
+- pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+-
+- status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq,
+- ib_size / sizeof(uint32_t),
+- &packet_buff_uint, &packet_buff_gpu_addr);
+-
+- if (status != 0)
+- break;
+-
+- memset(packet_buff_uint, 0, ib_size);
+-
+- packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
+- packets_vec[0].header.count = 1;
+- packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
+- packets_vec[0].header.type = PM4_TYPE_3;
+- packets_vec[0].bitfields2.reg_offset = GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE;
+- packets_vec[0].bitfields2.insert_vmid = 0;
+- packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
+-
+- packets_vec[1].header.count = 1;
+- packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
+- packets_vec[1].header.type = PM4_TYPE_3;
+- packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - CONFIG_REG_BASE;
+- packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
+- packets_vec[1].bitfields2.insert_vmid = 1;
+- packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
+-
+- /* Restore the GRBM_GFX_INDEX register */
+-
+- reg_gfx_index.u32All = 0;
+- reg_gfx_index.bits.sh_broadcast_writes = 1;
+- reg_gfx_index.bits.instance_broadcast_writes = 1;
+- reg_gfx_index.bits.se_broadcast_writes = 1;
++ /* we do not control the VMID in DIQ, so reset it to a
++ * known value
++ */
++ reg_sq_cmd.bits.vm_id = 0;
++ if (status != 0)
++ return status;
++ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
++
++ pr_debug("\t\t mode is: %u\n", wac_info->mode);
++ pr_debug("\t\t operand is: %u\n", wac_info->operand);
++ pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
++ pr_debug("\t\t msg value is: %u\n", wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
++ pr_debug("\t\t vmid is: N/A\n");
++
++ pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
++ pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
++ pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
++ pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
++ pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
++ pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
++ pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
++
++ pr_debug("\t\t ibw is : %u\n",
++ reg_gfx_index.bitfields.instance_broadcast_writes);
++ pr_debug("\t\t ii is : %u\n",
++ reg_gfx_index.bitfields.instance_index);
++ pr_debug("\t\t sebw is : %u\n",
++ reg_gfx_index.bitfields.se_broadcast_writes);
++ pr_debug("\t\t se_ind is : %u\n",
++ reg_gfx_index.bitfields.se_index);
++ pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
++ pr_debug("\t\t sbw is : %u\n",
++ reg_gfx_index.bitfields.sh_broadcast_writes);
++
++ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
++
++ status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq,
++ ib_size / sizeof(uint32_t),
++ &packet_buff_uint, &packet_buff_gpu_addr);
++
++ if (status != 0)
++ return status;
++
++ memset(packet_buff_uint, 0, ib_size);
++
++ packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
++ packets_vec[0].header.count = 1;
++ packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
++ packets_vec[0].header.type = PM4_TYPE_3;
++ packets_vec[0].bitfields2.reg_offset =
++ GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE;
++ packets_vec[0].bitfields2.insert_vmid = 0;
++ packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
++
++ packets_vec[1].header.count = 1;
++ packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
++ packets_vec[1].header.type = PM4_TYPE_3;
++ packets_vec[1].bitfields2.reg_offset =
++ SQ_CMD / (sizeof(uint32_t)) - CONFIG_REG_BASE;
++ packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
++ packets_vec[1].bitfields2.insert_vmid = 1;
++ packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
++
++ /* Restore the GRBM_GFX_INDEX register */
+
++ reg_gfx_index.u32All = 0;
++ reg_gfx_index.bits.sh_broadcast_writes = 1;
++ reg_gfx_index.bits.instance_broadcast_writes = 1;
++ reg_gfx_index.bits.se_broadcast_writes = 1;
+
+- packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
+- packets_vec[2].bitfields2.reg_offset = GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE;
+- packets_vec[2].bitfields2.insert_vmid = 0;
+- packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
+
+- status = dbgdev_diq_submit_ib(
+- dbgdev,
+- wac_info->process->pasid,
+- packet_buff_gpu_addr,
+- packet_buff_uint,
+- ib_size, false);
++ packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
++ packets_vec[2].bitfields2.reg_offset =
++ GRBM_GFX_INDEX / (sizeof(uint32_t)) - USERCONFIG_REG_BASE;
++ packets_vec[2].bitfields2.insert_vmid = 0;
++ packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
+
+- if (status != 0)
+- pr_debug("%s\n", " Critical Error ! Submit diq packet failed ");
++ status = dbgdev_diq_submit_ib(
++ dbgdev,
++ wac_info->process->pasid,
++ packet_buff_gpu_addr,
++ packet_buff_uint,
++ ib_size, false);
+
+- } while (false);
++ if (status != 0)
++ pr_debug("%s\n", " Critical Error ! Submit diq packet failed ");
+
+ return status;
+ }
+@@ -758,23 +784,37 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
+ pr_debug("\t\t mode is: %u\n", wac_info->mode);
+ pr_debug("\t\t operand is: %u\n", wac_info->operand);
+ pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
+- pr_debug("\t\t msg value is: %u\n", wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
++ pr_debug("\t\t msg value is: %u\n",
++ wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+ pr_debug("\t\t vmid is: %u\n", vmid);
+
+- pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
+- pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
+- pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
+- pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
+- pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
+- pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
+- pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
+-
+- pr_debug("\t\t ibw is : %u\n", reg_gfx_index.bitfields.instance_broadcast_writes);
+- pr_debug("\t\t ii is : %u\n", reg_gfx_index.bitfields.instance_index);
+- pr_debug("\t\t sebw is : %u\n", reg_gfx_index.bitfields.se_broadcast_writes);
+- pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
+- pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
+- pr_debug("\t\t sbw is : %u\n", reg_gfx_index.bitfields.sh_broadcast_writes);
++ pr_debug("\t\t chk_vmid is : %u\n",
++ reg_sq_cmd.bitfields.check_vmid);
++ pr_debug("\t\t command is : %u\n",
++ reg_sq_cmd.bitfields.cmd);
++ pr_debug("\t\t queue id is : %u\n",
++ reg_sq_cmd.bitfields.queue_id);
++ pr_debug("\t\t simd id is : %u\n",
++ reg_sq_cmd.bitfields.simd_id);
++ pr_debug("\t\t mode is : %u\n",
++ reg_sq_cmd.bitfields.mode);
++ pr_debug("\t\t vm_id is : %u\n",
++ reg_sq_cmd.bitfields.vm_id);
++ pr_debug("\t\t wave_id is : %u\n",
++ reg_sq_cmd.bitfields.wave_id);
++
++ pr_debug("\t\t ibw is : %u\n",
++ reg_gfx_index.bitfields.instance_broadcast_writes);
++ pr_debug("\t\t ii is : %u\n",
++ reg_gfx_index.bitfields.instance_index);
++ pr_debug("\t\t sebw is : %u\n",
++ reg_gfx_index.bitfields.se_broadcast_writes);
++ pr_debug("\t\t se_ind is : %u\n",
++ reg_gfx_index.bitfields.se_index);
++ pr_debug("\t\t sh_ind is : %u\n",
++ reg_gfx_index.bitfields.sh_index);
++ pr_debug("\t\t sbw is : %u\n",
++ reg_gfx_index.bitfields.sh_broadcast_writes);
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+@@ -814,7 +854,8 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
+
+ /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
+ * ATC_VMID15_PASID_MAPPING
+- * to check which VMID the current process is mapped to. */
++ * to check which VMID the current process is mapped to.
++ */
+
+ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+ if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
+@@ -854,7 +895,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
+ }
+
+ void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
+- DBGDEV_TYPE type)
++ enum DBGDEV_TYPE type)
+ {
+ pdbgdev->dev = pdev;
+ pdbgdev->kq = NULL;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
+index 82f48ff..75883e0 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
+@@ -90,17 +90,14 @@ enum SQ_IND_CMD_CMD {
+ SQ_IND_CMD_CMD_DEBUG = 0x00000004,
+ SQ_IND_CMD_CMD_TRAP = 0x00000005
+ };
+-/*
+- * SQ_IND_CMD_MODE enum
+- */
+
+-typedef enum SQ_IND_CMD_MODE {
++enum SQ_IND_CMD_MODE {
+ SQ_IND_CMD_MODE_SINGLE = 0x00000000,
+ SQ_IND_CMD_MODE_BROADCAST = 0x00000001,
+ SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002,
+ SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003,
+ SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004,
+-} SQ_IND_CMD_MODE;
++};
+
+ union SQ_IND_INDEX_BITS {
+ struct {
+@@ -208,7 +205,7 @@ union TCP_WATCH_ADDR_L_BITS {
+ };
+
+ enum {
+- QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
++ QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
+ QUEUESTATE__ACTIVE_COMPLETION_PENDING,
+ QUEUESTATE__ACTIVE
+ };
+@@ -226,6 +223,7 @@ union ULARGE_INTEGER {
+ #define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8))
+
+
+-void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, DBGDEV_TYPE type);
++void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
++ enum DBGDEV_TYPE type);
+
+ #endif /* KFD_DBGDEV_H_ */
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+index 426f776..603cdc3 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+@@ -37,16 +37,12 @@
+
+ static DEFINE_MUTEX(kfd_dbgmgr_mutex);
+
+-struct mutex *
+-get_dbgmgr_mutex(void)
++struct mutex *get_dbgmgr_mutex(void)
+ {
+ return &kfd_dbgmgr_mutex;
+ }
+
+-/*===========================================================================*/
+-
+-static void
+-kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
++static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
+ {
+ kfree(pmgr->dbgdev);
+ pmgr->dbgdev = NULL;
+@@ -54,10 +50,7 @@ kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
+ pmgr->dev = NULL;
+ }
+
+-/*===========================================================================*/
+-
+-void
+-kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
++void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
+ {
+ if (pmgr != NULL) {
+ kfd_dbgmgr_uninitialize(pmgr);
+@@ -66,21 +59,18 @@ kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
+ }
+ }
+
+-/*===========================================================================*/
+-
+-bool
+-kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
++bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
+ {
+- DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
++ enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
+ struct kfd_dbgmgr *new_buff;
+
+ BUG_ON(pdev == NULL);
+ BUG_ON(!pdev->init_complete);
+
+ new_buff = kfd_alloc_struct(new_buff);
+- if (!new_buff)
+- {
+- dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgmgr instance\n", __func__);
++ if (!new_buff) {
++ dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgmgr instance\n",
++ __func__);
+ return false;
+ }
+
+@@ -88,7 +78,8 @@ kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
+ new_buff->dev = pdev;
+ new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
+ if (!new_buff->dbgdev) {
+- dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgdev\n", __func__);
++ dev_err(NULL, "Error! kfd: In func %s >> failed to allocate dbgdev\n",
++ __func__);
+ kfree(new_buff);
+ return false;
+ }
+@@ -108,195 +99,176 @@ kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
+ long
+ kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
+ {
+- long status = 0;
+-
+- do {
+-
+- if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL)) {
+- dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__);
+- /* Invalid Pointer. */
+- status = -EINVAL;
+- break;
+- }
+- if (pmgr->pasid != 0) {
+- /* HW debugger is already active. */
+- status = -EBUSY;
+- break;
+- }
+-
+- /* remember pasid */
++ if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev)) {
++ dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n",
++ __func__);
++ /* Invalid Pointer. */
++ return -EINVAL;
++ }
++ if (pmgr->pasid != 0) {
++ /* HW debugger is already active. */
++ return -EBUSY;
++ }
+
+- pmgr->pasid = p->pasid;
++ /* remember pasid */
+
+- /* provide the pqm for diq generation */
++ pmgr->pasid = p->pasid;
+
+- pmgr->dbgdev->pqm = &p->pqm;
++ /* provide the pqm for diq generation */
+
+- /* activate the actual registering */
+- /* todo: you should lock with the process mutex here */
+- pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);
+- /* todo: you should unlock with the process mutex here */
++ pmgr->dbgdev->pqm = &p->pqm;
+
+- } while (false);
++ /* activate the actual registering */
++ /* todo: you should lock with the process mutex here */
++ pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);
++ /* todo: you should unlock with the process mutex here */
+
+- return status;
++ return 0;
+ }
+
+-/* ========================================================================== */
++/* ========================================================================= */
+
+ long
+ kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
+ {
+
+- long status = 0;
+-
+- do {
+-
+- if ((pmgr == NULL) || (pmgr->dev == NULL)
+- || (pmgr->dbgdev == NULL) || (p == NULL)) {
+- dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__);
+- /* Invalid Pointer */
+- status = -EINVAL;
+- break;
+- }
+- if (pmgr->pasid != p->pasid) {
+- /* Is the requests coming from the already registered process? */
+- status = -EINVAL;
+- break;
+- }
+-
+- /* todo: you should lock with the process mutex here */
++ if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL) ||
++ (p == NULL)) {
++ dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n",
++ __func__);
++ /* Invalid Pointer */
++ return -EINVAL;
++ }
++ if (pmgr->pasid != p->pasid) {
++ /* Is the requests coming from the already registered
++ * process?
++ */
++ return -EINVAL;
++ }
+
+- pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);
++ /* todo: you should lock with the process mutex here */
+
+- /* todo: you should unlock with the process mutex here */
++ pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);
+
+- pmgr->pasid = 0;
++ /* todo: you should unlock with the process mutex here */
+
+- } while (false);
++ pmgr->pasid = 0;
+
+- return status;
++ return 0;
+ }
+
+-/* =========================================================================== */
++/* ========================================================================= */
+
+ long
+-kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, struct dbg_wave_control_info *wac_info)
++kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
++ struct dbg_wave_control_info *wac_info)
+ {
+- long status = 0;
+-
+ dev_info(NULL, "kfd: In func %s\n", __func__);
+
+- do {
+-
+- if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL) || (wac_info == NULL)
+- || (wac_info->process == NULL)) {
+- /* Invalid Pointer */
+- dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__);
+- status = -EINVAL;
+- break;
+- }
+- /* Is the requests coming from the already registered process? */
+- if (pmgr->pasid != wac_info->process->pasid) {
+- /* HW debugger support was not registered for requester process */
+- status = -EINVAL;
+- break;
+- }
+-
+- status = (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info);
+-
+- } while (false);
+-
+- return status;
++ if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev) || (!wac_info)
++ || (wac_info->process == NULL)) {
++ /* Invalid Pointer */
++ dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n",
++ __func__);
++ return -EINVAL;
++ }
++ /* Is the requests coming from the already registered
++ * process?
++ */
++ if (pmgr->pasid != wac_info->process->pasid) {
++ /* HW debugger support was not registered for
++ * requester process
++ */
++ return -EINVAL;
++ }
+
++ return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev,
++ wac_info);
+ }
+
+-/* =========================================================================== */
++/* ========================================================================= */
+
+ long
+-kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, struct dbg_address_watch_info *adw_info)
++kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
++ struct dbg_address_watch_info *adw_info)
+ {
+- long status = 0;
+-
+ dev_info(NULL, "kfd: In func %s\n", __func__);
+
+- do {
+-
+- if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL) || (adw_info == NULL)
+- || (adw_info->process == NULL)) {
+- /* Invalid Pointer */
+- dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__);
+- status = -EINVAL;
+- break;
+- }
+- /* Is the requests coming from the already registered process? */
+- if (pmgr->pasid != adw_info->process->pasid) {
+- /* HW debugger support was not registered for requester process */
+- status = -EINVAL;
+- break;
+- }
+-
+- status = (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, adw_info);
+-
+- } while (false);
+-
+- return status;
++ if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev) || (!adw_info)
++ || (adw_info->process == NULL)) {
++ /* Invalid Pointer */
++ dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n",
++ __func__);
++ return -EINVAL;
++ }
++ /* Is the requests coming from the already registered
++ * process?
++ */
++ if (pmgr->pasid != adw_info->process->pasid) {
++ /* HW debugger support was not registered for
++ * requester process
++ */
++ return -EINVAL;
++ }
+
++ return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev,
++ adw_info);
+ }
+
+
+-/* =========================================================================== */
++/* ========================================================================= */
+ /*
+ * Handle abnormal process termination
+ * if we are in the midst of a debug session, we should kill all pending waves
+ * of the debugged process and unregister the process from the Debugger.
+ */
+ long
+-kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, struct kfd_process *process)
++kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr,
++ struct kfd_process *process)
+ {
+ long status = 0;
+ struct dbg_wave_control_info wac_info;
+
+ dev_info(NULL, "kfd: In func %s\n", __func__);
+
+- do {
+-
+- if ((pmgr == NULL) || (pmgr->dev == NULL) || (pmgr->dbgdev == NULL)) {
+- /* Invalid Pointer */
+- dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n", __func__);
+- status = -EINVAL;
+- break;
+- }
+- /* first, we kill all the wavefronts of this process */
+-
+- wac_info.process = process;
+- wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
+- wac_info.operand = HSA_DBG_WAVEOP_KILL;
+- wac_info.trapId = 0x0; /* not used for the KILL */
+- wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = 0; /* not used for kill */
+- wac_info.dbgWave_msg.MemoryVA = NULL; /* not used for kill */
+-
+- status = (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, &wac_info);
+-
+- if (status != 0) {
+- dev_info(NULL, "Error! kfd: In func %s: wave control failed, status is: %ld\n", __func__, status);
+- break;
+- }
+- if (pmgr->pasid == wac_info.process->pasid) {
+- /* if terminated process was registered for debug, then unregister it */
+- status = kfd_dbgmgr_unregister(pmgr, process);
+- pmgr->pasid = 0;
+- }
+- if (status != 0)
+- dev_info(NULL,
+- "Error! kfd: In func %s: unregister failed, status is: %ld debugger can not be reused\n",
+- __func__, status);
+-
+- } while (false);
++ if ((!pmgr) || (!pmgr->dev) || (!pmgr->dbgdev)) {
++ /* Invalid Pointer */
++ dev_info(NULL, "Error! kfd: In func %s >> Illegal pointers\n",
++ __func__);
++ return -EINVAL;
++ }
++ /* first, we kill all the wavefronts of this process */
++
++ wac_info.process = process;
++ wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
++ wac_info.operand = HSA_DBG_WAVEOP_KILL;
+
+- return status;
++ /* not used for KILL */
++ wac_info.trapId = 0x0;
++ wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = 0;
++ wac_info.dbgWave_msg.MemoryVA = NULL;
+
+-}
++ status = (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev,
++ &wac_info);
+
++ if (status != 0) {
++ dev_info(NULL, "Error! kfd: In func %s: wave control failed, status is: %ld\n",
++ __func__, status);
++ return status;
++ }
++ if (pmgr->pasid == wac_info.process->pasid) {
++ /* if terminated process was registered for debug,
++ * then unregister it
++ */
++ status = kfd_dbgmgr_unregister(pmgr, process);
++ pmgr->pasid = 0;
++ }
++ if (status != 0)
++ dev_info(NULL,
++ "Error! kfd: In func %s: unregister failed, status is: %ld debugger can not be reused\n",
++ __func__, status);
++
++ return status;
++}
+
+-/*///////////////////////////////////////////////////////////////////////////////////////// */
++/* ///////////////////////////////////////////////////////////////////////// */
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
+index 2b6484e..b9a769a 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
+@@ -26,242 +26,251 @@
+
+ #include "kfd_priv.h"
+
+-/*
+- * SQ_IND_CMD_CMD enum
+- */
+-
+-
+ /* must align with hsakmttypes definition. */
+ #pragma pack(push, 4)
+
+-typedef enum _HSA_DBG_WAVEOP {
+- HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */
+- HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
+- HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */
+- HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter debug mode */
+- HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */
++enum HSA_DBG_WAVEOP {
++ HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */
++ HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
++ HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */
++ HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */
++ HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */
+ HSA_DBG_NUM_WAVEOP = 5,
+ HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
+-} HSA_DBG_WAVEOP;
++};
+
+-typedef enum _HSA_DBG_WAVEMODE {
+- HSA_DBG_WAVEMODE_SINGLE = 0, /* send command to a single wave */
+- /* Broadcast to all wavefronts of all processes is not supported for HSA user mode */
+- HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, /* send to waves within current process */
+- HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, /* send to waves within current process on CU */
++enum HSA_DBG_WAVEMODE {
++ /* send command to a single wave */
++ HSA_DBG_WAVEMODE_SINGLE = 0,
++ /* Broadcast to all wavefronts of all processes is not supported for
++ * HSA user mode
++ */
++
++ /* send to waves within current process */
++ HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2,
++ /* send to waves within current process on CU */
++ HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,
+ HSA_DBG_NUM_WAVEMODE = 3,
+ HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
+-} HSA_DBG_WAVEMODE;
++};
+
+-typedef enum _HSA_DBG_WAVEMSG_TYPE {
++enum HSA_DBG_WAVEMSG_TYPE {
+ HSA_DBG_WAVEMSG_AUTO = 0,
+ HSA_DBG_WAVEMSG_USER = 1,
+ HSA_DBG_WAVEMSG_ERROR = 2,
+ HSA_DBG_NUM_WAVEMSG,
+ HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
+-} HSA_DBG_WAVEMSG_TYPE;
++};
+
+-typedef enum _HSA_DBG_WATCH_MODE {
+- HSA_DBG_WATCH_READ = 0, /* Read operations only */
+- HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */
+- HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */
+- HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */
++enum HSA_DBG_WATCH_MODE {
++ HSA_DBG_WATCH_READ = 0, /* Read operations only */
++ HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */
++ HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */
++ HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */
+ HSA_DBG_WATCH_NUM,
+ HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
+-} HSA_DBG_WATCH_MODE;
++};
+
+ /* This structure is hardware specific and may change in the future */
+-typedef struct _HsaDbgWaveMsgAMDGen2 {
++struct HsaDbgWaveMsgAMDGen2 {
+ union {
+ struct {
+- uint32_t UserData:8; /* user data */
+- uint32_t ShaderArray:1; /* Shader array */
+- uint32_t Priv:1; /* Privileged */
+- uint32_t Reserved0:4; /* This field is reserved, should be 0 */
+- uint32_t WaveId:4; /* wave id */
+- uint32_t SIMD:2; /* SIMD id */
+- uint32_t HSACU:4; /* Compute unit */
+- uint32_t ShaderEngine:2; /* Shader engine */
+- uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */
+- uint32_t Reserved1:4; /* This field is reserved, should be 0 */
++ uint32_t UserData:8; /* user data */
++ uint32_t ShaderArray:1; /* Shader array */
++ uint32_t Priv:1; /* Privileged */
++ uint32_t Reserved0:4; /* Reserved, should be 0 */
++ uint32_t WaveId:4; /* wave id */
++ uint32_t SIMD:2; /* SIMD id */
++ uint32_t HSACU:4; /* Compute unit */
++ uint32_t ShaderEngine:2;/* Shader engine */
++ uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */
++ uint32_t Reserved1:4; /* Reserved, should be 0 */
+ } ui32;
+ uint32_t Value;
+ };
+
+ uint32_t Reserved2;
+
+-} HsaDbgWaveMsgAMDGen2;
++};
+
+-typedef union _HsaDbgWaveMessageAMD {
+- HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
+- /* for future HsaDbgWaveMsgAMDGen3; */
+-} HsaDbgWaveMessageAMD;
++union HsaDbgWaveMessageAMD {
++ struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
++ /* for future HsaDbgWaveMsgAMDGen3; */
++};
+
+-typedef struct _HsaDbgWaveMessage {
+- void *MemoryVA; /* ptr to associated host-accessible data */
+- HsaDbgWaveMessageAMD DbgWaveMsg;
+-} HsaDbgWaveMessage;
++struct HsaDbgWaveMessage {
++ void *MemoryVA; /* ptr to associated host-accessible data */
++ union HsaDbgWaveMessageAMD DbgWaveMsg;
++};
+
+ /* TODO: This definitions to be MOVED to kfd_event, once it is implemented.
++ *
++ * HSA sync primitive, Event and HW Exception notification API definitions.
++ * The API functions allow the runtime to define a so-called sync-primitive,
++ * a SW object combining a user-mode provided "syncvar" and a scheduler event
++ * that can be signaled through a defined GPU interrupt. A syncvar is
++ * a process virtual memory location of a certain size that can be accessed
++ * by CPU and GPU shader code within the process to set and query the content
++ * within that memory. The definition of the content is determined by the HSA
++ * runtime and potentially GPU shader code interfacing with the HSA runtime.
++ * The syncvar values may be commonly written through an PM4 WRITE_DATA packet
++ * in the user mode instruction stream. The OS scheduler event is typically
++ * associated and signaled by an interrupt issued by the GPU, but other HSA
++ * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
++ * by the KFD by this mechanism, too.
++ */
+
+- HSA sync primitive, Event and HW Exception notification API definitions
+- The API functions allow the runtime to define a so-called sync-primitive, a SW object
+- combining a user-mode provided "syncvar" and a scheduler event that can be signaled
+- through a defined GPU interrupt. A syncvar is a process virtual memory location of
+- a certain size that can be accessed by CPU and GPU shader code within the process to set
+- and query the content within that memory. The definition of the content is determined by
+- the HSA runtime and potentially GPU shader code interfacing with the HSA runtime.
+- The syncvar values may be commonly written through an PM4 WRITE_DATA packet in the
+- user mode instruction stream. The OS scheduler event is typically associated and
+- signaled by an interrupt issued by the GPU, but other HSA system interrupt conditions
+- from other HW (e.g. IOMMUv2) may besurfaced by the KFD by this mechanism, too. */
+-
+-/* these are the new definitions for events */
+-
+-typedef enum _HSA_EVENTTYPE {
+- HSA_EVENTTYPE_SIGNAL = 0, /* /user-mode generated GPU signal */
+- HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */
+- HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change( start/stop ) */
+- HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */
+- HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */
+- HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */
+- HSA_EVENTTYPE_PROFILE_EVENT = 6, /* GPU signal for profiling */
+- HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state (EOP pm4) */
+- /* ... */
++/* these are the new definitions for events */
++enum HSA_EVENTTYPE {
++ HSA_EVENTTYPE_SIGNAL = 0, /* User-mode generated GPU signal */
++ HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */
++ HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change
++ * ( start/stop )
++ */
++ HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */
++ HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */
++ HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */
++ HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
++ HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state
++ *(EOP pm4)
++ */
++ /* ... */
+ HSA_EVENTTYPE_MAXID,
+ HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
+-} HSA_EVENTTYPE;
+-
+-typedef uint32_t HSA_EVENTID;
+-
+-/* Subdefinitions for various event types: Syncvar */
++};
+
+-typedef struct _HsaSyncVar {
++/* Subdefinitions for various event types: Syncvar */
++struct HsaSyncVar {
+ union {
+- void *UserData; /* pointer to user mode data */
+- uint64_t UserDataPtrValue; /* 64bit compatibility of value */
++ void *UserData; /* pointer to user mode data */
++ uint64_t UserDataPtrValue; /* 64bit compatibility of value */
+ } SyncVar;
+ uint64_t SyncVarSize;
+-} HsaSyncVar;
+-
+-/*
+- Subdefinitions for various event types: NodeChange
+-*/
++};
+
+-typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS {
++/* Subdefinitions for various event types: NodeChange */
++enum HSA_EVENTTYPE_NODECHANGE_FLAGS {
+ HSA_EVENTTYPE_NODECHANGE_ADD = 0,
+ HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
+ HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
+-} HSA_EVENTTYPE_NODECHANGE_FLAGS;
++};
+
+-typedef struct _HsaNodeChange {
+- HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; /* HSA node added/removed on the platform */
+-} HsaNodeChange;
++struct HsaNodeChange {
++ /* HSA node added/removed on the platform */
++ enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;
++};
+
+-/*
+- Sub-definitions for various event types: DeviceStateChange
+-*/
++/* Sub-definitions for various event types: DeviceStateChange */
+
+-typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
+- HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, /* device started (and available) */
+- HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, /* device stopped (i.e. unavailable) */
++enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
++ /* device started (and available) */
++ HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,
++ /* device stopped (i.e. unavailable) */
++ HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1,
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
+-} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS;
++};
+
+-typedef enum _HSA_DEVICE {
++enum HSA_DEVICE {
+ HSA_DEVICE_CPU = 0,
+ HSA_DEVICE_GPU = 1,
+ MAX_HSA_DEVICE = 2
+-} HSA_DEVICE;
++};
+
+-typedef struct _HsaDeviceStateChange {
++struct HsaDeviceStateChange {
+ uint32_t NodeId; /* F-NUMA node that contains the device */
+- HSA_DEVICE Device; /* device type: GPU or CPU */
+- HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
+-} HsaDeviceStateChange;
++ enum HSA_DEVICE Device; /* device type: GPU or CPU */
++ enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
++};
+
+-typedef struct _HsaEventData {
+- HSA_EVENTTYPE EventType; /* event type */
++struct HsaEventData {
++ enum HSA_EVENTTYPE EventType; /* event type */
+ union {
+- /* return data associated with HSA_EVENTTYPE_SIGNAL and other events */
+- HsaSyncVar SyncVar;
++ /* return data associated with HSA_EVENTTYPE_SIGNAL and other
++ * events
++ */
++ struct HsaSyncVar SyncVar;
+
+ /* data associated with HSA_EVENTTYPE_NODE_CHANGE */
+- HsaNodeChange NodeChangeState;
++ struct HsaNodeChange NodeChangeState;
+
+ /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */
+- HsaDeviceStateChange DeviceState;
++ struct HsaDeviceStateChange DeviceState;
+ } EventData;
+
+- /* the following data entries are internal to the KFD & thunk itself. */
++ /* the following data entries are internal to the KFD & thunk itself */
+
+- uint64_t HWData1; /* internal thunk store for Event data (OsEventHandle) */
+- uint64_t HWData2; /* internal thunk store for Event data (HWAddress) */
+- uint32_t HWData3; /* internal thunk store for Event data (HWData) */
+-} HsaEventData;
++ /* internal thunk store for Event data (OsEventHandle) */
++ uint64_t HWData1;
++ /* internal thunk store for Event data (HWAddress) */
++ uint64_t HWData2;
++ /* internal thunk store for Event data (HWData) */
++ uint32_t HWData3;
++};
+
+-typedef struct _HsaEventDescriptor {
+- HSA_EVENTTYPE EventType; /* event type to allocate */
+- uint32_t NodeId; /* H-NUMA node containing GPU device that is event source */
+- HsaSyncVar SyncVar; /* pointer to user mode syncvar data, syncvar->UserDataPtrValue may be NULL */
+-} HsaEventDescriptor;
++struct HsaEventDescriptor {
++ /* event type to allocate */
++ enum HSA_EVENTTYPE EventType;
++ /* H-NUMA node containing GPU device that is event source */
++ uint32_t NodeId;
++ /* pointer to user mode syncvar data, syncvar->UserDataPtrValue
++ * may be NULL
++ */
++ struct HsaSyncVar SyncVar;
++};
+
+-typedef struct _HsaEvent {
+- HSA_EVENTID EventId;
+- HsaEventData EventData;
+-} HsaEvent;
++struct HsaEvent {
++ uint32_t EventId;
++ struct HsaEventData EventData;
++};
+
+
+ #pragma pack(pop)
+
+-typedef enum _DBGDEV_TYPE {
++enum DBGDEV_TYPE {
+ DBGDEV_TYPE_ILLEGAL = 0,
+ DBGDEV_TYPE_NODIQ = 1,
+ DBGDEV_TYPE_DIQ = 2,
+ DBGDEV_TYPE_TEST = 3
+-} DBGDEV_TYPE;
++};
+
+ struct dbg_address_watch_info {
+ struct kfd_process *process;
+- HSA_DBG_WATCH_MODE *watch_mode;
++ enum HSA_DBG_WATCH_MODE *watch_mode;
+ uint64_t *watch_address;
+ uint64_t *watch_mask;
+- HsaEvent *watch_event;
++ struct HsaEvent *watch_event;
+ uint32_t num_watch_points;
+ };
+
+ struct dbg_wave_control_info {
+ struct kfd_process *process;
+ uint32_t trapId;
+- HSA_DBG_WAVEOP operand;
+- HSA_DBG_WAVEMODE mode;
+- HsaDbgWaveMessage dbgWave_msg;
++ enum HSA_DBG_WAVEOP operand;
++ enum HSA_DBG_WAVEMODE mode;
++ struct HsaDbgWaveMessage dbgWave_msg;
+ };
+
+ struct kfd_dbgdev {
+
+ /* The device that owns this data. */
+-
+ struct kfd_dev *dev;
+
+ /* kernel queue for DIQ */
+-
+ struct kernel_queue *kq;
+
+ /* a pointer to the pqm of the calling process */
+-
+ struct process_queue_manager *pqm;
+
+ /* type of debug device ( DIQ, non DIQ, etc. ) */
+-
+- DBGDEV_TYPE type;
++ enum DBGDEV_TYPE type;
+
+ /* virtualized function pointers to device dbg */
+-
+ int (*dbgdev_register)(struct kfd_dbgdev *dbgdev);
+ int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev);
+- int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, struct dbg_address_watch_info *adw_info);
+- int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, struct dbg_wave_control_info *wac_info);
++ int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev,
++ struct dbg_address_watch_info *adw_info);
++ int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev,
++ struct dbg_wave_control_info *wac_info);
+
+ };
+
+@@ -277,7 +286,10 @@ void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr);
+ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev);
+ long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
+ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
+-long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, struct dbg_wave_control_info *wac_info);
+-long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, struct dbg_address_watch_info *adw_info);
+-long kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr, struct kfd_process *process);
+-#endif /* KFD_DBGMGR_H_ */
++long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
++ struct dbg_wave_control_info *wac_info);
++long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
++ struct dbg_address_watch_info *adw_info);
++long kfd_dbgmgr_abnormal_termination(struct kfd_dbgmgr *pmgr,
++ struct kfd_process *process);
++#endif /* KFD_DBGMGR_H_ */
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+index ccf982d..24952c2 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+@@ -51,19 +51,19 @@ void kfd_debugfs_init(void)
+ return;
+ }
+
+- ent = debugfs_create_file("mqds", S_IFREG | S_IRUGO, debugfs_root,
++ ent = debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root,
+ kfd_debugfs_mqds_by_process,
+ &kfd_debugfs_fops);
+ if (ent == NULL)
+ pr_warn("Failed to create mqds in kfd debugfs\n");
+
+- ent = debugfs_create_file("hqds", S_IFREG | S_IRUGO, debugfs_root,
++ ent = debugfs_create_file("hqds", S_IFREG | 0444, debugfs_root,
+ kfd_debugfs_hqds_by_device,
+ &kfd_debugfs_fops);
+ if (ent == NULL)
+ pr_warn("Failed to create hqds in kfd debugfs\n");
+
+- ent = debugfs_create_file("rls", S_IFREG | S_IRUGO, debugfs_root,
++ ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
+ kfd_debugfs_rls_by_device,
+ &kfd_debugfs_fops);
+ if (ent == NULL)
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index 5d657a9..0abccc4 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -343,12 +343,13 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd)
+ dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n",
+ (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
+ (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
+- (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0);
++ (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
++ != 0);
+ return false;
+ }
+
+ pasid_limit = min_t(unsigned int,
+- (unsigned int)1 << kfd->device_info->max_pasid_bits,
++ (unsigned int)(1 << kfd->device_info->max_pasid_bits),
+ iommu_info.max_pasids);
+ /*
+ * last pasid is used for kernel queues doorbells
+@@ -718,9 +719,10 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
+
+ spin_lock(&kfd->interrupt_lock);
+
+- if (kfd->interrupts_active
+- && interrupt_is_wanted(kfd, ih_ring_entry, patched_ihre, &is_patched)
+- && enqueue_ih_ring_entry(kfd, is_patched ? patched_ihre : ih_ring_entry))
++ if (kfd->interrupts_active && interrupt_is_wanted(kfd, ih_ring_entry,
++ patched_ihre, &is_patched)
++ && enqueue_ih_ring_entry(kfd,
++ is_patched ? patched_ihre : ih_ring_entry))
+ queue_work(kfd->ih_wq, &kfd->interrupt_work);
+
+ spin_unlock(&kfd->interrupt_lock);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index 2a4a556..99844c5 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -1182,7 +1182,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ }
+
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+- dqm->sdma_queue_count++;
++ dqm->sdma_queue_count++;
+ /*
+ * Unconditionally increment this counter, regardless of the queue's
+ * type or whether the queue is active.
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+index 341adfa..bf24368 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+@@ -171,7 +171,8 @@ static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
+ }
+
+ /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
+- * aperture addresses. */
++ * aperture addresses.
++ */
+ temp = get_sh_mem_bases_nybble_64(pdd);
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+
+@@ -202,7 +203,8 @@ static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+ {
+ /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
+- * aperture addresses. */
++ * aperture addresses.
++ */
+ q->properties.sdma_vm_addr =
+ ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
+ SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+index 2629143..2f37b04 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+@@ -225,7 +225,8 @@ static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
+ }
+
+ /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
+- * aperture addresses. */
++ * aperture addresses.
++ */
+ temp = get_sh_mem_bases_nybble_64(pdd);
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+
+@@ -256,7 +257,8 @@ static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+ {
+ /* On dGPU we're always in GPUVM64 addressing mode with 64-bit
+- * aperture addresses. */
++ * aperture addresses.
++ */
+ q->properties.sdma_vm_addr =
+ ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
+ SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+index be5abd5..18198d8 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+@@ -34,8 +34,8 @@
+
+ /* A task can only be on a single wait_queue at a time, but we need to support
+ * waiting on multiple events (any/all).
+- * Instead of each event simply having a wait_queue with sleeping tasks, it has a
+- * singly-linked list of tasks.
++ * Instead of each event simply having a wait_queue with sleeping tasks, it has
++ * a singly-linked list of tasks.
+ * A thread that wants to sleep creates an array of these, one for each event
+ * and adds one to each event's waiter chain.
+ */
+@@ -56,9 +56,9 @@ struct kfd_event_waiter {
+
+ /* Over-complicated pooled allocator for event notification slots.
+ *
+- * Each signal event needs a 64-bit signal slot where the signaler will write a 1
+- * before sending an interrupt.l (This is needed because some interrupts do not
+- * contain enough spare data bits to identify an event.)
++ * Each signal event needs a 64-bit signal slot where the signaler will write a
++ * 1 before sending an interrupt.l (This is needed because some interrupts do
++ * not contain enough spare data bits to identify an event.)
+ * We get whole pages from vmalloc and map them to the process VA.
+ * Individual signal events are then allocated a slot in a page.
+ */
+@@ -101,7 +101,10 @@ allocate_free_slot(struct kfd_process *process,
+
+ list_for_each_entry(page, &process->signal_event_pages, event_pages) {
+ if (page->free_slots > 0) {
+- unsigned int slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);
++ unsigned int slot =
++ find_first_zero_bit(page->used_slot_bitmap,
++ SLOTS_PER_PAGE);
++
+ __set_bit(slot, page->used_slot_bitmap);
+ page->free_slots--;
+
+@@ -139,13 +142,14 @@ static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
+
+ page->free_slots = SLOTS_PER_PAGE;
+
+- backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \
++ backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
+ if (!backing_store)
+ goto fail_alloc_signal_store;
+
+ /* prevent user-mode info leaks */
+- memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, KFD_SIGNAL_EVENT_LIMIT * 8);
++ memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
++ KFD_SIGNAL_EVENT_LIMIT * 8);
+ page->kernel_address = backing_store;
+
+ /* Set bits of debug events to prevent allocation */
+@@ -213,7 +217,7 @@ allocate_signal_page_dgpu(struct kfd_process *p,
+ my_page->user_address = NULL;
+ my_page->free_slots = SLOTS_PER_PAGE;
+ if (list_empty(&p->signal_event_pages))
+- my_page->page_index = 0;
++ my_page->page_index = 0;
+ else
+ my_page->page_index = list_tail_entry(&p->signal_event_pages,
+ struct signal_page,
+@@ -284,7 +288,8 @@ static void release_event_notification_slot(struct signal_page *page,
+ page->free_slots++;
+
+ /* We don't free signal pages, they are retained by the process
+- * and reused until it exits. */
++ * and reused until it exits.
++ */
+ }
+
+ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
+@@ -292,7 +297,9 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
+ {
+ struct signal_page *page;
+
+- /* This is safe because we don't delete signal pages until the process exits. */
++ /* This is safe because we don't delete signal pages until the process
++ * exits.
++ */
+ list_for_each_entry(page, &p->signal_event_pages, event_pages)
+ if (page->page_index == page_index)
+ return page;
+@@ -300,7 +307,9 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
+ return NULL;
+ }
+
+-/* Assumes that p->event_mutex is held and of course that p is not going away (current or locked). */
++/* Assumes that p->event_mutex is held and of course that p is not going away
++ * (current or locked).
++ */
+ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
+ {
+ struct kfd_event *ev;
+@@ -321,27 +330,30 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
+ static u32 make_signal_event_id(struct signal_page *page,
+ unsigned int signal_slot_index)
+ {
+- return page->page_index | (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
++ return page->page_index |
++ (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
+ }
+
+ /* Produce a kfd event id for a nonsignal event.
+- * These are arbitrary numbers, so we do a sequential search through the hash table
+- * for an unused number.
++ * These are arbitrary numbers, so we do a sequential search through the hash
++ * table for an unused number.
+ */
+ static u32 make_nonsignal_event_id(struct kfd_process *p)
+ {
+ u32 id;
+
+ for (id = p->next_nonsignal_event_id;
+- id < KFD_LAST_NONSIGNAL_EVENT_ID && lookup_event_by_id(p, id) != NULL;
++ id < KFD_LAST_NONSIGNAL_EVENT_ID &&
++ lookup_event_by_id(p, id) != NULL;
+ id++)
+ ;
+
+ if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
+
+ /* What if id == LAST_NONSIGNAL_EVENT_ID - 1?
+- * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so the first loop
+- * fails immediately and we proceed with the wraparound loop below.
++ * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
++ * the first loop fails immediately and we proceed with the
++ * wraparound loop below.
+ */
+ p->next_nonsignal_event_id = id + 1;
+
+@@ -349,7 +361,8 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)
+ }
+
+ for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+- id < KFD_LAST_NONSIGNAL_EVENT_ID && lookup_event_by_id(p, id) != NULL;
++ id < KFD_LAST_NONSIGNAL_EVENT_ID &&
++ lookup_event_by_id(p, id) != NULL;
+ id++)
+ ;
+
+@@ -357,10 +370,9 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)
+ if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
+ p->next_nonsignal_event_id = id + 1;
+ return id;
+- } else {
+- p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+- return 0;
+ }
++ p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
++ return 0;
+ }
+
+ static struct kfd_event *
+@@ -371,7 +383,8 @@ lookup_event_by_page_slot(struct kfd_process *p,
+ }
+
+ static int
+-create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event *ev)
++create_signal_event(struct file *devkfd, struct kfd_process *p,
++ struct kfd_event *ev)
+ {
+ if ((ev->type == KFD_EVENT_TYPE_SIGNAL) &&
+ (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT)) {
+@@ -408,9 +421,11 @@ create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event
+ p->debug_event_count++;
+ }
+
+- ev->user_signal_address = &ev->signal_page->user_address[ev->signal_slot_index];
++ ev->user_signal_address =
++ &ev->signal_page->user_address[ev->signal_slot_index];
+
+- ev->event_id = make_signal_event_id(ev->signal_page, ev->signal_slot_index);
++ ev->event_id =
++ make_signal_event_id(ev->signal_page, ev->signal_slot_index);
+
+ pr_debug("signal event number %zu created with id %d, address %p\n",
+ p->signal_event_count, ev->event_id,
+@@ -420,7 +435,9 @@ create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event
+ }
+
+ /* No non-signal events are supported yet.
+- * We create them as events that never signal. Set event calls from user-mode are failed. */
++ * We create them as events that never signal. Set event calls from user-mode
++ * are failed.
++ */
+ static int
+ create_other_event(struct kfd_process *p, struct kfd_event *ev)
+ {
+@@ -456,7 +473,9 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
+ }
+ }
+
+- /* Abandon the list of waiters. Individual waiting threads will clean up their own data.*/
++ /* Abandon the list of waiters. Individual waiting threads will clean
++ * up their own data.
++ */
+ list_del(&ev->waiters);
+
+ hash_del(&ev->events);
+@@ -479,13 +498,15 @@ static void destroy_events(struct kfd_process *p)
+ destroy_event(p, ev);
+ }
+
+-/* We assume that the process is being destroyed and there is no need to unmap the pages
+- * or keep bookkeeping data in order. */
++/* We assume that the process is being destroyed and there is no need to unmap
++ * the pages or keep bookkeeping data in order.
++ */
+ static void shutdown_signal_pages(struct kfd_process *p)
+ {
+ struct signal_page *page, *tmp;
+
+- list_for_each_entry_safe(page, tmp, &p->signal_event_pages, event_pages) {
++ list_for_each_entry_safe(page, tmp, &p->signal_event_pages,
++ event_pages) {
+ if (page->user_address) {
+ free_pages((unsigned long)page->kernel_address,
+ get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
+@@ -502,7 +523,8 @@ void kfd_event_free_process(struct kfd_process *p)
+
+ static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
+ {
+- return ev->type == KFD_EVENT_TYPE_SIGNAL || ev->type == KFD_EVENT_TYPE_DEBUG;
++ return ev->type == KFD_EVENT_TYPE_SIGNAL ||
++ ev->type == KFD_EVENT_TYPE_DEBUG;
+ }
+
+ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
+@@ -519,6 +541,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
+ int ret = 0;
+
+ struct kfd_event *ev = kzalloc(sizeof(*ev), GFP_KERNEL);
++
+ if (!ev)
+ return -ENOMEM;
+
+@@ -648,7 +671,8 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
+
+ static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
+ {
+- page_slots(ev->signal_page)[ev->signal_slot_index] = UNSIGNALED_EVENT_SLOT;
++ page_slots(ev->signal_page)[ev->signal_slot_index] =
++ UNSIGNALED_EVENT_SLOT;
+ }
+
+ static bool is_slot_signaled(struct signal_page *page, unsigned int index)
+@@ -656,7 +680,8 @@ static bool is_slot_signaled(struct signal_page *page, unsigned int index)
+ return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT;
+ }
+
+-static void set_event_from_interrupt(struct kfd_process *p, struct kfd_event *ev)
++static void set_event_from_interrupt(struct kfd_process *p,
++ struct kfd_event *ev)
+ {
+ if (ev && event_can_be_gpu_signaled(ev)) {
+ acknowledge_signal(p, ev);
+@@ -674,6 +699,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
+ * running so the lookup function increments the process ref count.
+ */
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
++
+ if (!p)
+ return; /* Presumably process exited. */
+
+@@ -686,19 +712,20 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
+ ev = lookup_event_by_id(p, partial_id);
+ set_event_from_interrupt(p, ev);
+ } else {
+- /* Partial ID is in fact partial. For now we completely ignore it,
+- * but we could use any bits we did receive to search faster. */
++ /* Partial ID is in fact partial. For now we completely ignore
++ * it, but we could use any bits we did receive to search
++ * faster.
++ */
+ struct signal_page *page;
+- unsigned i;
++ unsigned int i;
+
+- list_for_each_entry(page, &p->signal_event_pages, event_pages) {
+- for (i = 0; i < SLOTS_PER_PAGE; i++) {
++ list_for_each_entry(page, &p->signal_event_pages, event_pages)
++ for (i = 0; i < SLOTS_PER_PAGE; i++)
+ if (is_slot_signaled(page, i)) {
+- ev = lookup_event_by_page_slot(p, page, i);
++ ev = lookup_event_by_page_slot(p,
++ page, i);
+ set_event_from_interrupt(p, ev);
+ }
+- }
+- }
+ }
+
+ mutex_unlock(&p->event_mutex);
+@@ -710,7 +737,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
+ struct kfd_event_waiter *event_waiters;
+ uint32_t i;
+
+- event_waiters = kmalloc(num_events * sizeof(struct kfd_event_waiter), GFP_KERNEL);
++ event_waiters = kmalloc_array(num_events,
++ sizeof(struct kfd_event_waiter), GFP_KERNEL);
+
+ if (event_waiters) {
+ for (i = 0; i < num_events; i++) {
+@@ -746,7 +774,8 @@ static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
+ struct kfd_event *ev = waiter->event;
+
+ /* Only add to the wait list if we actually need to
+- * wait on this event. */
++ * wait on this event.
++ */
+ if (!waiter->activated)
+ list_add(&waiter->waiters, &ev->waiters);
+ }
+@@ -783,8 +812,8 @@ static bool copy_signaled_event_data(uint32_t num_events,
+ if (event_waiters[i].activated &&
+ event_waiters[i].event->type == KFD_EVENT_TYPE_MEMORY)
+ if (copy_to_user(&data[event_waiters[i].input_index].memory_exception_data,
+- &event_waiters[i].event->memory_exception_data,
+- sizeof(struct kfd_hsa_memory_exception_data)))
++ &event_waiters[i].event->memory_exception_data,
++ sizeof(struct kfd_hsa_memory_exception_data)))
+ return false;
+
+ return true;
+@@ -803,7 +832,8 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
+
+ /* msecs_to_jiffies interprets all values above 2^31-1 as infinite,
+ * but we consider them finite.
+- * This hack is wrong, but nobody is likely to notice. */
++ * This hack is wrong, but nobody is likely to notice.
++ */
+ user_timeout_ms = min_t(uint32_t, user_timeout_ms, 0x7FFFFFFF);
+
+ return msecs_to_jiffies(user_timeout_ms) + 1;
+@@ -835,7 +865,8 @@ int kfd_wait_on_events(struct kfd_process *p,
+ mutex_lock(&p->event_mutex);
+
+ /* Set to something unreasonable - this is really
+- * just a bool for now. */
++ * just a bool for now.
++ */
+ *wait_result = KFD_WAIT_TIMEOUT;
+
+ event_waiters = alloc_event_waiters(num_events);
+@@ -889,10 +920,11 @@ int kfd_wait_on_events(struct kfd_process *p,
+
+ if (signal_pending(current)) {
+ /*
+- * This is wrong when a nonzero, non-infinite timeout is specified.
+- * We need to use ERESTARTSYS_RESTARTBLOCK, but struct restart_block
+- * contains a union with data for each user and it's in generic
+- * kernel code that I don't want to touch yet.
++ * This is wrong when a nonzero, non-infinite timeout
++ * is specified. We need to use
++ * ERESTARTSYS_RESTARTBLOCK, but struct restart_block
++ * contains a union with data for each user and it's in
++ * generic kernel code that I don't want to touch yet.
+ */
+ ret = -ERESTARTSYS;
+ break;
+@@ -954,7 +986,8 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
+ page = lookup_signal_page_by_index(p, page_index);
+ if (!page) {
+ /* Probably KFD bug, but mmap is user-accessible. */
+- pr_debug("signal page could not be found for page_index %u\n", page_index);
++ pr_debug("signal page could not be found for page_index %u\n",
++ page_index);
+ return -EINVAL;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+index 2fa5d32..a164fd5 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+@@ -426,5 +426,6 @@ int kfd_init_apertures(struct kfd_process *process)
+ void kfd_flush_tlb(struct kfd_dev *dev, uint32_t pasid)
+ {
+ const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
++
+ f2g->invalidate_tlbs(dev->kgd, pasid);
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+index c48fab5..258fdda 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+@@ -58,7 +58,8 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
+ int r;
+
+ r = kfifo_alloc(&kfd->ih_fifo,
+- KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
++ KFD_IH_NUM_ENTRIES *
++ kfd->device_info->ih_ring_entry_size,
+ GFP_KERNEL);
+ if (r) {
+ dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
+@@ -159,7 +160,8 @@ static void interrupt_wq(struct work_struct *work)
+ sizeof(uint32_t))];
+
+ while (dequeue_ih_ring_entry(dev, ih_ring_entry))
+- dev->device_info->event_interrupt_class->interrupt_wq(dev, ih_ring_entry);
++ dev->device_info->event_interrupt_class->interrupt_wq(dev,
++ ih_ring_entry);
+ }
+
+ bool interrupt_is_wanted(struct kfd_dev *dev,
+@@ -167,7 +169,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev,
+ uint32_t *patched_ihre, bool *flag)
+ {
+ /* integer and bitwise OR so there is no boolean short-circuiting */
+- unsigned wanted = 0;
++ unsigned int wanted = 0;
+
+ wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
+ ih_ring_entry, patched_ihre, flag);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+index b826689..4a67e76 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+@@ -244,7 +244,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
+
+ if (wptr + packet_size_in_dwords >= queue_size_dwords) {
+ /* make sure after rolling back to position 0, there is
+- * still enough space. */
++ * still enough space.
++ */
+ if (packet_size_in_dwords >= rptr) {
+ *buffer_ptr = NULL;
+ return -ENOMEM;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+index 6f12fe0..007a3ea 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+@@ -165,7 +165,7 @@ int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+ memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+- sizeof(struct pm4_mes_map_queues));
++ sizeof(struct pm4_mes_map_queues));
+ packet->bitfields2.alloc_format =
+ alloc_format__mes_map_queues__one_per_pipe_vi;
+ packet->bitfields2.num_queues = 1;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+index e6876f6..2126ec5 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+@@ -75,7 +75,7 @@ MODULE_PARM_DESC(send_sigterm,
+
+ static int amdkfd_init_completed;
+
+-int debug_largebar = 0;
++int debug_largebar;
+ module_param(debug_largebar, int, 0444);
+ MODULE_PARM_DESC(debug_largebar,
+ "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
+@@ -90,7 +90,8 @@ module_param_named(noretry, vega10_noretry, int, 0644);
+ MODULE_PARM_DESC(noretry,
+ "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
+
+-int kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
++int kgd2kfd_init(unsigned int interface_version,
++ const struct kgd2kfd_calls **g2f)
+ {
+ if (!amdkfd_init_completed)
+ return -EPROBE_DEFER;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+index 41d28b3..4dff1ec 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+@@ -249,7 +249,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
+ m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
+ DEFAULT_MIN_AVAIL_SIZE;
+ m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
+- if (atc_bit) {
++	if (atc_bit) {
+ m->cp_hqd_pq_control |= PQ_ATC_EN;
+ m->cp_hqd_ib_control |= IB_ATC_EN;
+ }
+@@ -268,9 +268,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
+
+ m->cp_hqd_vmid = q->vmid;
+
+- if (q->format == KFD_QUEUE_FORMAT_AQL) {
++ if (q->format == KFD_QUEUE_FORMAT_AQL)
+ m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
+- }
+
+ update_cu_mask(mm, mqd, q);
+ set_priority(m, q);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+index 0aeebc1..ddca15f 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+@@ -396,6 +396,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+ {
+ struct vi_sdma_mqd *m;
++
+ BUG_ON(!mm || !mqd || !q);
+
+ m = get_sdma_mqd(mqd);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+index 6cfe7f1..b3f7d43 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+@@ -32,7 +32,8 @@ int kfd_pasid_init(void)
+ {
+ pasid_limit = KFD_MAX_NUM_OF_PROCESSES;
+
+- pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
++ pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long),
++ GFP_KERNEL);
+ if (!pasid_bitmap)
+ return -ENOMEM;
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c
+index 3fb8896..937c0ac 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c
+@@ -485,7 +485,6 @@ void kfd_init_peer_direct(void)
+ }
+
+ pr_info("amdkfd: PeerDirect support was initialized successfully\n");
+- return;
+ }
+
+ /**
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+index 05e692b..31cef21 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+@@ -28,14 +28,14 @@
+ #define PM4_MES_HEADER_DEFINED
+ union PM4_MES_TYPE_3_HEADER {
+ struct {
+- uint32_t reserved1:8; /* < reserved */
+- uint32_t opcode:8; /* < IT opcode */
+- uint32_t count:14; /* < number of DWORDs - 1
+- * in the information body.
+- */
+- uint32_t type:2; /* < packet identifier.
+- * It should be 3 for type 3 packets
+- */
++ /* reserved */
++ uint32_t reserved1:8;
++ /* IT opcode */
++ uint32_t opcode:8;
++ /* number of DWORDs - 1 in the information body */
++ uint32_t count:14;
++ /* packet identifier. It should be 3 for type 3 packets */
++ uint32_t type:2;
+ };
+ uint32_t u32all;
+ };
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
+index 8cb3094..7c8d9b3 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
+@@ -30,10 +30,12 @@ union PM4_MES_TYPE_3_HEADER {
+ struct {
+ uint32_t reserved1 : 8; /* < reserved */
+ uint32_t opcode : 8; /* < IT opcode */
+- uint32_t count : 14;/* < number of DWORDs - 1 in the
+- information body. */
+- uint32_t type : 2; /* < packet identifier.
+- It should be 3 for type 3 packets */
++ uint32_t count : 14;/* < Number of DWORDS - 1 in the
++ * information body
++ */
++ uint32_t type : 2; /* < packet identifier
++ * It should be 3 for type 3 packets
++ */
+ };
+ uint32_t u32All;
+ };
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index a81dfe7..fc5ba66 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -188,9 +188,11 @@ enum asic_family_type {
+ #define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
+
+ struct kfd_event_interrupt_class {
+- bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry,
+- uint32_t *patched_ihre, bool *patched_flag);
+- void (*interrupt_wq)(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
++ bool (*interrupt_isr)(struct kfd_dev *dev,
++ const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
++ bool *patched_flag);
++ void (*interrupt_wq)(struct kfd_dev *dev,
++ const uint32_t *ih_ring_entry);
+ };
+
+ struct kfd_device_info {
+@@ -401,13 +403,13 @@ enum KFD_QUEUE_PRIORITY {
+ * @write_ptr: Defines the number of dwords written to the ring buffer.
+ *
+ * @doorbell_ptr: This field aim is to notify the H/W of new packet written to
+- * the queue ring buffer. This field should be similar to write_ptr and the user
+- * should update this field after he updated the write_ptr.
++ * the queue ring buffer. This field should be similar to write_ptr and the
++ * user should update this field after he updated the write_ptr.
+ *
+ * @doorbell_off: The doorbell offset in the doorbell pci-bar.
+ *
+- * @is_interop: Defines if this is a interop queue. Interop queue means that the
+- * queue can access both graphics and compute resources.
++ * @is_interop: Defines if this is a interop queue. Interop queue means that
++ * the queue can access both graphics and compute resources.
+ *
+ * @is_active: Defines if the queue is active or not.
+ *
+@@ -466,9 +468,10 @@ struct queue_properties {
+ * @properties: The queue properties.
+ *
+ * @mec: Used only in no cp scheduling mode and identifies to micro engine id
+- * that the queue should be execute on.
++ * that the queue should be execute on.
+ *
+- * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id.
++ * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
++ * id.
+ *
+ * @queue: Used only in no cp scheduliong mode and identifies the queue's slot.
+ *
+@@ -552,7 +555,7 @@ struct qcm_process_device {
+ unsigned int queue_count;
+ unsigned int vmid;
+ bool is_debug;
+- unsigned evicted; /* eviction counter, 0=active */
++ unsigned int evicted; /* eviction counter, 0=active */
+ /*
+ * All the memory management data should be here too
+ */
+@@ -601,9 +604,11 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
+ struct fence *fence);
+
+
+-/*8 byte handle containing GPU ID in the most significant 4 bytes and
+- * idr_handle in the least significant 4 bytes*/
+-#define MAKE_HANDLE(gpu_id, idr_handle) (((uint64_t)(gpu_id) << 32) + idr_handle)
++/* 8 byte handle containing GPU ID in the most significant 4 bytes and
++ * idr_handle in the least significant 4 bytes
++ */
++#define MAKE_HANDLE(gpu_id, idr_handle) \
++ (((uint64_t)(gpu_id) << 32) + idr_handle)
+ #define GET_GPU_ID(handle) (handle >> 32)
+ #define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
+
+@@ -642,7 +647,8 @@ struct kfd_process_device {
+
+ uint64_t sh_hidden_private_base_vmid;
+
+- /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
++ /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid)
++ */
+ enum kfd_pdd_bound bound;
+
+ /* VM context for GPUVM allocations */
+@@ -711,7 +717,8 @@ struct kfd_process {
+
+ struct process_queue_manager pqm;
+
+- unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
++ unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
++ BITS_PER_LONG)];
+
+ /*Is the user space process 32 bit?*/
+ bool is_32bit_user_mode;
+@@ -720,7 +727,8 @@ struct kfd_process {
+ struct mutex event_mutex;
+ /* All events in process hashed by ID, linked on kfd_event.events. */
+ DECLARE_HASHTABLE(events, 4);
+- struct list_head signal_event_pages; /* struct slot_page_header.event_pages */
++ /* struct slot_page_header.event_pages */
++ struct list_head signal_event_pages;
+ u32 next_nonsignal_event_id;
+ size_t signal_event_count;
+ size_t debug_event_count;
+@@ -760,7 +768,7 @@ struct amdkfd_ioctl_desc {
+ void kfd_process_create_wq(void);
+ void kfd_process_destroy_wq(void);
+ struct kfd_process *kfd_create_process(struct file *filep);
+-struct kfd_process *kfd_get_process(const struct task_struct *);
++struct kfd_process *kfd_get_process(const struct task_struct *task);
+ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
+ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
+ void kfd_unref_process(struct kfd_process *p);
+@@ -777,7 +785,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
+ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+ struct kfd_process *p);
+
+-int kfd_reserved_mem_mmap(struct kfd_process *process, struct vm_area_struct *vma);
++int kfd_reserved_mem_mmap(struct kfd_process *process,
++ struct vm_area_struct *vma);
+
+ /* KFD process API for creating and translating handles */
+ int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
+@@ -802,9 +811,11 @@ int kfd_map_memory_to_gpu(void *mem, struct kfd_process_device *pdd);
+ int kfd_unmap_memory_from_gpu(void *mem, struct kfd_process_device *pdd);
+
+ /* Process device data iterator */
+-struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
+-struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
+- struct kfd_process_device *pdd);
++struct kfd_process_device *kfd_get_first_process_device_data(
++ struct kfd_process *p);
++struct kfd_process_device *kfd_get_next_process_device_data(
++ struct kfd_process *p,
++ struct kfd_process_device *pdd);
+ bool kfd_has_process_device_data(struct kfd_process *p);
+
+ /* PASIDs */
+@@ -938,7 +949,7 @@ struct packet_manager {
+ struct mutex lock;
+ bool allocated;
+ struct kfd_mem_obj *ib_buffer_obj;
+- unsigned ib_size_bytes;
++ unsigned int ib_size_bytes;
+
+ struct packet_manager_funcs *pmf;
+ };
+@@ -1046,7 +1057,8 @@ int kfd_wait_on_events(struct kfd_process *p,
+ uint32_t num_events, void __user *data,
+ bool all, uint32_t user_timeout_ms,
+ enum kfd_event_wait_result *wait_result);
+-void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits);
++void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
++ uint32_t valid_id_bits);
+ #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
+ void kfd_signal_iommu_event(struct kfd_dev *dev,
+ unsigned int pasid, unsigned long address,
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 601e551..025ee5d 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -366,7 +366,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
+ /* No process locking is needed in this function, because the process
+ * is not findable any more. We must assume that no other thread is
+ * using it any more, otherwise we couldn't safely free the process
+- * stucture in the end. */
++ * structure in the end.
++ */
+ static void kfd_process_wq_release(struct work_struct *work)
+ {
+ struct kfd_process *p = container_of(work, struct kfd_process,
+@@ -448,7 +449,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
+
+ /* Iterate over all process device data structures and if the pdd is in
+ * debug mode,we should first force unregistration, then we will be
+- * able to destroy the queues */
++ * able to destroy the queues
++ */
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+ dev = pdd->dev;
+ mutex_lock(get_dbgmgr_mutex());
+@@ -611,8 +613,8 @@ static struct kfd_process *create_process(const struct task_struct *thread,
+ process->last_restore_timestamp = get_jiffies_64();
+
+ /* If PeerDirect interface was not detected try to detect it again
+- * in case if network driver was loaded later.
+- */
++ * in case if network driver was loaded later.
++ */
+ kfd_init_peer_direct();
+
+ return process;
+@@ -859,14 +861,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
+ }
+ #endif /* CONFIG_AMD_IOMMU_V2 */
+
+-struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
++struct kfd_process_device *kfd_get_first_process_device_data(
++ struct kfd_process *p)
+ {
+ return list_first_entry(&p->per_device_data,
+ struct kfd_process_device,
+ per_device_list);
+ }
+
+-struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
++struct kfd_process_device *kfd_get_next_process_device_data(
++ struct kfd_process *p,
+ struct kfd_process_device *pdd)
+ {
+ if (list_is_last(&pdd->per_device_list, &p->per_device_data))
+@@ -880,7 +884,8 @@ bool kfd_has_process_device_data(struct kfd_process *p)
+ }
+
+ /* Create specific handle mapped to mem from process local memory idr
+- * Assumes that the process lock is held. */
++ * Assumes that the process lock is held.
++ */
+ int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
+ void *mem, uint64_t start,
+ uint64_t length,
+@@ -935,7 +940,8 @@ struct kfd_bo *kfd_process_device_find_bo(struct kfd_process_device *pdd,
+ }
+
+ /* Translate specific handle from process local memory idr
+- * Assumes that the process lock is held. */
++ * Assumes that the process lock is held.
++ */
+ void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
+ int handle)
+ {
+@@ -973,7 +979,8 @@ void *kfd_process_find_bo_from_interval(struct kfd_process *p,
+ }
+
+ /* Remove specific handle from process local memory idr
+- * Assumes that the process lock is held. */
++ * Assumes that the process lock is held.
++ */
+ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
+ int handle)
+ {
+@@ -1042,7 +1049,8 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
+ return p;
+ }
+
+-int kfd_reserved_mem_mmap(struct kfd_process *process, struct vm_area_struct *vma)
++int kfd_reserved_mem_mmap(struct kfd_process *process,
++ struct vm_area_struct *vma)
+ {
+ unsigned long pfn, i;
+ int ret = 0;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+index 94e07ee..e4384ce 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+@@ -240,7 +240,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
+
+ case KFD_QUEUE_TYPE_COMPUTE:
+ /* check if there is over subscription */
+- if ((dev->dqm->sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
++ if ((dev->dqm->sched_policy ==
++ KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
+ ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
+ (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
+ pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c b/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c
+index 56bf9a2..2b3c300 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c
+@@ -179,7 +179,7 @@ void run_rdma_free_callback(struct kfd_bo *buf_obj)
+ * This function release resources previously allocated by get_pages() call.
+ *
+ * \param p_p2p_data - A pointer to pointer to amd_p2p_info entries
+- * allocated by get_pages() call.
++ * allocated by get_pages() call.
+ *
+ * \return 0 if operation was successful
+ */
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+index 1d1992f..3e21aef 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+@@ -517,10 +517,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
+ sysfs_show_64bit_prop(buffer, "local_mem_size",
+ local_mem_info.local_mem_size_private +
+ local_mem_info.local_mem_size_public);
+- }
+- else
+- sysfs_show_64bit_prop(buffer, "local_mem_size",
+- (unsigned long long int) 0);
++ } else
++ sysfs_show_64bit_prop(buffer, "local_mem_size", 0ULL);
+
+ sysfs_show_32bit_prop(buffer, "fw_version",
+ dev->gpu->mec_fw_version);
+@@ -881,17 +879,20 @@ static void kfd_debug_print_topology(void)
+
+ down_read(&topology_lock);
+
+- dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
++ dev = list_last_entry(&topology_device_list,
++ struct kfd_topology_device, list);
+ if (dev) {
+- if (dev->node_props.cpu_cores_count && dev->node_props.simd_count) {
++ if (dev->node_props.cpu_cores_count &&
++ dev->node_props.simd_count) {
+ pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
+- dev->node_props.device_id, dev->node_props.vendor_id);
+- }
+- else if (dev->node_props.cpu_cores_count)
++ dev->node_props.device_id,
++ dev->node_props.vendor_id);
++ } else if (dev->node_props.cpu_cores_count)
+ pr_info("Topology: Add CPU node\n");
+ else if (dev->node_props.simd_count)
+ pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
+- dev->node_props.device_id, dev->node_props.vendor_id);
++ dev->node_props.device_id,
++ dev->node_props.vendor_id);
+ }
+ up_read(&topology_lock);
+ }
+@@ -903,7 +904,8 @@ static void kfd_update_system_properties(void)
+ struct kfd_topology_device *dev;
+
+ down_read(&topology_lock);
+- dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
++ dev = list_last_entry(&topology_device_list,
++ struct kfd_topology_device, list);
+ if (dev) {
+ sys_props.platform_id =
+ (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
+@@ -1020,8 +1022,7 @@ int kfd_topology_init(void)
+ * topology_device_list
+ */
+
+- /* Initialize the head for the both the lists
+- */
++ /* Initialize the head for the both the lists */
+ INIT_LIST_HEAD(&topology_device_list);
+ INIT_LIST_HEAD(&temp_topology_device_list);
+ init_rwsem(&topology_lock);
+@@ -1031,7 +1032,8 @@ int kfd_topology_init(void)
+ /* Proximity domains in ACPI CRAT tables start counting at
+ * 0. The same should be true for virtual CRAT tables created
+ * at this stage. GPUs added later in kfd_topology_add_device
+- * use a counter. */
++ * use a counter.
++ */
+ proximity_domain = 0;
+
+ /*
+@@ -1091,12 +1093,12 @@ int kfd_topology_init(void)
+ kfd_update_system_properties();
+ kfd_debug_print_topology();
+ pr_info("Finished initializing topology\n");
+- }
+- else
++ } else
+ pr_err("Failed to update topology in sysfs ret=%d\n", ret);
+
+ /* For nodes with GPU, this information gets added
+- * when GPU is detected (kfd_topology_add_device). */
++ * when GPU is detected (kfd_topology_add_device).
++ */
+ if (cpu_only_node) {
+ /* Add additional information to CPU only node created above */
+ down_write(&topology_lock);
+@@ -1149,9 +1151,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
+ return hashout;
+ }
+ /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
+- * the GPU device is not already present in the topology device list
+- * then return NULL. This means a new topology device has to be
+- * created for this GPU.
++ * the GPU device is not already present in the topology device
++ * list then return NULL. This means a new topology device has to
++ * be created for this GPU.
+ * TODO: Rather than assiging @gpu to first topology device withtout
+ * gpu attached, it will better to have more stringent check.
+ */
+@@ -1244,9 +1246,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+
+ /* Check to see if this gpu device exists in the topology_device_list.
+ * If so, assign the gpu to that device,
+- * else create a Virtual CRAT for this gpu device and then parse that CRAT
+- * to create a new topology device. Once created assign the gpu to that
+- * topology device
++ * else create a Virtual CRAT for this gpu device and then parse that
++ * CRAT to create a new topology device. Once created assign the gpu to
++ * that topology device
+ */
+ dev = kfd_assign_gpu(gpu);
+ if (!dev) {
+@@ -1265,8 +1267,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+ kfd_topology_update_device_list(&temp_topology_device_list,
+ &topology_device_list);
+
+- /*
+- * Update the SYSFS tree, since we added another topology device
++ /* Update the SYSFS tree, since we added another topology
++ * device
+ */
+ res = kfd_topology_update_sysfs();
+ up_write(&topology_lock);
+@@ -1284,13 +1286,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+ gpu->id = gpu_id;
+
+ /* TODO: Move the following lines to function
+- * kfd_add_non_crat_information */
++ * kfd_add_non_crat_information
++ */
+
+ /* Fill-in additional information that is not available in CRAT but
+- * needed for the topology */
++ * needed for the topology
++ */
+
+ dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info);
+- dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine;
++ dev->node_props.simd_arrays_per_engine =
++ cu_info.num_shader_arrays_per_engine;
+
+ dev->node_props.vendor_id = gpu->pdev->vendor;
+ dev->node_props.device_id = gpu->pdev->device;
+@@ -1329,8 +1334,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+ }
+
+ /* Fix errors in CZ CRAT.
+- * simd_count: Carrizo CRAT reports wrong simd_count, probably because it
+- * doesn't consider masked out CUs
++ * simd_count: Carrizo CRAT reports wrong simd_count, probably because
++ * it doesn't consider masked out CUs
+ * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd.
+ * capability flag: Carrizo CRAT doesn't report IOMMU flags.
+ */
+@@ -1339,7 +1344,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+ cu_info.simd_per_cu * cu_info.cu_active_number;
+ dev->node_props.max_waves_per_simd = 10;
+ dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
+- }
++	}
+
+ kfd_debug_print_topology();
+ err:
+@@ -1444,7 +1449,7 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
+ int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
+ {
+ struct kfd_topology_device *dev;
+- unsigned i = 0;
++ unsigned int i = 0;
+ int r = 0;
+
+ down_read(&topology_lock);
+@@ -1469,7 +1474,7 @@ int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
+ int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
+ {
+ struct kfd_topology_device *dev;
+- unsigned i = 0;
++ unsigned int i = 0;
+ int r = 0;
+
+ down_read(&topology_lock);
+--
+2.7.4
+