diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/4294-drm-amdkfd-Cosmetic-changes-to-match-upstream.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/4294-drm-amdkfd-Cosmetic-changes-to-match-upstream.patch | 1825 |
1 files changed, 1825 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/4294-drm-amdkfd-Cosmetic-changes-to-match-upstream.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/4294-drm-amdkfd-Cosmetic-changes-to-match-upstream.patch new file mode 100644 index 00000000..460b39c5 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/4294-drm-amdkfd-Cosmetic-changes-to-match-upstream.patch @@ -0,0 +1,1825 @@ +From 9bde4e85bcb800b0f043f91f9092e8b9d6377e24 Mon Sep 17 00:00:00 2001 +From: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com> +Date: Wed, 9 Jan 2019 18:20:04 +0530 +Subject: [PATCH 4294/5725] drm/amdkfd: Cosmetic changes to match upstream + +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +Signed-off-by: Kalyan Alle <kalyan.alle@amd.com> +Signed-off-by: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 ++- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 57 ++++++--- + drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 48 ++++---- + drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 69 ++++++----- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 24 ++-- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 9 +- + .../drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 4 +- + .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 29 ++--- + drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 15 +-- + drivers/gpu/drm/amd/amdkfd/kfd_events.c | 7 +- + drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 14 +-- + drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 2 - + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 5 +- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 97 ++++++++------- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 11 +- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 1 - + drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 13 +-- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 40 +++---- + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 130 ++++++++++----------- + .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +- + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 58 ++++----- + drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 9 +- + drivers/gpu/drm/amd/amdkfd/soc15_int.h | 2 +- + 26 files changed, 334 insertions(+), 336 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index 491652c..bb38da1 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -24,6 +24,7 @@ + #include <linux/export.h> + #include <linux/err.h> + #include <linux/fs.h> ++#include <linux/file.h> + #include <linux/sched.h> + #include <linux/sched/mm.h> + #include <linux/slab.h> +@@ -45,7 +46,6 @@ + static long kfd_ioctl(struct file *, unsigned int, unsigned long); + static int kfd_open(struct inode *, struct file *); + static int kfd_mmap(struct file *, struct vm_area_struct *); +-static bool kfd_dev_is_large_bar(struct kfd_dev *dev); + + static const char kfd_dev_name[] = "kfd"; + +@@ -903,7 +903,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, + mutex_lock(&p->mutex); + + if (!kfd_has_process_device_data(p)) +- goto out_upwrite; ++ goto out_unlock; + + /* Run over all pdd of the process */ + pdd = kfd_get_first_process_device_data(p); +@@ -912,7 +912,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, + pdd = kfd_get_next_process_device_data(p, pdd); + } while (pdd); + +- goto out_upwrite; ++ goto out_unlock; + } + + /* Fill in process-aperture information for all available +@@ -929,7 +929,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, + if (!kfd_has_process_device_data(p)) { + args->num_of_nodes = 0; + kfree(pa); +- goto out_upwrite; ++ goto out_unlock; + } + + /* Run over all pdd of the process */ +@@ -971,7 +971,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, + kfree(pa); + return ret ? -EFAULT : 0; + +-out_upwrite: ++out_unlock: + mutex_unlock(&p->mutex); + return 0; + } +@@ -1325,8 +1325,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, + return 0; + + err_free: +- dev->kfd2kgd->free_memory_of_gpu(dev->kgd, +- (struct kgd_mem *) mem); ++ dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); + err_unlock: + mutex_unlock(&p->mutex); + return err; +@@ -1367,7 +1366,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, + /* If freeing the buffer failed, leave the handle in place for + * clean-up during process tear-down. + */ +- if (ret == 0) ++ if (!ret) + kfd_process_device_remove_obj_handle( + pdd, GET_IDR_HANDLE(args->handle)); + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +index a803898..6688882 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +@@ -1,7 +1,27 @@ +-#include <linux/kernel.h> +-#include <linux/acpi.h> +-#include <linux/mm.h> ++/* ++ * Copyright 2015-2017 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ + #include <linux/pci.h> ++#include <linux/acpi.h> + #include "kfd_crat.h" + #include "kfd_priv.h" + #include "kfd_topology.h" +@@ -266,6 +286,7 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, + + id = cache->processor_id_low; + ++ pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); + list_for_each_entry(dev, device_list, list) { + total_num_of_cu = (dev->node_props.array_count * + dev->node_props.cu_per_simd_array); +@@ -415,11 +436,15 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, + ret = kfd_parse_subtype_cache(cache, device_list); + break; + case CRAT_SUBTYPE_TLB_AFFINITY: +- /* For now, nothing to do here */ ++ /* ++ * For now, nothing to do here ++ */ + pr_debug("Found TLB entry in CRAT table (not processing)\n"); + break; + case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: +- /* For now, nothing to do here */ ++ /* ++ * For now, nothing to do here ++ */ + pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); + break; + case CRAT_SUBTYPE_IOLINK_AFFINITY: +@@ -444,9 +469,8 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, + * + * Return - 0 if successful else -ve value + */ +-int kfd_parse_crat_table(void *crat_image, +- struct list_head *device_list, +- uint32_t proximity_domain) ++int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, ++ uint32_t proximity_domain) + { + struct kfd_topology_device *top_dev = NULL; + struct crat_subtype_generic *sub_type_hdr; +@@ -693,7 +717,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, + * crat_image will be NULL + * @size: [OUT] size of crat_image + * +- * Return 0 if successful else return -ve value ++ * Return 0 if successful else return error code + */ + #ifdef CONFIG_ACPI + int kfd_create_crat_image_acpi(void **crat_image, size_t *size) +@@ -725,10 +749,8 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) + } + + pcrat_image = kmalloc(crat_table->length, GFP_KERNEL); +- if (!pcrat_image) { +- pr_err("No memory for allocating CRAT image\n"); ++ if (!pcrat_image) + return -ENOMEM; +- } + + memcpy(pcrat_image, crat_table, crat_table->length); + +@@ -1072,8 +1094,8 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size, + * [OUT] actual size of data filled in crat_image + */ + static int kfd_create_vcrat_image_gpu(void *pcrat_image, +- size_t *size, struct kfd_dev *kdev, +- uint32_t proximity_domain) ++ size_t *size, struct kfd_dev *kdev, ++ uint32_t proximity_domain) + { + struct crat_header *crat_table = (struct crat_header *)pcrat_image; + struct crat_subtype_generic *sub_type_hdr; +@@ -1241,7 +1263,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, + * Return 0 if successful else return -ve value + */ + int kfd_create_crat_image_virtual(void **crat_image, size_t *size, +- int flags, struct kfd_dev *kdev, uint32_t proximity_domain) ++ int flags, struct kfd_dev *kdev, ++ uint32_t proximity_domain) + { + void *pcrat_image = NULL; + int ret = 0; +@@ -1271,8 +1294,8 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, + if (!pcrat_image) + return -ENOMEM; + *size = VCRAT_SIZE_FOR_GPU; +- ret = kfd_create_vcrat_image_gpu(pcrat_image, size, +- kdev, proximity_domain); ++ ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev, ++ proximity_domain); + break; + case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU): + /* TODO: */ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +index 00de41f..cd7ee6d 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +@@ -24,7 +24,6 @@ + #define KFD_CRAT_H_INCLUDED + + #include <linux/types.h> +-#include "kfd_priv.h" + + #pragma pack(1) + +@@ -228,12 +227,12 @@ struct crat_subtype_ccompute { + /* + * HSA IO Link Affinity structure and definitions + */ +-#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0) +-#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1) +-#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) +-#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) +-#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) +-#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0 ++#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0) ++#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1) ++#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) ++#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) ++#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) ++#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0 + + /* + * IO interface types +@@ -241,18 +240,18 @@ struct crat_subtype_ccompute { + #define CRAT_IOLINK_TYPE_UNDEFINED 0 + #define CRAT_IOLINK_TYPE_HYPERTRANSPORT 1 + #define CRAT_IOLINK_TYPE_PCIEXPRESS 2 +-#define CRAT_IOLINK_TYPE_AMBA 3 +-#define CRAT_IOLINK_TYPE_MIPI 4 +-#define CRAT_IOLINK_TYPE_QPI_1_1 5 +-#define CRAT_IOLINK_TYPE_RESERVED1 6 +-#define CRAT_IOLINK_TYPE_RESERVED2 7 +-#define CRAT_IOLINK_TYPE_RAPID_IO 8 +-#define CRAT_IOLINK_TYPE_INFINIBAND 9 +-#define CRAT_IOLINK_TYPE_RESERVED3 10 +-#define CRAT_IOLINK_TYPE_OTHER 11 +-#define CRAT_IOLINK_TYPE_MAX 255 +- +-#define CRAT_IOLINK_RESERVED_LENGTH 24 ++#define CRAT_IOLINK_TYPE_AMBA 3 ++#define CRAT_IOLINK_TYPE_MIPI 4 ++#define CRAT_IOLINK_TYPE_QPI_1_1 5 ++#define CRAT_IOLINK_TYPE_RESERVED1 6 ++#define CRAT_IOLINK_TYPE_RESERVED2 7 ++#define CRAT_IOLINK_TYPE_RAPID_IO 8 ++#define CRAT_IOLINK_TYPE_INFINIBAND 9 ++#define CRAT_IOLINK_TYPE_RESERVED3 10 ++#define CRAT_IOLINK_TYPE_OTHER 11 ++#define CRAT_IOLINK_TYPE_MAX 255 ++ ++#define CRAT_IOLINK_RESERVED_LENGTH 24 + + struct crat_subtype_iolink { + uint8_t type; +@@ -308,13 +307,16 @@ struct cdit_header { + + #pragma pack() + ++struct kfd_dev; ++ + #ifdef CONFIG_ACPI + int kfd_create_crat_image_acpi(void **crat_image, size_t *size); + #endif + void kfd_destroy_crat_image(void *crat_image); +-int kfd_parse_crat_table(void *crat_image, +- struct list_head *device_list, +- uint32_t proximity_domain); ++int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, ++ uint32_t proximity_domain); + int kfd_create_crat_image_virtual(void **crat_image, size_t *size, +- int flags, struct kfd_dev *kdev, uint32_t proximity_domain); ++ int flags, struct kfd_dev *kdev, ++ uint32_t proximity_domain); ++ + #endif /* KFD_CRAT_H_INCLUDED */ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +index 232e28f..4bd6ebf 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2014 Advanced Micro Devices, Inc. ++ * Copyright 2016-2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index 5b22ae0..8fb7580 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -240,6 +240,7 @@ static const struct kfd_device_info vega10_vf_device_info = { + .num_sdma_engines = 2, + }; + ++ + struct kfd_deviceid { + unsigned short did; + const struct kfd_device_info *device_info; +@@ -288,35 +289,35 @@ static const struct kfd_deviceid supported_devices[] = { + { 0x67B9, &hawaii_device_info }, /* Hawaii */ + { 0x67BA, &hawaii_device_info }, /* Hawaii */ + { 0x67BE, &hawaii_device_info }, /* Hawaii */ +- { 0x6920, &tonga_device_info }, /* Tonga */ +- { 0x6921, &tonga_device_info }, /* Tonga */ +- { 0x6928, &tonga_device_info }, /* Tonga */ +- { 0x6929, &tonga_device_info }, /* Tonga */ +- { 0x692B, &tonga_device_info }, /* Tonga */ +- { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ +- { 0x6938, &tonga_device_info }, /* Tonga */ +- { 0x6939, &tonga_device_info }, /* Tonga */ +- { 0x7300, &fiji_device_info }, /* Fiji */ +- { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ +- { 0x67C0, &polaris10_device_info }, /* Polaris10 */ +- { 0x67C1, &polaris10_device_info }, /* Polaris10 */ +- { 0x67C2, &polaris10_device_info }, /* Polaris10 */ ++ { 0x6920, &tonga_device_info }, /* Tonga */ ++ { 0x6921, &tonga_device_info }, /* Tonga */ ++ { 0x6928, &tonga_device_info }, /* Tonga */ ++ { 0x6929, &tonga_device_info }, /* Tonga */ ++ { 0x692B, &tonga_device_info }, /* Tonga */ ++ { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ ++ { 0x6938, &tonga_device_info }, /* Tonga */ ++ { 0x6939, &tonga_device_info }, /* Tonga */ ++ { 0x7300, &fiji_device_info }, /* Fiji */ ++ { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ ++ { 0x67C0, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67C1, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67C2, &polaris10_device_info }, /* Polaris10 */ + { 0x67C4, &polaris10_device_info }, /* Polaris10 */ + { 0x67C7, &polaris10_device_info }, /* Polaris10 */ +- { 0x67C8, &polaris10_device_info }, /* Polaris10 */ +- { 0x67C9, &polaris10_device_info }, /* Polaris10 */ +- { 0x67CA, &polaris10_device_info }, /* Polaris10 */ +- { 0x67CC, &polaris10_device_info }, /* Polaris10 */ +- { 0x67CF, &polaris10_device_info }, /* Polaris10 */ +- { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ ++ { 0x67C8, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67C9, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67CA, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67CC, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67CF, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ + { 0x67DF, &polaris10_device_info }, /* Polaris10 */ +- { 0x67E0, &polaris11_device_info }, /* Polaris11 */ +- { 0x67E1, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67E0, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67E1, &polaris11_device_info }, /* Polaris11 */ + { 0x67E3, &polaris11_device_info }, /* Polaris11 */ +- { 0x67E7, &polaris11_device_info }, /* Polaris11 */ +- { 0x67E8, &polaris11_device_info }, /* Polaris11 */ +- { 0x67E9, &polaris11_device_info }, /* Polaris11 */ +- { 0x67EB, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67E7, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67E8, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67E9, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67EB, &polaris11_device_info }, /* Polaris11 */ + { 0x67EF, &polaris11_device_info }, /* Polaris11 */ + { 0x67FF, &polaris11_device_info }, /* Polaris11 */ + { 0x6860, &vega10_device_info }, /* Vega10 */ +@@ -366,11 +367,10 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, + return NULL; + } + +- if (device_info->needs_pci_atomics) { +- /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. +- * 32 and 64-bit requests are possible and must be +- * supported. +- */ ++ /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. ++ * 32 and 64-bit requests are possible and must be ++ * supported. ++ */ + ret = pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64); +@@ -379,7 +379,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, + "skipped device %x:%x, PCI rejects atomics", + pdev->vendor, pdev->device); + return NULL; +- } + } + + kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); +@@ -427,7 +426,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, + KGD_ENGINE_SDMA1); + kfd->shared_resources = *gpu_resources; + +- /* Usually first_vmid_kfd = 8, last_vmid_kfd = 15 */ + kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; + kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; + kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd +@@ -669,10 +667,11 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) + + spin_lock(&kfd->interrupt_lock); + +- if (kfd->interrupts_active && interrupt_is_wanted(kfd, ih_ring_entry, +- patched_ihre, &is_patched) ++ if (kfd->interrupts_active ++ && interrupt_is_wanted(kfd, ih_ring_entry, ++ patched_ihre, &is_patched) + && enqueue_ih_ring_entry(kfd, +- is_patched ? patched_ihre : ih_ring_entry)) ++ is_patched ? patched_ihre : ih_ring_entry)) + queue_work(kfd->ih_wq, &kfd->interrupt_work); + + spin_unlock(&kfd->interrupt_lock); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index 8067092..d7822e2 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -21,10 +21,11 @@ + * + */ + ++#include <linux/ratelimit.h> ++#include <linux/printk.h> + #include <linux/slab.h> + #include <linux/list.h> + #include <linux/types.h> +-#include <linux/printk.h> + #include <linux/bitops.h> + #include <linux/sched.h> + #include "kfd_priv.h" +@@ -199,7 +200,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, + dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd, + qpd->vmid, + qpd->page_table_base); +- /*invalidate the VM context after pasid and vmid mapping is set up*/ ++ /* invalidate the VM context after pasid and vmid mapping is set up */ + kfd_flush_tlb(qpd_to_pdd(qpd)); + + return 0; +@@ -289,7 +290,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, + if (retval) { + if (list_empty(&qpd->queues_list)) + deallocate_vmid(dqm, qpd, q); +- + goto out_unlock; + } + +@@ -482,11 +482,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + int retval; + struct mqd_manager *mqd; + struct kfd_process_device *pdd; +- + bool prev_active = false; + + mutex_lock(&dqm->lock); +- + pdd = kfd_get_process_device_data(q->device, q->process); + if (!pdd) { + retval = -ENODEV; +@@ -502,7 +500,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + * Eviction state logic: we only mark active queues as evicted + * to avoid the overhead of restoring inactive queues later + */ +- if (pdd->qpd.evicted > 0) ++ if (pdd->qpd.evicted) + q->properties.is_evicted = (q->properties.queue_size > 0 && + q->properties.queue_percent > 0 && + q->properties.queue_address != 0); +@@ -762,9 +760,9 @@ static int register_process(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) + { + struct device_process_node *n; +- int retval; + struct kfd_process_device *pdd; + uint32_t pd_base; ++ int retval; + + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (!n) +@@ -781,7 +779,6 @@ static int register_process(struct device_queue_manager *dqm, + + /* Update PD Base in QPD */ + qpd->page_table_base = pd_base; +- pr_debug("Updated PD address to 0x%08x\n", pd_base); + + retval = dqm->asic_ops.update_qpd(dqm, qpd); + +@@ -1076,9 +1073,7 @@ static int start_cpsch(struct device_queue_manager *dqm) + static int stop_cpsch(struct device_queue_manager *dqm) + { + mutex_lock(&dqm->lock); +- + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); +- + mutex_unlock(&dqm->lock); + + kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); +@@ -1633,7 +1628,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, + + out: + mutex_unlock(&dqm->lock); +- + return retval; + } + +@@ -1648,7 +1642,13 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) + return NULL; + + switch (dev->device_info->asic_family) { ++ /* HWS is not available on Hawaii. */ + case CHIP_HAWAII: ++ /* HWS depends on CWSR for timely dequeue. CWSR is not ++ * available on Tonga. ++ * ++ * FIXME: This argument also applies to Kaveri. ++ */ + case CHIP_TONGA: + dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; + break; +@@ -1728,7 +1728,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) + + case CHIP_VEGA10: + case CHIP_RAVEN: +- device_queue_manager_init_v9_vega10(&dqm->asic_ops); ++ device_queue_manager_init_v9(&dqm->asic_ops); + break; + default: + WARN(1, "Unexpected ASIC family %u", +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +index 3f17e5e..82fafd0 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +@@ -209,7 +209,7 @@ void device_queue_manager_init_vi( + struct device_queue_manager_asic_ops *asic_ops); + void device_queue_manager_init_vi_tonga( + struct device_queue_manager_asic_ops *asic_ops); +-void device_queue_manager_init_v9_vega10( ++void device_queue_manager_init_v9( + struct device_queue_manager_asic_ops *asic_ops); + void program_sh_mem_settings(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +@@ -218,18 +218,11 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); + unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); + +-int process_evict_queues(struct device_queue_manager *dqm, +- struct qcm_process_device *qpd); +-int process_restore_queues(struct device_queue_manager *dqm, +- struct qcm_process_device *qpd); +- +- + static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) + { + return (pdd->lds_base >> 16) & 0xFF; + } + +-/* This function is only useful for GFXv7 and v8 */ + static inline unsigned int + get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) + { +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +index cc27190..4175153 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016 Advanced Micro Devices, Inc. ++ * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -32,7 +32,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, + static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd); + +-void device_queue_manager_init_v9_vega10( ++void device_queue_manager_init_v9( + struct device_queue_manager_asic_ops *asic_ops) + { + asic_ops->update_qpd = update_qpd_v9; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +index 030b014..fd60a11 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +@@ -33,35 +33,22 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); +-static int update_qpd_vi(struct device_queue_manager *dqm, +- struct qcm_process_device *qpd); +-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, +- struct qcm_process_device *qpd); +- +-/* +- * Tonga device queue manager functions +- */ + static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); ++static int update_qpd_vi(struct device_queue_manager *dqm, ++ struct qcm_process_device *qpd); + static int update_qpd_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); ++static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, ++ struct qcm_process_device *qpd); + static void init_sdma_vm_tonga(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); + +-void device_queue_manager_init_vi_tonga( +- struct device_queue_manager_asic_ops *asic_ops) +-{ +- asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; +- asic_ops->update_qpd = update_qpd_vi_tonga; +- asic_ops->init_sdma_vm = init_sdma_vm_tonga; +-} +- +- + void device_queue_manager_init_vi( + struct device_queue_manager_asic_ops *asic_ops) + { +@@ -70,6 +57,14 @@ void device_queue_manager_init_vi( + asic_ops->init_sdma_vm = init_sdma_vm; + } + ++void device_queue_manager_init_vi_tonga( ++ struct device_queue_manager_asic_ops *asic_ops) ++{ ++ asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; ++ asic_ops->update_qpd = update_qpd_vi_tonga; ++ asic_ops->init_sdma_vm = init_sdma_vm_tonga; ++} ++ + static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) + { + /* In 64-bit mode, we can only control the top 3 bits of the LDS, +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +index fc41689..c3744d8 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +@@ -115,7 +115,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) + pr_debug("doorbell aperture size == 0x%08lX\n", + kfd->shared_resources.doorbell_aperture_size); + +- pr_debug("doorbell kernel address == 0x%p\n", kfd->doorbell_kernel_ptr); ++ pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); + + return 0; + } +@@ -189,7 +189,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, + + pr_debug("Get kernel queue doorbell\n" + " doorbell offset == 0x%08X\n" +- " kernel address == 0x%p\n", ++ " kernel address == %p\n", + *doorbell_off, (kfd->doorbell_kernel_ptr + inx)); + + return kfd->doorbell_kernel_ptr + inx; +@@ -210,7 +210,7 @@ void write_kernel_doorbell(void __iomem *db, u32 value) + { + if (db) { + writel(value, db); +- pr_debug("Writing %d to doorbell address 0x%p\n", value, db); ++ pr_debug("Writing %d to doorbell address %p\n", value, db); + } + } + +@@ -220,14 +220,10 @@ void write_kernel_doorbell64(void __iomem *db, u64 value) + WARN(((unsigned long)db & 7) != 0, + "Unaligned 64-bit doorbell"); + writeq(value, (u64 __iomem *)db); +- pr_debug("writing %llu to doorbell address 0x%p\n", value, db); ++ pr_debug("writing %llu to doorbell address %p\n", value, db); + } + } + +-/* +- * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1 +- * to doorbells with the process's doorbell page +- */ + unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, + struct kfd_process *process, + unsigned int doorbell_id) +@@ -239,7 +235,8 @@ unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, + * units regardless of the ASIC-dependent doorbell size. + */ + return kfd->doorbell_id_offset + +- process->doorbell_index * (kfd_doorbell_process_slice(kfd)/sizeof(u32)) + ++ process->doorbell_index ++ * kfd_doorbell_process_slice(kfd) / sizeof(u32) + + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); + } + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +index 24d8a21..1dc1584 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +@@ -390,7 +390,11 @@ static void set_event(struct kfd_event *ev) + { + struct kfd_event_waiter *waiter; + +- /* Auto reset if the list is non-empty and we're waking someone. */ ++ /* Auto reset if the list is non-empty and we're waking ++ * someone. waitqueue_active is safe here because we're ++ * protected by the p->event_mutex, which is also held when ++ * updating the wait queues in kfd_wait_on_events. ++ */ + ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq); + + list_for_each_entry(waiter, &ev->wq.head, wait.entry) +@@ -777,7 +781,6 @@ int kfd_wait_on_events(struct kfd_process *p, + + int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) + { +- + unsigned long pfn; + struct kfd_signal_page *page; + int ret; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +index 5672710..0cae2e9 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +@@ -289,7 +289,6 @@ + + #define MAKE_LDS_APP_BASE_VI() \ + (((uint64_t)(0x1UL) << 61) + 0x0) +- + #define MAKE_LDS_APP_LIMIT(base) \ + (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) + +@@ -323,7 +322,7 @@ int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd, + return 0; + } + +-void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) ++static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) + { + /* + * node id couldn't be 0 - the three MSB bits of +@@ -353,7 +352,7 @@ void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + } + +-void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) ++static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) + { + pdd->lds_base = MAKE_LDS_APP_BASE_V9(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); +@@ -388,10 +387,10 @@ int kfd_init_apertures(struct kfd_process *process) + pdd = kfd_create_process_device_data(dev, process); + if (!pdd) { + pr_err("Failed to create process device data\n"); +- return -1; ++ return -ENOMEM; + } + /* +- * For 64 bit process aperture will be statically reserved in ++ * For 64 bit process apertures will be statically reserved in + * the x86_64 non canonical process address space + * amdkfd doesn't currently support apertures for 32 bit process + */ +@@ -415,8 +414,9 @@ int kfd_init_apertures(struct kfd_process *process) + kfd_init_apertures_v9(pdd, id); + break; + default: +- pr_err("Unknown chip in kfd_init_apertures\n"); +- return -1; ++ WARN(1, "Unexpected ASIC family %u", ++ dev->device_info->asic_family); ++ return -EINVAL; + } + + if (!dev->device_info->needs_iommu_device) { +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +index 728aaad..5217e51 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016 Advanced Micro Devices, Inc. ++ * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +index 5b798f9..7a61f38 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +@@ -75,7 +75,8 @@ int kfd_iommu_device_init(struct kfd_dev *kfd) + } + + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { +- dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", ++ dev_err(kfd_device, ++ "error required iommu flags ats %i, pri %i, pasid %i\n", + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +index b48c29f..19e54ac 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +@@ -22,8 +22,6 @@ + */ + + #include "kfd_kernel_queue.h" +-#include "kfd_pm4_headers.h" +-#include "kfd_pm4_opcodes.h" + + static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +index 6724b1a..684a3bf 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016 Advanced Micro Devices, Inc. ++ * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -44,7 +44,7 @@ static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, + int retval; + + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); +- if (retval != 0) ++ if (retval) + return false; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; +@@ -126,7 +126,6 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + +- + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +index 357478f..bf20c6d 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +@@ -67,12 +67,25 @@ static void submit_packet_vi(struct kernel_queue *kq) + kq->pending_wptr); + } + +-static int pm_map_process_vi(struct packet_manager *pm, +- uint32_t *buffer, struct qcm_process_device *qpd) ++unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) ++{ ++ union PM4_MES_TYPE_3_HEADER header; ++ ++ header.u32All = 0; ++ header.opcode = opcode; ++ header.count = packet_size / 4 - 2; ++ header.type = PM4_TYPE_3; ++ ++ return header.u32All; ++} ++ ++static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer, ++ struct qcm_process_device *qpd) + { + struct pm4_mes_map_process *packet; + + packet = (struct pm4_mes_map_process *)buffer; ++ + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, +@@ -99,27 +112,16 @@ static int pm_map_process_vi(struct packet_manager *pm, + return 0; + } + +- +-unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) +-{ +- union PM4_MES_TYPE_3_HEADER header; +- +- header.u32All = 0; +- header.opcode = opcode; +- header.count = packet_size / 4 - 2; +- header.type = PM4_TYPE_3; +- +- return header.u32All; +-} +- + static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) + { + struct pm4_mes_runlist *packet; +- + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; + ++ if (WARN_ON(!ib)) ++ return -EFAULT; ++ + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number +@@ -132,7 +134,6 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + +- + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); +@@ -150,6 +151,34 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, + return 0; + } + ++int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, ++ struct scheduling_resources *res) ++{ ++ struct pm4_mes_set_resources *packet; ++ ++ packet = (struct pm4_mes_set_resources *)buffer; ++ memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); ++ ++ packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, ++ sizeof(struct pm4_mes_set_resources)); ++ ++ packet->bitfields2.queue_type = ++ queue_type__mes_set_resources__hsa_interface_queue_hiq; ++ packet->bitfields2.vmid_mask = res->vmid_mask; ++ packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; ++ packet->bitfields7.oac_mask = res->oac_mask; ++ packet->bitfields8.gds_heap_base = res->gds_heap_base; ++ packet->bitfields8.gds_heap_size = res->gds_heap_size; ++ ++ packet->gws_mask_lo = lower_32_bits(res->gws_mask); ++ packet->gws_mask_hi = upper_32_bits(res->gws_mask); ++ ++ packet->queue_mask_lo = lower_32_bits(res->queue_mask); ++ packet->queue_mask_hi = upper_32_bits(res->queue_mask); ++ ++ return 0; ++} ++ + static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) + { +@@ -209,34 +238,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, + return 0; + } + +-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, +- struct scheduling_resources *res) +-{ +- struct pm4_mes_set_resources *packet; +- +- packet = (struct pm4_mes_set_resources *)buffer; +- memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); +- +- packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, +- sizeof(struct pm4_mes_set_resources)); +- +- packet->bitfields2.queue_type = +- queue_type__mes_set_resources__hsa_interface_queue_hiq; +- packet->bitfields2.vmid_mask = res->vmid_mask; +- packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; +- packet->bitfields7.oac_mask = res->oac_mask; +- packet->bitfields8.gds_heap_base = res->gds_heap_base; +- packet->bitfields8.gds_heap_size = res->gds_heap_size; +- +- packet->gws_mask_lo = lower_32_bits(res->gws_mask); +- packet->gws_mask_hi = upper_32_bits(res->gws_mask); +- +- packet->queue_mask_lo = lower_32_bits(res->queue_mask); +- packet->queue_mask_hi = upper_32_bits(res->queue_mask); +- +- return 0; +-} +- + static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, +@@ -310,7 +311,6 @@ static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, + packet = (struct pm4_mes_query_status *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_query_status)); + +- + packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); + +@@ -328,16 +328,15 @@ static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, + return 0; + } + +- + static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer) + { + struct pm4_mec_release_mem *packet; + + packet = (struct pm4_mec_release_mem *)buffer; +- memset(buffer, 0, sizeof(struct pm4_mec_release_mem)); ++ memset(buffer, 0, sizeof(*packet)); + + packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, +- sizeof(struct pm4_mec_release_mem)); ++ sizeof(*packet)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +index d556779..cc2c3fb 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016 Advanced Micro Devices, Inc. ++ * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -217,8 +217,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + +- m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | +- 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT; ++ m->cp_hqd_ib_control = ++ 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | ++ 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT; + + /* + * HW does not clamp this field correctly. Maximum EOP queue size +@@ -243,8 +244,8 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, + 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | + 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | + 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; +- m->cp_hqd_pq_doorbell_control |= +- 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; ++ m->cp_hqd_pq_doorbell_control |= 1 << ++ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; + } + if (priv_cp_queues) + m->cp_hqd_pq_control |= +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +index c537f37..e3ae2d4 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +@@ -544,4 +544,3 @@ struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, + mqd->update_mqd = update_mqd_tonga; + return mqd; + } +- +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +index cd380ad..c317feb4 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +@@ -26,7 +26,6 @@ + #include "kfd_device_queue_manager.h" + #include "kfd_kernel_queue.h" + #include "kfd_priv.h" +-#include "kfd_pm4_opcodes.h" + + static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, + unsigned int buffer_size_bytes) +@@ -45,8 +44,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, + unsigned int process_count, queue_count, compute_queue_count; + unsigned int map_queue_size; + unsigned int max_proc_per_quantum = 1; +- +- struct kfd_dev *dev = pm->dqm->dev; ++ struct kfd_dev *dev = pm->dqm->dev; + + process_count = pm->dqm->processes_count; + queue_count = pm->dqm->queue_count; +@@ -57,14 +55,13 @@ static void pm_calc_rlib_size(struct packet_manager *pm, + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ +- + *over_subscription = false; + + if (dev->max_proc_per_quantum > 1) + max_proc_per_quantum = dev->max_proc_per_quantum; + + if ((process_count > max_proc_per_quantum) || +- compute_queue_count > get_queues_num(pm->dqm)) { ++ compute_queue_count > get_queues_num(pm->dqm)) { + *over_subscription = true; + pr_debug("Over subscribed runlist\n"); + } +@@ -193,6 +190,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, + &rl_buffer[rl_wptr], + q, + qpd->is_debug); ++ + if (retval) + return retval; + +@@ -301,8 +299,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) + + pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); + +- packet_size_dwords = pm->pmf->runlist_size / +- sizeof(uint32_t); ++ packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); + mutex_lock(&pm->lock); + + retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, +@@ -311,7 +308,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) + goto fail_acquire_packet_buffer; + + retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr, +- rl_ib_size / sizeof(uint32_t), false); ++ rl_ib_size / sizeof(uint32_t), false); + if (retval) + goto fail_create_runlist; + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index bb05e95..fffdec6 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -30,13 +30,13 @@ + #include <linux/atomic.h> + #include <linux/workqueue.h> + #include <linux/spinlock.h> +-#include <linux/idr.h> + #include <linux/kfd_ioctl.h> +-#include <linux/pid.h> +-#include <linux/interval_tree.h> ++#include <linux/idr.h> + #include <linux/seq_file.h> + #include <linux/kref.h> + #include <linux/kfifo.h> ++#include <linux/pid.h> ++#include <linux/interval_tree.h> + #include <kgd_kfd_interface.h> + + #include "amd_shared.h" +@@ -81,7 +81,6 @@ + #define KFD_CIK_HIQ_PIPE 4 + #define KFD_CIK_HIQ_QUEUE 0 + +- + /* Macro for allocating structures */ + #define kfd_alloc_struct(ptr_to_struct) \ + ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) +@@ -114,14 +113,14 @@ extern int max_num_of_queues_per_device; + /* Kernel module parameter to specify the scheduling policy */ + extern int sched_policy; + +-extern int cwsr_enable; +- + /* + * Kernel module parameter to specify the maximum process + * number per HW scheduler + */ + extern int hws_max_conc_proc; + ++extern int cwsr_enable; ++ + /* + * Kernel module parameter to specify whether to send sigterm to HSA process on + * unhandled exception +@@ -442,7 +441,11 @@ enum KFD_QUEUE_PRIORITY { + * @is_interop: Defines if this is a interop queue. Interop queue means that + * the queue can access both graphics and compute resources. + * +- * @is_active: Defines if the queue is active or not. ++ * @is_evicted: Defines if the queue is evicted. Only active queues ++ * are evicted, rendering them inactive. ++ * ++ * @is_active: Defines if the queue is active or not. @is_active and ++ * @is_evicted are protected by the DQM lock. + * + * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid + * of the queue. +@@ -464,7 +467,7 @@ struct queue_properties { + void __iomem *doorbell_ptr; + uint32_t doorbell_off; + bool is_interop; +- bool is_evicted; /* true -> queue is evicted */ ++ bool is_evicted; + bool is_active; + /* Not relevant for user mode queues in cp scheduling */ + unsigned int vmid; +@@ -583,7 +586,6 @@ struct qcm_process_device { + struct list_head priv_queue_list; + + unsigned int queue_count; +- /* a data field only meaningful for non-HWS case */ + unsigned int vmid; + bool is_debug; + unsigned int evicted; /* eviction counter, 0=active */ +@@ -614,11 +616,11 @@ struct qcm_process_device { + uint64_t tma_addr; + + /* IB memory */ +- uint64_t ib_base; /* ib_base+ib_size must be below cwsr_base */ ++ uint64_t ib_base; + void *ib_kaddr; + + /*doorbell resources per process per device*/ +- unsigned long *doorbell_bitmap; ++ unsigned long *doorbell_bitmap; + }; + + /* KFD Memory Eviction */ +@@ -756,7 +758,7 @@ struct kfd_process { + struct rb_root_cached bo_interval_tree; + + /* Information used for memory eviction */ +- void *process_info; ++ void *kgd_process_info; + /* Eviction fence that is attached to all the BOs of this process. The + * fence will be triggered during eviction and new one will be created + * during restore +@@ -799,7 +801,7 @@ struct amdkfd_ioctl_desc { + int kfd_process_create_wq(void); + void kfd_process_destroy_wq(void); + struct kfd_process *kfd_create_process(struct file *filep); +-struct kfd_process *kfd_get_process(const struct task_struct *task); ++struct kfd_process *kfd_get_process(const struct task_struct *); + struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); + struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); + void kfd_unref_process(struct kfd_process *p); +@@ -811,7 +813,7 @@ int kfd_resume_all_processes(void); + int kfd_process_device_init_vm(struct kfd_process_device *pdd, + struct file *drm_file); + struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, +- struct kfd_process *p); ++ struct kfd_process *p); + struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, + struct kfd_process *p); + struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, +@@ -859,7 +861,7 @@ void kfd_pasid_free(unsigned int pasid); + size_t kfd_doorbell_process_slice(struct kfd_dev *kfd); + int kfd_doorbell_init(struct kfd_dev *kfd); + void kfd_doorbell_fini(struct kfd_dev *kfd); +-int kfd_doorbell_mmap(struct kfd_dev *kfd, struct kfd_process *process, ++int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma); + void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, + unsigned int *doorbell_off); +@@ -982,8 +984,6 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, + #define KFD_FENCE_COMPLETED (100) + #define KFD_FENCE_INIT (10) + +-struct packet_manager_func; +- + struct packet_manager { + struct device_queue_manager *dqm; + struct kernel_queue *priv_queue; +@@ -996,7 +996,7 @@ struct packet_manager { + }; + + struct packet_manager_funcs { +- /* Support different firmware versions for PM4 packets */ ++ /* Support ASIC-specific packet formats for PM4 packets */ + int (*map_process)(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd); + int (*runlist)(struct packet_manager *pm, uint32_t *buffer, +@@ -1042,7 +1042,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + + void pm_release_ib(struct packet_manager *pm); + +-/* Following PM funcs can be shared among CIK and VI */ ++/* Following PM funcs can be shared among VI and AI */ + unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); + int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res); +@@ -1089,8 +1089,6 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); + + bool kfd_is_locked(void); + +-#define KFD_SCRATCH_KV_FW_VER 413 +- + /* PeerDirect support */ + void kfd_init_peer_direct(void); + void kfd_close_peer_direct(void); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index 7a28c21..9477e50 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -30,6 +30,7 @@ + #include <linux/notifier.h> + #include <linux/compat.h> + #include <linux/mman.h> ++#include <linux/file.h> + #include <asm/page.h> + #include "kfd_ipc.h" + +@@ -184,8 +185,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, + /* kfd_process_device_reserve_ib_mem - Reserve memory inside the + * process for IB usage The memory reserved is for KFD to submit + * IB to AMDGPU from kernel. If the memory is reserved +- * successfully, ib_kaddr_assigned will have the CPU/kernel +- * address. Check ib_kaddr_assigned before accessing the memory. ++ * successfully, ib_kaddr will have the CPU/kernel ++ * address. Check ib_kaddr before accessing the memory. + */ + static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd) + { +@@ -212,7 +213,6 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd) + struct kfd_process *kfd_create_process(struct file *filep) + { + struct kfd_process *process; +- + struct task_struct *thread = current; + + if (!thread->mm) +@@ -348,7 +348,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) + + list_for_each_entry_safe(pdd, temp, &p->per_device_data, + per_device_list) { +- /* Destroy the GPUVM VM context */ ++ pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", ++ pdd->dev->id, p->pasid); ++ + if (pdd->drm_file) + fput(pdd->drm_file); + else if (pdd->vm) +@@ -401,9 +403,6 @@ static void kfd_process_ref_release(struct kref *ref) + { + struct kfd_process *p = container_of(ref, struct kfd_process, ref); + +- if (WARN_ON(!kfd_process_wq)) +- return; +- + INIT_WORK(&p->release_work, kfd_process_wq_release); + queue_work(kfd_process_wq, &p->release_work); + } +@@ -486,9 +485,9 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) + + offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id)) + << PAGE_SHIFT; +- qpd->tba_addr = (uint64_t)vm_mmap(filep, 0, +- KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, +- MAP_SHARED, offset); ++ qpd->tba_addr = (int64_t)vm_mmap(filep, 0, ++ KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, ++ MAP_SHARED, offset); + + if (IS_ERR_VALUE(qpd->tba_addr)) { + int err = qpd->tba_addr; +@@ -725,10 +724,11 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, + + if (drm_file) + ret = dev->kfd2kgd->acquire_process_vm( +- dev->kgd, drm_file, &pdd->vm, &p->process_info, &p->ef); ++ dev->kgd, drm_file, ++ &pdd->vm, &p->kgd_process_info, &p->ef); + else + ret = dev->kfd2kgd->create_process_vm( +- dev->kgd, &pdd->vm, &p->process_info, &p->ef); ++ dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef); + if (ret) { + pr_err("Failed to create process VM object\n"); + return ret; +@@ -942,42 +942,6 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) + return ret_p; + } + +-void kfd_suspend_all_processes(void) +-{ +- struct kfd_process *p; +- unsigned int temp; +- int idx = srcu_read_lock(&kfd_processes_srcu); +- +- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { +- cancel_delayed_work_sync(&p->eviction_work); +- cancel_delayed_work_sync(&p->restore_work); +- +- if (kfd_process_evict_queues(p)) +- pr_err("Failed to suspend process %d\n", p->pasid); +- dma_fence_signal(p->ef); +- dma_fence_put(p->ef); +- p->ef = NULL; +- } +- srcu_read_unlock(&kfd_processes_srcu, idx); +-} +- +-int kfd_resume_all_processes(void) +-{ +- struct kfd_process *p; +- unsigned int temp; +- int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); +- +- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { +- if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) { +- pr_err("Restore process %d failed during resume\n", +- p->pasid); +- ret = -EFAULT; +- } +- } +- srcu_read_unlock(&kfd_processes_srcu, idx); +- return ret; +-} +- + /* This increments the process->ref counter. */ + struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) + { +@@ -1069,15 +1033,14 @@ static void evict_process_worker(struct work_struct *work) + "Eviction fence mismatch\n"); + + /* Narrow window of overlap between restore and evict work +- * item is possible. Once +- * amdgpu_amdkfd_gpuvm_restore_process_bos unreserves KFD BOs, +- * it is possible to evicted again. But restore has few more +- * steps of finish. So lets wait for any previous restore work +- * to complete ++ * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos ++ * unreserves KFD BOs, it is possible to evicted again. But ++ * restore has few more steps of finish. So lets wait for any ++ * previous restore work to complete + */ + flush_delayed_work(&p->restore_work); + +- pr_info("Started evicting process of pasid %d\n", p->pasid); ++ pr_info("Started evicting pasid %d\n", p->pasid); + ret = kfd_process_evict_queues(p); + if (!ret) { + dma_fence_signal(p->ef); +@@ -1086,10 +1049,9 @@ static void evict_process_worker(struct work_struct *work) + queue_delayed_work(kfd_restore_wq, &p->restore_work, + msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); + +- pr_info("Finished evicting process of pasid %d\n", p->pasid); ++ pr_info("Finished evicting pasid %d\n", p->pasid); + } else +- pr_err("Failed to quiesce user queues. Cannot evict pasid %d\n", +- p->pasid); ++ pr_err("Failed to evict queues of pasid %d\n", p->pasid); + } + + static void restore_process_worker(struct work_struct *work) +@@ -1115,7 +1077,7 @@ static void restore_process_worker(struct work_struct *work) + struct kfd_process_device, + per_device_list); + +- pr_info("Started restoring process of pasid %d\n", p->pasid); ++ pr_info("Started restoring pasid %d\n", p->pasid); + + /* Setting last_restore_timestamp before successful restoration. + * Otherwise this would have to be set by KGD (restore_process_bos) +@@ -1128,10 +1090,11 @@ static void restore_process_worker(struct work_struct *work) + */ + + p->last_restore_timestamp = get_jiffies_64(); +- ret = pdd->dev->kfd2kgd->restore_process_bos(p->process_info, &p->ef); ++ ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info, ++ &p->ef); + if (ret) { +- pr_info("Restore failed, try again after %d ms\n", +- PROCESS_BACK_OFF_TIME_MS); ++ pr_info("Failed to restore BOs of pasid %d, retry after %d ms\n", ++ p->pasid, PROCESS_BACK_OFF_TIME_MS); + ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, + msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); + WARN(!ret, "reschedule restore work failed\n"); +@@ -1139,10 +1102,46 @@ static void restore_process_worker(struct work_struct *work) + } + + ret = kfd_process_restore_queues(p); +- if (ret) +- pr_err("Failed to resume user queues\n"); ++ if (!ret) ++ pr_info("Finished restoring pasid %d\n", p->pasid); ++ else ++ pr_err("Failed to restore queues of pasid %d\n", p->pasid); ++} ++ ++void kfd_suspend_all_processes(void) ++{ ++ struct kfd_process *p; ++ unsigned int temp; ++ int idx = srcu_read_lock(&kfd_processes_srcu); + +- pr_info("Finished restoring process of pasid %d\n", p->pasid); ++ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { ++ cancel_delayed_work_sync(&p->eviction_work); ++ cancel_delayed_work_sync(&p->restore_work); ++ ++ if (kfd_process_evict_queues(p)) ++ pr_err("Failed to suspend process %d\n", p->pasid); ++ dma_fence_signal(p->ef); ++ dma_fence_put(p->ef); ++ p->ef = NULL; ++ } ++ srcu_read_unlock(&kfd_processes_srcu, idx); ++} ++ ++int kfd_resume_all_processes(void) ++{ ++ struct kfd_process *p; ++ unsigned int temp; ++ int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); ++ ++ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { ++ if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) { ++ pr_err("Restore process %d failed during resume\n", ++ p->pasid); ++ ret = -EFAULT; ++ } ++ } ++ srcu_read_unlock(&kfd_processes_srcu, idx); ++ return ret; + } + + int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, +@@ -1176,7 +1175,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, + KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); + } + +- + void kfd_flush_tlb(struct kfd_process_device *pdd) + { + struct kfd_dev *dev = pdd->dev; +@@ -1211,7 +1209,7 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) + r = pqm_debugfs_mqds(m, &p->pqm); + mutex_unlock(&p->mutex); + +- if (r != 0) ++ if (r) + break; + } + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +index c950149..e18ed45 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +@@ -188,7 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, + case KFD_QUEUE_TYPE_SDMA: + if (dev->dqm->sdma_queue_count + >= get_num_sdma_queues(dev->dqm)) { +- pr_debug("Over-subscription is not allowed for SDMA\n"); ++ pr_debug("Over-subscription is not allowed for SDMA.\n"); + retval = -EPERM; + goto err_create_queue; + } +@@ -206,7 +206,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, + case KFD_QUEUE_TYPE_COMPUTE: + /* check if there is over subscription */ + if ((dev->dqm->sched_policy == +- KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && ++ KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && + ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || + (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { + pr_debug("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +index 320c8d3..82cff10 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +@@ -196,6 +196,7 @@ struct kfd_topology_device *kfd_create_topology_device( + return dev; + } + ++ + #define sysfs_show_gen_prop(buffer, fmt, ...) \ + snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__) + #define sysfs_show_32bit_prop(buffer, name, value) \ +@@ -739,7 +740,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, + } + + /* All hardware blocks have the same number of attributes. */ +- num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr); ++ num_attrs = ARRAY_SIZE(perf_attr_iommu); + list_for_each_entry(perf, &dev->perf_props, list) { + perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) + * num_attrs + sizeof(struct attribute_group), +@@ -890,7 +891,8 @@ static void kfd_debug_print_topology(void) + up_read(&topology_lock); + } + +-/* Helper function for intializing platform_xx members of kfd_system_properties ++/* Helper function for intializing platform_xx members of ++ * kfd_system_properties. Uses OEM info from the last CPU/APU node. + */ + static void kfd_update_system_properties(void) + { +@@ -1013,13 +1015,12 @@ int kfd_topology_init(void) + */ + #ifdef CONFIG_ACPI + ret = kfd_create_crat_image_acpi(&crat_image, &image_size); +- if (ret == 0) { ++ if (!ret) { + ret = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (ret || +- kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { +- ++ kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { + kfd_release_topology_device_list( + &temp_topology_device_list); + kfd_destroy_crat_image(crat_image); +@@ -1029,8 +1030,8 @@ int kfd_topology_init(void) + #endif + if (!crat_image) { + ret = kfd_create_crat_image_virtual(&crat_image, &image_size, +- COMPUTE_UNIT_CPU, NULL, +- proximity_domain); ++ COMPUTE_UNIT_CPU, NULL, ++ proximity_domain); + cpu_only_node = 1; + if (ret) { + pr_err("Error creating VCRAT table for CPU\n"); +@@ -1038,8 +1039,8 @@ int kfd_topology_init(void) + } + + ret = kfd_parse_crat_table(crat_image, +- &temp_topology_device_list, +- proximity_domain); ++ &temp_topology_device_list, ++ proximity_domain); + if (ret) { + pr_err("Error parsing VCRAT table for CPU\n"); + goto err; +@@ -1051,12 +1052,12 @@ int kfd_topology_init(void) + + down_write(&topology_lock); + kfd_topology_update_device_list(&temp_topology_device_list, +- &topology_device_list); ++ &topology_device_list); + atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1); + ret = kfd_topology_update_sysfs(); + up_write(&topology_lock); + +- if (ret == 0) { ++ if (!ret) { + sys_props.generation_count++; + kfd_update_system_properties(); + kfd_debug_print_topology(); +@@ -1144,7 +1145,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) + break; + } + up_write(&topology_lock); +- + return out_dev; + } + +@@ -1212,8 +1212,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + + pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); + +- proximity_domain = atomic_inc_return(& +- topology_crat_proximity_domain); ++ proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); + + /* Check to see if this gpu device exists in the topology_device_list. + * If so, assign the gpu to that device, +@@ -1224,15 +1223,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + dev = kfd_assign_gpu(gpu); + if (!dev) { + res = kfd_create_crat_image_virtual(&crat_image, &image_size, +- COMPUTE_UNIT_GPU, +- gpu, proximity_domain); ++ COMPUTE_UNIT_GPU, gpu, ++ proximity_domain); + if (res) { + pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", + gpu_id); + return res; + } + res = kfd_parse_crat_table(crat_image, +- &temp_topology_device_list, proximity_domain); ++ &temp_topology_device_list, ++ proximity_domain); + if (res) { + pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", + gpu_id); +@@ -1249,14 +1249,13 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + res = kfd_topology_update_sysfs(); + up_write(&topology_lock); + +- if (res == 0) ++ if (!res) + sys_props.generation_count++; + else + pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", + gpu_id, res); + dev = kfd_assign_gpu(gpu); +- if (!dev) { +- pr_err("Could not assign GPU\n"); ++ if (WARN_ON(!dev)) { + res = -ENODEV; + goto err; + } +@@ -1315,14 +1314,15 @@ int kfd_topology_add_device(struct kfd_dev *gpu) + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + break; + default: +- BUG(); ++ WARN(1, "Unexpected ASIC family %u", ++ dev->gpu->device_info->asic_family); + } + + /* Fix errors in CZ CRAT. +- * simd_count: Carrizo CRAT reports wrong simd_count, probably because +- * it doesn't consider masked out CUs +- * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd. +- * capability flag: Carrizo CRAT doesn't report IOMMU flags. ++ * simd_count: Carrizo CRAT reports wrong simd_count, probably ++ * because it doesn't consider masked out CUs ++ * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd ++ * capability flag: Carrizo CRAT doesn't report IOMMU flags + */ + if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { + dev->node_props.simd_count = +@@ -1362,7 +1362,7 @@ int kfd_topology_remove_device(struct kfd_dev *gpu) + + up_write(&topology_lock); + +- if (res == 0) ++ if (!res) + kfd_notify_gpu_change(gpu_id, 0); + + return res; +@@ -1403,7 +1403,7 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) + { + int first_cpu_of_numa_node; + +- if (!cpumask || (cpumask == cpu_none_mask)) ++ if (!cpumask || cpumask == cpu_none_mask) + return -1; + first_cpu_of_numa_node = cpumask_first(cpumask); + if (first_cpu_of_numa_node >= nr_cpu_ids) +@@ -1446,7 +1446,7 @@ int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) + + seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); + r = dqm_debugfs_hqds(m, dev->gpu->dqm); +- if (r != 0) ++ if (r) + break; + } + +@@ -1471,7 +1471,7 @@ int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) + + seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); + r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets); +- if (r != 0) ++ if (r) + break; + } + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +index 4c518fe8..2b36baf 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +@@ -46,9 +46,6 @@ + #define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 + #define HSA_CAP_DOORBELL_TYPE_1_0 0x1 + #define HSA_CAP_DOORBELL_TYPE_2_0 0x2 +-#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 +-#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 +-#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000 + #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 + + struct kfd_node_properties { +@@ -169,9 +166,9 @@ struct kfd_topology_device { + struct attribute attr_gpuid; + struct attribute attr_name; + struct attribute attr_props; +- uint8_t oem_id[CRAT_OEMID_LENGTH]; +- uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH]; +- uint32_t oem_revision; ++ uint8_t oem_id[CRAT_OEMID_LENGTH]; ++ uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH]; ++ uint32_t oem_revision; + }; + + struct kfd_system_properties { +diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h +index 011c14c..0bc0b25 100644 +--- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h ++++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016 Advanced Micro Devices, Inc. ++ * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +-- +2.7.4 + |