From 9bde4e85bcb800b0f043f91f9092e8b9d6377e24 Mon Sep 17 00:00:00 2001 From: Chaudhary Amit Kumar Date: Wed, 9 Jan 2019 18:20:04 +0530 Subject: [PATCH 4294/5725] drm/amdkfd: Cosmetic changes to match upstream Signed-off-by: Felix Kuehling Signed-off-by: Kalyan Alle Signed-off-by: Chaudhary Amit Kumar --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 15 ++- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 57 ++++++--- drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 48 ++++---- drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 69 ++++++----- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 24 ++-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 9 +- .../drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 4 +- .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 29 ++--- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 15 +-- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 7 +- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 14 +-- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 2 - drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 5 +- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 97 ++++++++------- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 11 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 13 +-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 40 +++---- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 130 ++++++++++----------- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 58 ++++----- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 9 +- drivers/gpu/drm/amd/amdkfd/soc15_int.h | 2 +- 26 files changed, 334 insertions(+), 336 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 491652c..bb38da1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,6 @@ static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); static int kfd_mmap(struct file *, struct vm_area_struct *); -static bool kfd_dev_is_large_bar(struct kfd_dev *dev); static const char kfd_dev_name[] = "kfd"; @@ -903,7 +903,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, mutex_lock(&p->mutex); if (!kfd_has_process_device_data(p)) - goto out_upwrite; + goto out_unlock; /* Run over all pdd of the process */ pdd = kfd_get_first_process_device_data(p); @@ -912,7 +912,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, pdd = kfd_get_next_process_device_data(p, pdd); } while (pdd); - goto out_upwrite; + goto out_unlock; } /* Fill in process-aperture information for all available @@ -929,7 +929,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, if (!kfd_has_process_device_data(p)) { args->num_of_nodes = 0; kfree(pa); - goto out_upwrite; + goto out_unlock; } /* Run over all pdd of the process */ @@ -971,7 +971,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, kfree(pa); return ret ? -EFAULT : 0; -out_upwrite: +out_unlock: mutex_unlock(&p->mutex); return 0; } @@ -1325,8 +1325,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return 0; err_free: - dev->kfd2kgd->free_memory_of_gpu(dev->kgd, - (struct kgd_mem *) mem); + dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); err_unlock: mutex_unlock(&p->mutex); return err; @@ -1367,7 +1366,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, /* If freeing the buffer failed, leave the handle in place for * clean-up during process tear-down. */ - if (ret == 0) + if (!ret) kfd_process_device_remove_obj_handle( pdd, GET_IDR_HANDLE(args->handle)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index a803898..6688882 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1,7 +1,27 @@ -#include -#include -#include +/* + * Copyright 2015-2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + #include +#include #include "kfd_crat.h" #include "kfd_priv.h" #include "kfd_topology.h" @@ -266,6 +286,7 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, id = cache->processor_id_low; + pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); list_for_each_entry(dev, device_list, list) { total_num_of_cu = (dev->node_props.array_count * dev->node_props.cu_per_simd_array); @@ -415,11 +436,15 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, ret = kfd_parse_subtype_cache(cache, device_list); break; case CRAT_SUBTYPE_TLB_AFFINITY: - /* For now, nothing to do here */ + /* + * For now, nothing to do here + */ pr_debug("Found TLB entry in CRAT table (not processing)\n"); break; case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: - /* For now, nothing to do here */ + /* + * For now, nothing to do here + */ pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); break; case CRAT_SUBTYPE_IOLINK_AFFINITY: @@ -444,9 +469,8 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, * * Return - 0 if successful else -ve value */ -int kfd_parse_crat_table(void *crat_image, - struct list_head *device_list, - uint32_t proximity_domain) +int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, + uint32_t proximity_domain) { struct kfd_topology_device *top_dev = NULL; struct crat_subtype_generic *sub_type_hdr; @@ -693,7 +717,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, * crat_image will be NULL * @size: [OUT] size of crat_image * - * Return 0 if successful else return -ve value + * Return 0 if successful else return error code */ #ifdef CONFIG_ACPI int kfd_create_crat_image_acpi(void **crat_image, size_t *size) @@ -725,10 +749,8 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) } pcrat_image = kmalloc(crat_table->length, GFP_KERNEL); - if (!pcrat_image) { - pr_err("No memory for allocating CRAT image\n"); + if (!pcrat_image) return -ENOMEM; - } memcpy(pcrat_image, crat_table, crat_table->length); @@ -1072,8 +1094,8 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size, * [OUT] actual size of data filled in crat_image */ static int kfd_create_vcrat_image_gpu(void *pcrat_image, - size_t *size, struct kfd_dev *kdev, - uint32_t proximity_domain) + size_t *size, struct kfd_dev *kdev, + uint32_t proximity_domain) { struct crat_header *crat_table = (struct crat_header *)pcrat_image; struct crat_subtype_generic *sub_type_hdr; @@ -1241,7 +1263,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * Return 0 if successful else return -ve value */ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, - int flags, struct kfd_dev *kdev, uint32_t proximity_domain) + int flags, struct kfd_dev *kdev, + uint32_t proximity_domain) { void *pcrat_image = NULL; int ret = 0; @@ -1271,8 +1294,8 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, if (!pcrat_image) return -ENOMEM; *size = VCRAT_SIZE_FOR_GPU; - ret = kfd_create_vcrat_image_gpu(pcrat_image, size, - kdev, proximity_domain); + ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev, + proximity_domain); break; case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU): /* TODO: */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 00de41f..cd7ee6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -24,7 +24,6 @@ #define KFD_CRAT_H_INCLUDED #include -#include "kfd_priv.h" #pragma pack(1) @@ -228,12 +227,12 @@ struct crat_subtype_ccompute { /* * HSA IO Link Affinity structure and definitions */ -#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0) -#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1) -#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) -#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) -#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) -#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0 +#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0) +#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1) +#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) +#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) +#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) +#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0 /* * IO interface types @@ -241,18 +240,18 @@ struct crat_subtype_ccompute { #define CRAT_IOLINK_TYPE_UNDEFINED 0 #define CRAT_IOLINK_TYPE_HYPERTRANSPORT 1 #define CRAT_IOLINK_TYPE_PCIEXPRESS 2 -#define CRAT_IOLINK_TYPE_AMBA 3 -#define CRAT_IOLINK_TYPE_MIPI 4 -#define CRAT_IOLINK_TYPE_QPI_1_1 5 -#define CRAT_IOLINK_TYPE_RESERVED1 6 -#define CRAT_IOLINK_TYPE_RESERVED2 7 -#define CRAT_IOLINK_TYPE_RAPID_IO 8 -#define CRAT_IOLINK_TYPE_INFINIBAND 9 -#define CRAT_IOLINK_TYPE_RESERVED3 10 -#define CRAT_IOLINK_TYPE_OTHER 11 -#define CRAT_IOLINK_TYPE_MAX 255 - -#define CRAT_IOLINK_RESERVED_LENGTH 24 +#define CRAT_IOLINK_TYPE_AMBA 3 +#define CRAT_IOLINK_TYPE_MIPI 4 +#define CRAT_IOLINK_TYPE_QPI_1_1 5 +#define CRAT_IOLINK_TYPE_RESERVED1 6 +#define CRAT_IOLINK_TYPE_RESERVED2 7 +#define CRAT_IOLINK_TYPE_RAPID_IO 8 +#define CRAT_IOLINK_TYPE_INFINIBAND 9 +#define CRAT_IOLINK_TYPE_RESERVED3 10 +#define CRAT_IOLINK_TYPE_OTHER 11 +#define CRAT_IOLINK_TYPE_MAX 255 + +#define CRAT_IOLINK_RESERVED_LENGTH 24 struct crat_subtype_iolink { uint8_t type; @@ -308,13 +307,16 @@ struct cdit_header { #pragma pack() +struct kfd_dev; + #ifdef CONFIG_ACPI int kfd_create_crat_image_acpi(void **crat_image, size_t *size); #endif void kfd_destroy_crat_image(void *crat_image); -int kfd_parse_crat_table(void *crat_image, - struct list_head *device_list, - uint32_t proximity_domain); +int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, + uint32_t proximity_domain); int kfd_create_crat_image_virtual(void **crat_image, size_t *size, - int flags, struct kfd_dev *kdev, uint32_t proximity_domain); + int flags, struct kfd_dev *kdev, + uint32_t proximity_domain); + #endif /* KFD_CRAT_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c index 232e28f..4bd6ebf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -1,5 +1,5 @@ /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2016-2017 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 5b22ae0..8fb7580 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -240,6 +240,7 @@ static const struct kfd_device_info vega10_vf_device_info = { .num_sdma_engines = 2, }; + struct kfd_deviceid { unsigned short did; const struct kfd_device_info *device_info; @@ -288,35 +289,35 @@ static const struct kfd_deviceid supported_devices[] = { { 0x67B9, &hawaii_device_info }, /* Hawaii */ { 0x67BA, &hawaii_device_info }, /* Hawaii */ { 0x67BE, &hawaii_device_info }, /* Hawaii */ - { 0x6920, &tonga_device_info }, /* Tonga */ - { 0x6921, &tonga_device_info }, /* Tonga */ - { 0x6928, &tonga_device_info }, /* Tonga */ - { 0x6929, &tonga_device_info }, /* Tonga */ - { 0x692B, &tonga_device_info }, /* Tonga */ - { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ - { 0x6938, &tonga_device_info }, /* Tonga */ - { 0x6939, &tonga_device_info }, /* Tonga */ - { 0x7300, &fiji_device_info }, /* Fiji */ - { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ - { 0x67C0, &polaris10_device_info }, /* Polaris10 */ - { 0x67C1, &polaris10_device_info }, /* Polaris10 */ - { 0x67C2, &polaris10_device_info }, /* Polaris10 */ + { 0x6920, &tonga_device_info }, /* Tonga */ + { 0x6921, &tonga_device_info }, /* Tonga */ + { 0x6928, &tonga_device_info }, /* Tonga */ + { 0x6929, &tonga_device_info }, /* Tonga */ + { 0x692B, &tonga_device_info }, /* Tonga */ + { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ + { 0x6938, &tonga_device_info }, /* Tonga */ + { 0x6939, &tonga_device_info }, /* Tonga */ + { 0x7300, &fiji_device_info }, /* Fiji */ + { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ + { 0x67C0, &polaris10_device_info }, /* Polaris10 */ + { 0x67C1, &polaris10_device_info }, /* Polaris10 */ + { 0x67C2, &polaris10_device_info }, /* Polaris10 */ { 0x67C4, &polaris10_device_info }, /* Polaris10 */ { 0x67C7, &polaris10_device_info }, /* Polaris10 */ - { 0x67C8, &polaris10_device_info }, /* Polaris10 */ - { 0x67C9, &polaris10_device_info }, /* Polaris10 */ - { 0x67CA, &polaris10_device_info }, /* Polaris10 */ - { 0x67CC, &polaris10_device_info }, /* Polaris10 */ - { 0x67CF, &polaris10_device_info }, /* Polaris10 */ - { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ + { 0x67C8, &polaris10_device_info }, /* Polaris10 */ + { 0x67C9, &polaris10_device_info }, /* Polaris10 */ + { 0x67CA, &polaris10_device_info }, /* Polaris10 */ + { 0x67CC, &polaris10_device_info }, /* Polaris10 */ + { 0x67CF, &polaris10_device_info }, /* Polaris10 */ + { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ { 0x67DF, &polaris10_device_info }, /* Polaris10 */ - { 0x67E0, &polaris11_device_info }, /* Polaris11 */ - { 0x67E1, &polaris11_device_info }, /* Polaris11 */ + { 0x67E0, &polaris11_device_info }, /* Polaris11 */ + { 0x67E1, &polaris11_device_info }, /* Polaris11 */ { 0x67E3, &polaris11_device_info }, /* Polaris11 */ - { 0x67E7, &polaris11_device_info }, /* Polaris11 */ - { 0x67E8, &polaris11_device_info }, /* Polaris11 */ - { 0x67E9, &polaris11_device_info }, /* Polaris11 */ - { 0x67EB, &polaris11_device_info }, /* Polaris11 */ + { 0x67E7, &polaris11_device_info }, /* Polaris11 */ + { 0x67E8, &polaris11_device_info }, /* Polaris11 */ + { 0x67E9, &polaris11_device_info }, /* Polaris11 */ + { 0x67EB, &polaris11_device_info }, /* Polaris11 */ { 0x67EF, &polaris11_device_info }, /* Polaris11 */ { 0x67FF, &polaris11_device_info }, /* Polaris11 */ { 0x6860, &vega10_device_info }, /* Vega10 */ @@ -366,11 +367,10 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return NULL; } - if (device_info->needs_pci_atomics) { - /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. - * 32 and 64-bit requests are possible and must be - * supported. - */ + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. + * 32 and 64-bit requests are possible and must be + * supported. + */ ret = pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); @@ -379,7 +379,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, "skipped device %x:%x, PCI rejects atomics", pdev->vendor, pdev->device); return NULL; - } } kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); @@ -427,7 +426,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, KGD_ENGINE_SDMA1); kfd->shared_resources = *gpu_resources; - /* Usually first_vmid_kfd = 8, last_vmid_kfd = 15 */ kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd @@ -669,10 +667,11 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_lock(&kfd->interrupt_lock); - if (kfd->interrupts_active && interrupt_is_wanted(kfd, ih_ring_entry, - patched_ihre, &is_patched) + if (kfd->interrupts_active + && interrupt_is_wanted(kfd, ih_ring_entry, + patched_ihre, &is_patched) && enqueue_ih_ring_entry(kfd, - is_patched ? patched_ihre : ih_ring_entry)) + is_patched ? patched_ihre : ih_ring_entry)) queue_work(kfd->ih_wq, &kfd->interrupt_work); spin_unlock(&kfd->interrupt_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8067092..d7822e2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -21,10 +21,11 @@ * */ +#include +#include #include #include #include -#include #include #include #include "kfd_priv.h" @@ -199,7 +200,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd, qpd->vmid, qpd->page_table_base); - /*invalidate the VM context after pasid and vmid mapping is set up*/ + /* invalidate the VM context after pasid and vmid mapping is set up */ kfd_flush_tlb(qpd_to_pdd(qpd)); return 0; @@ -289,7 +290,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (retval) { if (list_empty(&qpd->queues_list)) deallocate_vmid(dqm, qpd, q); - goto out_unlock; } @@ -482,11 +482,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) int retval; struct mqd_manager *mqd; struct kfd_process_device *pdd; - bool prev_active = false; mutex_lock(&dqm->lock); - pdd = kfd_get_process_device_data(q->device, q->process); if (!pdd) { retval = -ENODEV; @@ -502,7 +500,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later */ - if (pdd->qpd.evicted > 0) + if (pdd->qpd.evicted) q->properties.is_evicted = (q->properties.queue_size > 0 && q->properties.queue_percent > 0 && q->properties.queue_address != 0); @@ -762,9 +760,9 @@ static int register_process(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct device_process_node *n; - int retval; struct kfd_process_device *pdd; uint32_t pd_base; + int retval; n = kzalloc(sizeof(*n), GFP_KERNEL); if (!n) @@ -781,7 +779,6 @@ static int register_process(struct device_queue_manager *dqm, /* Update PD Base in QPD */ qpd->page_table_base = pd_base; - pr_debug("Updated PD address to 0x%08x\n", pd_base); retval = dqm->asic_ops.update_qpd(dqm, qpd); @@ -1076,9 +1073,7 @@ static int start_cpsch(struct device_queue_manager *dqm) static int stop_cpsch(struct device_queue_manager *dqm) { mutex_lock(&dqm->lock); - unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); - mutex_unlock(&dqm->lock); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); @@ -1633,7 +1628,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, out: mutex_unlock(&dqm->lock); - return retval; } @@ -1648,7 +1642,13 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) return NULL; switch (dev->device_info->asic_family) { + /* HWS is not available on Hawaii. */ case CHIP_HAWAII: + /* HWS depends on CWSR for timely dequeue. CWSR is not + * available on Tonga. + * + * FIXME: This argument also applies to Kaveri. + */ case CHIP_TONGA: dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; break; @@ -1728,7 +1728,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_VEGA10: case CHIP_RAVEN: - device_queue_manager_init_v9_vega10(&dqm->asic_ops); + device_queue_manager_init_v9(&dqm->asic_ops); break; default: WARN(1, "Unexpected ASIC family %u", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 3f17e5e..82fafd0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -209,7 +209,7 @@ void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops); void device_queue_manager_init_vi_tonga( struct device_queue_manager_asic_ops *asic_ops); -void device_queue_manager_init_v9_vega10( +void device_queue_manager_init_v9( struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); @@ -218,18 +218,11 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); -int process_evict_queues(struct device_queue_manager *dqm, - struct qcm_process_device *qpd); -int process_restore_queues(struct device_queue_manager *dqm, - struct qcm_process_device *qpd); - - static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { return (pdd->lds_base >> 16) & 0xFF; } -/* This function is only useful for GFXv7 and v8 */ static inline unsigned int get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index cc27190..4175153 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -32,7 +32,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); -void device_queue_manager_init_v9_vega10( +void device_queue_manager_init_v9( struct device_queue_manager_asic_ops *asic_ops) { asic_ops->update_qpd = update_qpd_v9; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 030b014..fd60a11 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -33,35 +33,22 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); -static int update_qpd_vi(struct device_queue_manager *dqm, - struct qcm_process_device *qpd); -static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd); - -/* - * Tonga device queue manager functions - */ static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, struct qcm_process_device *qpd, enum cache_policy default_policy, enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); +static int update_qpd_vi(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); static int update_qpd_vi_tonga(struct device_queue_manager *dqm, struct qcm_process_device *qpd); +static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd); static void init_sdma_vm_tonga(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); -void device_queue_manager_init_vi_tonga( - struct device_queue_manager_asic_ops *asic_ops) -{ - asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; - asic_ops->update_qpd = update_qpd_vi_tonga; - asic_ops->init_sdma_vm = init_sdma_vm_tonga; -} - - void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops) { @@ -70,6 +57,14 @@ void device_queue_manager_init_vi( asic_ops->init_sdma_vm = init_sdma_vm; } +void device_queue_manager_init_vi_tonga( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; + asic_ops->update_qpd = update_qpd_vi_tonga; + asic_ops->init_sdma_vm = init_sdma_vm_tonga; +} + static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) { /* In 64-bit mode, we can only control the top 3 bits of the LDS, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index fc41689..c3744d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -115,7 +115,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) pr_debug("doorbell aperture size == 0x%08lX\n", kfd->shared_resources.doorbell_aperture_size); - pr_debug("doorbell kernel address == 0x%p\n", kfd->doorbell_kernel_ptr); + pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); return 0; } @@ -189,7 +189,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, pr_debug("Get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" - " kernel address == 0x%p\n", + " kernel address == %p\n", *doorbell_off, (kfd->doorbell_kernel_ptr + inx)); return kfd->doorbell_kernel_ptr + inx; @@ -210,7 +210,7 @@ void write_kernel_doorbell(void __iomem *db, u32 value) { if (db) { writel(value, db); - pr_debug("Writing %d to doorbell address 0x%p\n", value, db); + pr_debug("Writing %d to doorbell address %p\n", value, db); } } @@ -220,14 +220,10 @@ void write_kernel_doorbell64(void __iomem *db, u64 value) WARN(((unsigned long)db & 7) != 0, "Unaligned 64-bit doorbell"); writeq(value, (u64 __iomem *)db); - pr_debug("writing %llu to doorbell address 0x%p\n", value, db); + pr_debug("writing %llu to doorbell address %p\n", value, db); } } -/* - * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1 - * to doorbells with the process's doorbell page - */ unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, unsigned int doorbell_id) @@ -239,7 +235,8 @@ unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, * units regardless of the ASIC-dependent doorbell size. */ return kfd->doorbell_id_offset + - process->doorbell_index * (kfd_doorbell_process_slice(kfd)/sizeof(u32)) + + process->doorbell_index + * kfd_doorbell_process_slice(kfd) / sizeof(u32) + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 24d8a21..1dc1584 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -390,7 +390,11 @@ static void set_event(struct kfd_event *ev) { struct kfd_event_waiter *waiter; - /* Auto reset if the list is non-empty and we're waking someone. */ + /* Auto reset if the list is non-empty and we're waking + * someone. waitqueue_active is safe here because we're + * protected by the p->event_mutex, which is also held when + * updating the wait queues in kfd_wait_on_events. + */ ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq); list_for_each_entry(waiter, &ev->wq.head, wait.entry) @@ -777,7 +781,6 @@ int kfd_wait_on_events(struct kfd_process *p, int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) { - unsigned long pfn; struct kfd_signal_page *page; int ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 5672710..0cae2e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -289,7 +289,6 @@ #define MAKE_LDS_APP_BASE_VI() \ (((uint64_t)(0x1UL) << 61) + 0x0) - #define MAKE_LDS_APP_LIMIT(base) \ (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) @@ -323,7 +322,7 @@ int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd, return 0; } -void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) +static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) { /* * node id couldn't be 0 - the three MSB bits of @@ -353,7 +352,7 @@ void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); } -void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) +static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) { pdd->lds_base = MAKE_LDS_APP_BASE_V9(); pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); @@ -388,10 +387,10 @@ int kfd_init_apertures(struct kfd_process *process) pdd = kfd_create_process_device_data(dev, process); if (!pdd) { pr_err("Failed to create process device data\n"); - return -1; + return -ENOMEM; } /* - * For 64 bit process aperture will be statically reserved in + * For 64 bit process apertures will be statically reserved in * the x86_64 non canonical process address space * amdkfd doesn't currently support apertures for 32 bit process */ @@ -415,8 +414,9 @@ int kfd_init_apertures(struct kfd_process *process) kfd_init_apertures_v9(pdd, id); break; default: - pr_err("Unknown chip in kfd_init_apertures\n"); - return -1; + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); + return -EINVAL; } if (!dev->device_info->needs_iommu_device) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 728aaad..5217e51 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index 5b798f9..7a61f38 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -75,7 +75,8 @@ int kfd_iommu_device_init(struct kfd_dev *kfd) } if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { - dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", + dev_err(kfd_device, + "error required iommu flags ats %i, pri %i, pasid %i\n", (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c index b48c29f..19e54ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c @@ -22,8 +22,6 @@ */ #include "kfd_kernel_queue.h" -#include "kfd_pm4_headers.h" -#include "kfd_pm4_opcodes.h" static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c index 6724b1a..684a3bf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -44,7 +44,7 @@ static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, int retval; retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); - if (retval != 0) + if (retval) return false; kq->eop_gpu_addr = kq->eop_mem->gpu_addr; @@ -126,7 +126,6 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, concurrent_proc_cnt = min(pm->dqm->processes_count, kfd->max_proc_per_quantum); - packet = (struct pm4_mes_runlist *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_runlist)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index 357478f..bf20c6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -67,12 +67,25 @@ static void submit_packet_vi(struct kernel_queue *kq) kq->pending_wptr); } -static int pm_map_process_vi(struct packet_manager *pm, - uint32_t *buffer, struct qcm_process_device *qpd) +unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) +{ + union PM4_MES_TYPE_3_HEADER header; + + header.u32All = 0; + header.opcode = opcode; + header.count = packet_size / 4 - 2; + header.type = PM4_TYPE_3; + + return header.u32All; +} + +static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd) { struct pm4_mes_map_process *packet; packet = (struct pm4_mes_map_process *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, @@ -99,27 +112,16 @@ static int pm_map_process_vi(struct packet_manager *pm, return 0; } - -unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) -{ - union PM4_MES_TYPE_3_HEADER header; - - header.u32All = 0; - header.opcode = opcode; - header.count = packet_size / 4 - 2; - header.type = PM4_TYPE_3; - - return header.u32All; -} - static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, uint64_t ib, size_t ib_size_in_dwords, bool chain) { struct pm4_mes_runlist *packet; - int concurrent_proc_cnt = 0; struct kfd_dev *kfd = pm->dqm->dev; + if (WARN_ON(!ib)) + return -EFAULT; + /* Determine the number of processes to map together to HW: * it can not exceed the number of VMIDs available to the * scheduler, and it is determined by the smaller of the number @@ -132,7 +134,6 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, concurrent_proc_cnt = min(pm->dqm->processes_count, kfd->max_proc_per_quantum); - packet = (struct pm4_mes_runlist *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_runlist)); @@ -150,6 +151,34 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, return 0; } +int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res) +{ + struct pm4_mes_set_resources *packet; + + packet = (struct pm4_mes_set_resources *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); + + packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, + sizeof(struct pm4_mes_set_resources)); + + packet->bitfields2.queue_type = + queue_type__mes_set_resources__hsa_interface_queue_hiq; + packet->bitfields2.vmid_mask = res->vmid_mask; + packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; + packet->bitfields7.oac_mask = res->oac_mask; + packet->bitfields8.gds_heap_base = res->gds_heap_base; + packet->bitfields8.gds_heap_size = res->gds_heap_size; + + packet->gws_mask_lo = lower_32_bits(res->gws_mask); + packet->gws_mask_hi = upper_32_bits(res->gws_mask); + + packet->queue_mask_lo = lower_32_bits(res->queue_mask); + packet->queue_mask_hi = upper_32_bits(res->queue_mask); + + return 0; +} + static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, struct queue *q, bool is_static) { @@ -209,34 +238,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, return 0; } -int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, - struct scheduling_resources *res) -{ - struct pm4_mes_set_resources *packet; - - packet = (struct pm4_mes_set_resources *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); - - packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, - sizeof(struct pm4_mes_set_resources)); - - packet->bitfields2.queue_type = - queue_type__mes_set_resources__hsa_interface_queue_hiq; - packet->bitfields2.vmid_mask = res->vmid_mask; - packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; - packet->bitfields7.oac_mask = res->oac_mask; - packet->bitfields8.gds_heap_base = res->gds_heap_base; - packet->bitfields8.gds_heap_size = res->gds_heap_size; - - packet->gws_mask_lo = lower_32_bits(res->gws_mask); - packet->gws_mask_hi = upper_32_bits(res->gws_mask); - - packet->queue_mask_lo = lower_32_bits(res->queue_mask); - packet->queue_mask_hi = upper_32_bits(res->queue_mask); - - return 0; -} - static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, enum kfd_queue_type type, enum kfd_unmap_queues_filter filter, @@ -310,7 +311,6 @@ static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, packet = (struct pm4_mes_query_status *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_query_status)); - packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, sizeof(struct pm4_mes_query_status)); @@ -328,16 +328,15 @@ static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, return 0; } - static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer) { struct pm4_mec_release_mem *packet; packet = (struct pm4_mec_release_mem *)buffer; - memset(buffer, 0, sizeof(struct pm4_mec_release_mem)); + memset(buffer, 0, sizeof(*packet)); packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, - sizeof(struct pm4_mec_release_mem)); + sizeof(*packet)); packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index d556779..cc2c3fb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -217,8 +217,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", m->cp_hqd_pq_doorbell_control); - m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | - 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT; + m->cp_hqd_ib_control = + 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | + 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT; /* * HW does not clamp this field correctly. Maximum EOP queue size @@ -243,8 +244,8 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; - m->cp_hqd_pq_doorbell_control |= - 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; } if (priv_cp_queues) m->cp_hqd_pq_control |= diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index c537f37..e3ae2d4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -544,4 +544,3 @@ struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_tonga; return mqd; } - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index cd380ad..c317feb4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -26,7 +26,6 @@ #include "kfd_device_queue_manager.h" #include "kfd_kernel_queue.h" #include "kfd_priv.h" -#include "kfd_pm4_opcodes.h" static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes) @@ -45,8 +44,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int process_count, queue_count, compute_queue_count; unsigned int map_queue_size; unsigned int max_proc_per_quantum = 1; - - struct kfd_dev *dev = pm->dqm->dev; + struct kfd_dev *dev = pm->dqm->dev; process_count = pm->dqm->processes_count; queue_count = pm->dqm->queue_count; @@ -57,14 +55,13 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - *over_subscription = false; if (dev->max_proc_per_quantum > 1) max_proc_per_quantum = dev->max_proc_per_quantum; if ((process_count > max_proc_per_quantum) || - compute_queue_count > get_queues_num(pm->dqm)) { + compute_queue_count > get_queues_num(pm->dqm)) { *over_subscription = true; pr_debug("Over subscribed runlist\n"); } @@ -193,6 +190,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, &rl_buffer[rl_wptr], q, qpd->is_debug); + if (retval) return retval; @@ -301,8 +299,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); - packet_size_dwords = pm->pmf->runlist_size / - sizeof(uint32_t); + packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, @@ -311,7 +308,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) goto fail_acquire_packet_buffer; retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr, - rl_ib_size / sizeof(uint32_t), false); + rl_ib_size / sizeof(uint32_t), false); if (retval) goto fail_create_runlist; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bb05e95..fffdec6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -30,13 +30,13 @@ #include #include #include -#include #include -#include -#include +#include #include #include #include +#include +#include #include #include "amd_shared.h" @@ -81,7 +81,6 @@ #define KFD_CIK_HIQ_PIPE 4 #define KFD_CIK_HIQ_QUEUE 0 - /* Macro for allocating structures */ #define kfd_alloc_struct(ptr_to_struct) \ ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) @@ -114,14 +113,14 @@ extern int max_num_of_queues_per_device; /* Kernel module parameter to specify the scheduling policy */ extern int sched_policy; -extern int cwsr_enable; - /* * Kernel module parameter to specify the maximum process * number per HW scheduler */ extern int hws_max_conc_proc; +extern int cwsr_enable; + /* * Kernel module parameter to specify whether to send sigterm to HSA process on * unhandled exception @@ -442,7 +441,11 @@ enum KFD_QUEUE_PRIORITY { * @is_interop: Defines if this is a interop queue. Interop queue means that * the queue can access both graphics and compute resources. * - * @is_active: Defines if the queue is active or not. + * @is_evicted: Defines if the queue is evicted. Only active queues + * are evicted, rendering them inactive. + * + * @is_active: Defines if the queue is active or not. @is_active and + * @is_evicted are protected by the DQM lock. * * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid * of the queue. @@ -464,7 +467,7 @@ struct queue_properties { void __iomem *doorbell_ptr; uint32_t doorbell_off; bool is_interop; - bool is_evicted; /* true -> queue is evicted */ + bool is_evicted; bool is_active; /* Not relevant for user mode queues in cp scheduling */ unsigned int vmid; @@ -583,7 +586,6 @@ struct qcm_process_device { struct list_head priv_queue_list; unsigned int queue_count; - /* a data field only meaningful for non-HWS case */ unsigned int vmid; bool is_debug; unsigned int evicted; /* eviction counter, 0=active */ @@ -614,11 +616,11 @@ struct qcm_process_device { uint64_t tma_addr; /* IB memory */ - uint64_t ib_base; /* ib_base+ib_size must be below cwsr_base */ + uint64_t ib_base; void *ib_kaddr; /*doorbell resources per process per device*/ - unsigned long *doorbell_bitmap; + unsigned long *doorbell_bitmap; }; /* KFD Memory Eviction */ @@ -756,7 +758,7 @@ struct kfd_process { struct rb_root_cached bo_interval_tree; /* Information used for memory eviction */ - void *process_info; + void *kgd_process_info; /* Eviction fence that is attached to all the BOs of this process. The * fence will be triggered during eviction and new one will be created * during restore @@ -799,7 +801,7 @@ struct amdkfd_ioctl_desc { int kfd_process_create_wq(void); void kfd_process_destroy_wq(void); struct kfd_process *kfd_create_process(struct file *filep); -struct kfd_process *kfd_get_process(const struct task_struct *task); +struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); void kfd_unref_process(struct kfd_process *p); @@ -811,7 +813,7 @@ int kfd_resume_all_processes(void); int kfd_process_device_init_vm(struct kfd_process_device *pdd, struct file *drm_file); struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, - struct kfd_process *p); + struct kfd_process *p); struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p); struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, @@ -859,7 +861,7 @@ void kfd_pasid_free(unsigned int pasid); size_t kfd_doorbell_process_slice(struct kfd_dev *kfd); int kfd_doorbell_init(struct kfd_dev *kfd); void kfd_doorbell_fini(struct kfd_dev *kfd); -int kfd_doorbell_mmap(struct kfd_dev *kfd, struct kfd_process *process, +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma); void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off); @@ -982,8 +984,6 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, #define KFD_FENCE_COMPLETED (100) #define KFD_FENCE_INIT (10) -struct packet_manager_func; - struct packet_manager { struct device_queue_manager *dqm; struct kernel_queue *priv_queue; @@ -996,7 +996,7 @@ struct packet_manager { }; struct packet_manager_funcs { - /* Support different firmware versions for PM4 packets */ + /* Support ASIC-specific packet formats for PM4 packets */ int (*map_process)(struct packet_manager *pm, uint32_t *buffer, struct qcm_process_device *qpd); int (*runlist)(struct packet_manager *pm, uint32_t *buffer, @@ -1042,7 +1042,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, void pm_release_ib(struct packet_manager *pm); -/* Following PM funcs can be shared among CIK and VI */ +/* Following PM funcs can be shared among VI and AI */ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, struct scheduling_resources *res); @@ -1089,8 +1089,6 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); bool kfd_is_locked(void); -#define KFD_SCRATCH_KV_FW_VER 413 - /* PeerDirect support */ void kfd_init_peer_direct(void); void kfd_close_peer_direct(void); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7a28c21..9477e50 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "kfd_ipc.h" @@ -184,8 +185,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, /* kfd_process_device_reserve_ib_mem - Reserve memory inside the * process for IB usage The memory reserved is for KFD to submit * IB to AMDGPU from kernel. If the memory is reserved - * successfully, ib_kaddr_assigned will have the CPU/kernel - * address. Check ib_kaddr_assigned before accessing the memory. + * successfully, ib_kaddr will have the CPU/kernel + * address. Check ib_kaddr before accessing the memory. */ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd) { @@ -212,7 +213,6 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd) struct kfd_process *kfd_create_process(struct file *filep) { struct kfd_process *process; - struct task_struct *thread = current; if (!thread->mm) @@ -348,7 +348,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) list_for_each_entry_safe(pdd, temp, &p->per_device_data, per_device_list) { - /* Destroy the GPUVM VM context */ + pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", + pdd->dev->id, p->pasid); + if (pdd->drm_file) fput(pdd->drm_file); else if (pdd->vm) @@ -401,9 +403,6 @@ static void kfd_process_ref_release(struct kref *ref) { struct kfd_process *p = container_of(ref, struct kfd_process, ref); - if (WARN_ON(!kfd_process_wq)) - return; - INIT_WORK(&p->release_work, kfd_process_wq_release); queue_work(kfd_process_wq, &p->release_work); } @@ -486,9 +485,9 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id)) << PAGE_SHIFT; - qpd->tba_addr = (uint64_t)vm_mmap(filep, 0, - KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, - MAP_SHARED, offset); + qpd->tba_addr = (int64_t)vm_mmap(filep, 0, + KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, + MAP_SHARED, offset); if (IS_ERR_VALUE(qpd->tba_addr)) { int err = qpd->tba_addr; @@ -725,10 +724,11 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, if (drm_file) ret = dev->kfd2kgd->acquire_process_vm( - dev->kgd, drm_file, &pdd->vm, &p->process_info, &p->ef); + dev->kgd, drm_file, + &pdd->vm, &p->kgd_process_info, &p->ef); else ret = dev->kfd2kgd->create_process_vm( - dev->kgd, &pdd->vm, &p->process_info, &p->ef); + dev->kgd, &pdd->vm, &p->kgd_process_info, &p->ef); if (ret) { pr_err("Failed to create process VM object\n"); return ret; @@ -942,42 +942,6 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) return ret_p; } -void kfd_suspend_all_processes(void) -{ - struct kfd_process *p; - unsigned int temp; - int idx = srcu_read_lock(&kfd_processes_srcu); - - hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - cancel_delayed_work_sync(&p->eviction_work); - cancel_delayed_work_sync(&p->restore_work); - - if (kfd_process_evict_queues(p)) - pr_err("Failed to suspend process %d\n", p->pasid); - dma_fence_signal(p->ef); - dma_fence_put(p->ef); - p->ef = NULL; - } - srcu_read_unlock(&kfd_processes_srcu, idx); -} - -int kfd_resume_all_processes(void) -{ - struct kfd_process *p; - unsigned int temp; - int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); - - hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) { - pr_err("Restore process %d failed during resume\n", - p->pasid); - ret = -EFAULT; - } - } - srcu_read_unlock(&kfd_processes_srcu, idx); - return ret; -} - /* This increments the process->ref counter. */ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) { @@ -1069,15 +1033,14 @@ static void evict_process_worker(struct work_struct *work) "Eviction fence mismatch\n"); /* Narrow window of overlap between restore and evict work - * item is possible. Once - * amdgpu_amdkfd_gpuvm_restore_process_bos unreserves KFD BOs, - * it is possible to evicted again. But restore has few more - * steps of finish. So lets wait for any previous restore work - * to complete + * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos + * unreserves KFD BOs, it is possible to evicted again. But + * restore has few more steps of finish. So lets wait for any + * previous restore work to complete */ flush_delayed_work(&p->restore_work); - pr_info("Started evicting process of pasid %d\n", p->pasid); + pr_info("Started evicting pasid %d\n", p->pasid); ret = kfd_process_evict_queues(p); if (!ret) { dma_fence_signal(p->ef); @@ -1086,10 +1049,9 @@ static void evict_process_worker(struct work_struct *work) queue_delayed_work(kfd_restore_wq, &p->restore_work, msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); - pr_info("Finished evicting process of pasid %d\n", p->pasid); + pr_info("Finished evicting pasid %d\n", p->pasid); } else - pr_err("Failed to quiesce user queues. Cannot evict pasid %d\n", - p->pasid); + pr_err("Failed to evict queues of pasid %d\n", p->pasid); } static void restore_process_worker(struct work_struct *work) @@ -1115,7 +1077,7 @@ static void restore_process_worker(struct work_struct *work) struct kfd_process_device, per_device_list); - pr_info("Started restoring process of pasid %d\n", p->pasid); + pr_info("Started restoring pasid %d\n", p->pasid); /* Setting last_restore_timestamp before successful restoration. * Otherwise this would have to be set by KGD (restore_process_bos) @@ -1128,10 +1090,11 @@ static void restore_process_worker(struct work_struct *work) */ p->last_restore_timestamp = get_jiffies_64(); - ret = pdd->dev->kfd2kgd->restore_process_bos(p->process_info, &p->ef); + ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info, + &p->ef); if (ret) { - pr_info("Restore failed, try again after %d ms\n", - PROCESS_BACK_OFF_TIME_MS); + pr_info("Failed to restore BOs of pasid %d, retry after %d ms\n", + p->pasid, PROCESS_BACK_OFF_TIME_MS); ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); WARN(!ret, "reschedule restore work failed\n"); @@ -1139,10 +1102,46 @@ static void restore_process_worker(struct work_struct *work) } ret = kfd_process_restore_queues(p); - if (ret) - pr_err("Failed to resume user queues\n"); + if (!ret) + pr_info("Finished restoring pasid %d\n", p->pasid); + else + pr_err("Failed to restore queues of pasid %d\n", p->pasid); +} + +void kfd_suspend_all_processes(void) +{ + struct kfd_process *p; + unsigned int temp; + int idx = srcu_read_lock(&kfd_processes_srcu); - pr_info("Finished restoring process of pasid %d\n", p->pasid); + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + cancel_delayed_work_sync(&p->eviction_work); + cancel_delayed_work_sync(&p->restore_work); + + if (kfd_process_evict_queues(p)) + pr_err("Failed to suspend process %d\n", p->pasid); + dma_fence_signal(p->ef); + dma_fence_put(p->ef); + p->ef = NULL; + } + srcu_read_unlock(&kfd_processes_srcu, idx); +} + +int kfd_resume_all_processes(void) +{ + struct kfd_process *p; + unsigned int temp; + int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) { + pr_err("Restore process %d failed during resume\n", + p->pasid); + ret = -EFAULT; + } + } + srcu_read_unlock(&kfd_processes_srcu, idx); + return ret; } int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, @@ -1176,7 +1175,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); } - void kfd_flush_tlb(struct kfd_process_device *pdd) { struct kfd_dev *dev = pdd->dev; @@ -1211,7 +1209,7 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) r = pqm_debugfs_mqds(m, &p->pqm); mutex_unlock(&p->mutex); - if (r != 0) + if (r) break; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c950149..e18ed45 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -188,7 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, case KFD_QUEUE_TYPE_SDMA: if (dev->dqm->sdma_queue_count >= get_num_sdma_queues(dev->dqm)) { - pr_debug("Over-subscription is not allowed for SDMA\n"); + pr_debug("Over-subscription is not allowed for SDMA.\n"); retval = -EPERM; goto err_create_queue; } @@ -206,7 +206,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ if ((dev->dqm->sched_policy == - KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && + KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { pr_debug("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 320c8d3..82cff10 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -196,6 +196,7 @@ struct kfd_topology_device *kfd_create_topology_device( return dev; } + #define sysfs_show_gen_prop(buffer, fmt, ...) \ snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__) #define sysfs_show_32bit_prop(buffer, name, value) \ @@ -739,7 +740,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, } /* All hardware blocks have the same number of attributes. */ - num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr); + num_attrs = ARRAY_SIZE(perf_attr_iommu); list_for_each_entry(perf, &dev->perf_props, list) { perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) * num_attrs + sizeof(struct attribute_group), @@ -890,7 +891,8 @@ static void kfd_debug_print_topology(void) up_read(&topology_lock); } -/* Helper function for intializing platform_xx members of kfd_system_properties +/* Helper function for intializing platform_xx members of + * kfd_system_properties. Uses OEM info from the last CPU/APU node. */ static void kfd_update_system_properties(void) { @@ -1013,13 +1015,12 @@ int kfd_topology_init(void) */ #ifdef CONFIG_ACPI ret = kfd_create_crat_image_acpi(&crat_image, &image_size); - if (ret == 0) { + if (!ret) { ret = kfd_parse_crat_table(crat_image, &temp_topology_device_list, proximity_domain); if (ret || - kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { - + kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { kfd_release_topology_device_list( &temp_topology_device_list); kfd_destroy_crat_image(crat_image); @@ -1029,8 +1030,8 @@ int kfd_topology_init(void) #endif if (!crat_image) { ret = kfd_create_crat_image_virtual(&crat_image, &image_size, - COMPUTE_UNIT_CPU, NULL, - proximity_domain); + COMPUTE_UNIT_CPU, NULL, + proximity_domain); cpu_only_node = 1; if (ret) { pr_err("Error creating VCRAT table for CPU\n"); @@ -1038,8 +1039,8 @@ int kfd_topology_init(void) } ret = kfd_parse_crat_table(crat_image, - &temp_topology_device_list, - proximity_domain); + &temp_topology_device_list, + proximity_domain); if (ret) { pr_err("Error parsing VCRAT table for CPU\n"); goto err; @@ -1051,12 +1052,12 @@ int kfd_topology_init(void) down_write(&topology_lock); kfd_topology_update_device_list(&temp_topology_device_list, - &topology_device_list); + &topology_device_list); atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1); ret = kfd_topology_update_sysfs(); up_write(&topology_lock); - if (ret == 0) { + if (!ret) { sys_props.generation_count++; kfd_update_system_properties(); kfd_debug_print_topology(); @@ -1144,7 +1145,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) break; } up_write(&topology_lock); - return out_dev; } @@ -1212,8 +1212,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); - proximity_domain = atomic_inc_return(& - topology_crat_proximity_domain); + proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); /* Check to see if this gpu device exists in the topology_device_list. * If so, assign the gpu to that device, @@ -1224,15 +1223,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev = kfd_assign_gpu(gpu); if (!dev) { res = kfd_create_crat_image_virtual(&crat_image, &image_size, - COMPUTE_UNIT_GPU, - gpu, proximity_domain); + COMPUTE_UNIT_GPU, gpu, + proximity_domain); if (res) { pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", gpu_id); return res; } res = kfd_parse_crat_table(crat_image, - &temp_topology_device_list, proximity_domain); + &temp_topology_device_list, + proximity_domain); if (res) { pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", gpu_id); @@ -1249,14 +1249,13 @@ int kfd_topology_add_device(struct kfd_dev *gpu) res = kfd_topology_update_sysfs(); up_write(&topology_lock); - if (res == 0) + if (!res) sys_props.generation_count++; else pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", gpu_id, res); dev = kfd_assign_gpu(gpu); - if (!dev) { - pr_err("Could not assign GPU\n"); + if (WARN_ON(!dev)) { res = -ENODEV; goto err; } @@ -1315,14 +1314,15 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; default: - BUG(); + WARN(1, "Unexpected ASIC family %u", + dev->gpu->device_info->asic_family); } /* Fix errors in CZ CRAT. - * simd_count: Carrizo CRAT reports wrong simd_count, probably because - * it doesn't consider masked out CUs - * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd. - * capability flag: Carrizo CRAT doesn't report IOMMU flags. + * simd_count: Carrizo CRAT reports wrong simd_count, probably + * because it doesn't consider masked out CUs + * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd + * capability flag: Carrizo CRAT doesn't report IOMMU flags */ if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { dev->node_props.simd_count = @@ -1362,7 +1362,7 @@ int kfd_topology_remove_device(struct kfd_dev *gpu) up_write(&topology_lock); - if (res == 0) + if (!res) kfd_notify_gpu_change(gpu_id, 0); return res; @@ -1403,7 +1403,7 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) { int first_cpu_of_numa_node; - if (!cpumask || (cpumask == cpu_none_mask)) + if (!cpumask || cpumask == cpu_none_mask) return -1; first_cpu_of_numa_node = cpumask_first(cpumask); if (first_cpu_of_numa_node >= nr_cpu_ids) @@ -1446,7 +1446,7 @@ int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); r = dqm_debugfs_hqds(m, dev->gpu->dqm); - if (r != 0) + if (r) break; } @@ -1471,7 +1471,7 @@ int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets); - if (r != 0) + if (r) break; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 4c518fe8..2b36baf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -46,9 +46,6 @@ #define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 #define HSA_CAP_DOORBELL_TYPE_1_0 0x1 #define HSA_CAP_DOORBELL_TYPE_2_0 0x2 -#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 -#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 -#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000 #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 struct kfd_node_properties { @@ -169,9 +166,9 @@ struct kfd_topology_device { struct attribute attr_gpuid; struct attribute attr_name; struct attribute attr_props; - uint8_t oem_id[CRAT_OEMID_LENGTH]; - uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH]; - uint32_t oem_revision; + uint8_t oem_id[CRAT_OEMID_LENGTH]; + uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH]; + uint32_t oem_revision; }; struct kfd_system_properties { diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h index 011c14c..0bc0b25 100644 --- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), -- 2.7.4