From 15f201d47e55ad25961e051e0c762c7621a97afe Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Sat, 26 Aug 2017 02:00:57 -0400 Subject: [PATCH 2121/4131] drm/amdkfd: Separate doorbell allocation from PASID PASID management is moving into KGD. Limiting the PASID range to the number of doorbell pages is no longer practical. Change-Id: I1def062f821b67e8cae608f34a2cab11e4a893e3 Signed-off-by: Felix Kuehling Acked-by: Alex Deucher Reviewed-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 7 ----- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 48 ++++++++++++++++++++----------- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 10 +++---- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 +++++ 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 6f5f93c..30ea84b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -417,13 +417,6 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd) pasid_limit = min_t(unsigned int, (unsigned int)(1 << kfd->device_info->max_pasid_bits), iommu_info.max_pasids); - /* - * last pasid is used for kernel queues doorbells - * in the future the last pasid might be used for a kernel thread. - */ - pasid_limit = min_t(unsigned int, - pasid_limit, - kfd->doorbell_process_limit - 1); if (!kfd_set_pasid_limit(pasid_limit)) { dev_err(kfd_device, "error setting pasid limit\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 008d258..a1ff443 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -24,16 +24,15 @@ #include #include #include +#include /* - * This extension supports a kernel level doorbells management for - * the kernel queues. - * Basically the last doorbells page is devoted to kernel queues - * and that's assures that any user process won't get access to the - * kernel doorbells page + * This extension supports a kernel level doorbells management for the + * kernel queues using the first doorbell page reserved for the kernel. */ -#define KERNEL_DOORBELL_PASID 1 +static DEFINE_IDA(doorbell_ida); +static unsigned int max_doorbell_slices; /* * Each device exposes a doorbell aperture, a PCI MMIO aperture that @@ -83,13 +82,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd) (doorbell_aperture_size - doorbell_start_offset) / kfd_doorbell_process_slice(kfd); else - doorbell_process_limit = 0; + return -ENOSPC; + + if (!max_doorbell_slices || + doorbell_process_limit < max_doorbell_slices) + max_doorbell_slices = doorbell_process_limit; kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + doorbell_start_offset; kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32); - kfd->doorbell_process_limit = doorbell_process_limit - 1; kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, kfd_doorbell_process_slice(kfd)); @@ -181,12 +183,10 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, inx *= kfd->device_info->doorbell_size / sizeof(u32); /* - * Calculating the kernel doorbell offset using "faked" kernel - * pasid that allocated for kernel queues only. Offset is in - * dword units regardless of the ASIC-dependent doorbell size. + * Calculating the kernel doorbell offset using the first + * doorbell page. */ - *doorbell_off = KERNEL_DOORBELL_PASID * - (kfd_doorbell_process_slice(kfd) / sizeof(u32)) + inx; + *doorbell_off = kfd->doorbell_id_offset + inx; pr_debug("Get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" @@ -235,12 +235,12 @@ unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, { /* * doorbell_id_offset accounts for doorbells taken by KGD. - * pasid * kfd_doorbell_process_slice/sizeof(u32) adjusts to + * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to * the process's doorbells. The offset returned is in dword * units regardless of the ASIC-dependent doorbell size. */ return kfd->doorbell_id_offset + - process->pasid * (kfd_doorbell_process_slice(kfd)/sizeof(u32)) + + process->doorbell_index * (kfd_doorbell_process_slice(kfd)/sizeof(u32)) + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); } @@ -258,5 +258,21 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, struct kfd_process *process) { return dev->doorbell_base + - process->pasid * kfd_doorbell_process_slice(dev); + process->doorbell_index * kfd_doorbell_process_slice(dev); +} + +int kfd_alloc_process_doorbells(struct kfd_process *process) +{ + int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices, + GFP_KERNEL); + if (r > 0) + process->doorbell_index = r; + + return r; +} + +void kfd_free_process_doorbells(struct kfd_process *process) +{ + if (process->doorbell_index) + ida_simple_remove(&doorbell_ida, process->doorbell_index); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 6e9578e..aa07780 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -233,9 +233,6 @@ struct kfd_dev { * to HW doorbell, GFX reserved some * at the start) */ - size_t doorbell_process_limit; /* Number of processes we have doorbell - * space for. - */ u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells * page used by kernel queue */ @@ -700,6 +697,7 @@ struct kfd_process { struct rcu_head rcu; unsigned int pasid; + unsigned int doorbell_index; /* * List of kfd_process_device structures, @@ -831,6 +829,10 @@ void write_kernel_doorbell64(void __iomem *db, u64 value); unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, unsigned int doorbell_id); +phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, + struct kfd_process *process); +int kfd_alloc_process_doorbells(struct kfd_process *process); +void kfd_free_process_doorbells(struct kfd_process *process); /* GTT Sub-Allocator */ @@ -1024,8 +1026,6 @@ void kfd_pm_func_init_v9(struct packet_manager *pm, uint16_t fw_ver); uint64_t kfd_get_number_elems(struct kfd_dev *kfd); -phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, - struct kfd_process *process); int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, unsigned long timeout_ms); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index fab1d59..6bf4ecd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -414,6 +414,7 @@ static void kfd_process_wq_release(struct work_struct *work) kfd_event_free_process(p); kfd_pasid_free(p->pasid); + kfd_free_process_doorbells(p); mutex_destroy(&p->mutex); @@ -590,7 +591,11 @@ static struct kfd_process *create_process(const struct task_struct *thread, if (process->pasid == 0) goto err_alloc_pasid; + if (kfd_alloc_process_doorbells(process) < 0) + goto err_alloc_doorbells; + kref_init(&process->ref); + mutex_init(&process->mutex); process->mm = thread->mm; @@ -651,6 +656,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm); err_mmu_notifier: mutex_destroy(&process->mutex); + kfd_free_process_doorbells(process); +err_alloc_doorbells: kfd_pasid_free(process->pasid); err_alloc_pasid: kfree(process); -- 2.7.4