diff options
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1132-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch')
-rw-r--r-- | meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1132-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch | 573 |
1 files changed, 0 insertions, 573 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1132-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1132-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch deleted file mode 100644 index 4b444798..00000000 --- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1132-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch +++ /dev/null @@ -1,573 +0,0 @@ -From 35ed769b72218c7f3bcc4d36de2ca3ee68be1eb7 Mon Sep 17 00:00:00 2001 -From: Serguei Sagalovitch <Serguei.Sagalovitch@amd.com> -Date: Tue, 26 Apr 2016 15:23:28 -0400 -Subject: [PATCH 1132/4131] drm/amdkfd: Automatically detect and enable - PeerDirect support v3 - -Normally PeerDirect is delivered as part of OFED package. Accordingly -we are not able to rely on the fact that PeerDirect is present -in the system. To improve user experience and to be "PeerDirect" --ready the logic implemented to detect PeerDirect interface presence -and creating PeerDirect "bridge". - -v2: Switch to use "symbol_request"/"symbol_put" instead of relying on -kallsyms support. Use PAGE_SIZE macro instead of hardcoded value. -Fix grammar errors. Update comments. -v3: Fixed errors in kfd_close_peer_direct() function - -Change-Id: I49ea6b0a1d80d7189eef4a16e15e4b7237b00168 -Signed-off-by: Serguei Sagalovitch <Serguei.Sagalovitch@amd.com> ---- - drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- - drivers/gpu/drm/amd/amdkfd/kfd_module.c | 4 + - drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c | 488 ++++++++++++++++++++++++++++ - drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 + - 4 files changed, 499 insertions(+), 1 deletion(-) - create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c - -diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile -index b5d3db9..59d284e 100644 ---- a/drivers/gpu/drm/amd/amdkfd/Makefile -+++ b/drivers/gpu/drm/amd/amdkfd/Makefile -@@ -15,6 +15,7 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ - kfd_process_queue_manager.o kfd_device_queue_manager.o \ - kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ - kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ -- kfd_dbgdev.o kfd_dbgmgr.o kfd_flat_memory.o kfd_crat.o kfd_rdma.o -+ kfd_dbgdev.o kfd_dbgmgr.o kfd_flat_memory.o kfd_crat.o kfd_rdma.o \ -+ kfd_peerdirect.o - - obj-$(CONFIG_HSA_AMD) += amdkfd.o -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c -index bb0b00d..3109273 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c -@@ -137,6 +137,9 @@ static int __init kfd_module_init(void) - - amdkfd_init_completed = 1; - -+ if (!kfd_init_peer_direct()) -+ pr_info("PeerDirect support was enabled\n"); -+ - dev_info(kfd_device, "Initialized module\n"); - - return 0; -@@ -153,6 +156,7 @@ static void __exit kfd_module_exit(void) - { - amdkfd_init_completed = 0; - -+ kfd_close_peer_direct(); - kfd_process_destroy_wq(); - kfd_topology_shutdown(); - kfd_chardev_exit(); -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c -new file mode 100644 -index 0000000..ffbccb3 ---- /dev/null -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c -@@ -0,0 +1,488 @@ -+/* -+ * Copyright 2016 Advanced Micro Devices, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -+ * OTHER DEALINGS IN THE SOFTWARE. -+ */ -+ -+ -+/* NOTE: -+ * -+ * This file contains logic to dynamically detect and enable PeerDirect -+ * suppor. PeerDirect support is delivered e.g. as part of OFED -+ * from Mellanox. Because we are not able to rely on the fact that the -+ * corresponding OFED will be installed we should: -+ * - copy PeerDirect definitions locally to avoid dependency on -+ * corresponding header file -+ * - try dynamically detect address of PeerDirect function -+ * pointers. -+ * -+ * If dynamic detection failed then PeerDirect support should be -+ * enabled using the standard PeerDirect bridge driver from: -+ * https://github.com/RadeonOpenCompute/ROCnRDMA -+ * -+ * -+ * Logic to support PeerDirect relies only on official public API to be -+ * non-intrusive as much as possible. -+ * -+ **/ -+ -+#include <linux/device.h> -+#include <linux/export.h> -+#include <linux/pid.h> -+#include <linux/err.h> -+#include <linux/slab.h> -+#include <linux/scatterlist.h> -+#include <linux/module.h> -+#include "amd_rdma.h" -+ -+ -+ -+/* ----------------------- PeerDirect interface ------------------------------*/ -+ -+/* -+ * Copyright (c) 2013, Mellanox Technologies. All rights reserved. -+ * -+ * This software is available to you under a choice of one of two -+ * licenses. You may choose to be licensed under the terms of the GNU -+ * General Public License (GPL) Version 2, available from the file -+ * COPYING in the main directory of this source tree, or the -+ * OpenIB.org BSD license below: -+ * -+ * Redistribution and use in source and binary forms, with or -+ * without modification, are permitted provided that the following -+ * conditions are met: -+ * -+ * - Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * - Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials -+ * provided with the distribution. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ */ -+#define IB_PEER_MEMORY_NAME_MAX 64 -+#define IB_PEER_MEMORY_VER_MAX 16 -+ -+struct peer_memory_client { -+ char name[IB_PEER_MEMORY_NAME_MAX]; -+ char version[IB_PEER_MEMORY_VER_MAX]; -+ /* acquire return code: 1-mine, 0-not mine */ -+ int (*acquire)(unsigned long addr, size_t size, -+ void *peer_mem_private_data, -+ char *peer_mem_name, -+ void **client_context); -+ int (*get_pages)(unsigned long addr, -+ size_t size, int write, int force, -+ struct sg_table *sg_head, -+ void *client_context, void *core_context); -+ int (*dma_map)(struct sg_table *sg_head, void *client_context, -+ struct device *dma_device, int dmasync, int *nmap); -+ int (*dma_unmap)(struct sg_table *sg_head, void *client_context, -+ struct device *dma_device); -+ void (*put_pages)(struct sg_table *sg_head, void *client_context); -+ unsigned long (*get_page_size)(void *client_context); -+ void (*release)(void *client_context); -+ -+}; -+ -+typedef int (*invalidate_peer_memory)(void *reg_handle, -+ void *core_context); -+ -+void *ib_register_peer_memory_client(struct peer_memory_client *peer_client, -+ invalidate_peer_memory *invalidate_callback); -+void ib_unregister_peer_memory_client(void *reg_handle); -+ -+ -+/*------------------- PeerDirect bridge driver ------------------------------*/ -+ -+#define AMD_PEER_BRIDGE_DRIVER_VERSION "1.0" -+#define AMD_PEER_BRIDGE_DRIVER_NAME "amdkfd" -+ -+ -+static void* (*pfn_ib_register_peer_memory_client)(struct peer_memory_client -+ *peer_client, -+ invalidate_peer_memory -+ *invalidate_callback); -+ -+static void (*pfn_ib_unregister_peer_memory_client)(void *reg_handle); -+ -+static const struct amd_rdma_interface *rdma_interface; -+ -+static invalidate_peer_memory ib_invalidate_callback; -+static void *ib_reg_handle; -+ -+struct amd_mem_context { -+ uint64_t va; -+ uint64_t size; -+ struct pid *pid; -+ -+ struct amd_p2p_info *p2p_info; -+ -+ /* Flag that free callback was called */ -+ int free_callback_called; -+ -+ /* Context received from PeerDirect call */ -+ void *core_context; -+}; -+ -+ -+static void free_callback(void *client_priv) -+{ -+ struct amd_mem_context *mem_context = -+ (struct amd_mem_context *)client_priv; -+ -+ pr_debug("free_callback: data 0x%p\n", mem_context); -+ -+ if (!mem_context) { -+ pr_warn("free_callback: Invalid client context\n"); -+ return; -+ } -+ -+ pr_debug("mem_context->core_context 0x%p\n", mem_context->core_context); -+ -+ /* Call back IB stack asking to invalidate memory */ -+ (*ib_invalidate_callback) (ib_reg_handle, mem_context->core_context); -+ -+ /* amdkfd will free resources when we return from this callback. -+ * Set flag to inform that there is nothing to do on "put_pages", etc. -+ */ -+ ACCESS_ONCE(mem_context->free_callback_called) = 1; -+} -+ -+ -+static int amd_acquire(unsigned long addr, size_t size, -+ void *peer_mem_private_data, -+ char *peer_mem_name, void **client_context) -+{ -+ int ret; -+ struct amd_mem_context *mem_context; -+ struct pid *pid; -+ -+ /* Get pointer to structure describing current process */ -+ pid = get_task_pid(current, PIDTYPE_PID); -+ -+ pr_debug("acquire: addr:0x%lx,size:0x%x, pid 0x%p\n", -+ addr, (unsigned int)size, pid); -+ -+ /* Check if address is handled by AMD GPU driver */ -+ ret = rdma_interface->is_gpu_address(addr, pid); -+ -+ if (!ret) { -+ pr_debug("acquire: Not GPU Address\n"); -+ /* This is not GPU address */ -+ return 0; -+ } -+ -+ pr_debug("acquire: GPU address\n"); -+ -+ /* Initialize context used for operation with given address */ -+ mem_context = kzalloc(sizeof(struct amd_mem_context), GFP_KERNEL); -+ -+ if (!mem_context) -+ return 0; /* Error case handled as not GPU address */ -+ -+ mem_context->free_callback_called = 0; -+ mem_context->va = addr; -+ mem_context->size = size; -+ -+ /* Save PID. It is guaranteed that the function will be -+ * called in the correct process context as opposite to others. -+ */ -+ mem_context->pid = pid; -+ -+ pr_debug("acquire: Client context %p\n", mem_context); -+ -+ /* Return pointer to allocated context */ -+ *client_context = mem_context; -+ -+ /* Return 1 to inform that this address which will be handled -+ * by AMD GPU driver -+ */ -+ return 1; -+} -+ -+static int amd_get_pages(unsigned long addr, size_t size, int write, int force, -+ struct sg_table *sg_head, -+ void *client_context, void *core_context) -+{ -+ int ret; -+ struct amd_mem_context *mem_context = -+ (struct amd_mem_context *)client_context; -+ -+ pr_debug("get_pages: addr:0x%lx,size:0x%x, core_context:%p\n", -+ addr, (unsigned int)size, core_context); -+ -+ if (!mem_context) { -+ pr_warn("get_pages: Invalid client context"); -+ return -EINVAL; -+ } -+ -+ pr_debug("get_pages: pid :0x%p\n", mem_context->pid); -+ -+ -+ if (addr != mem_context->va) { -+ pr_warn("get_pages: Context address (0x%llx) is not the same\n", -+ mem_context->va); -+ return -EINVAL; -+ } -+ -+ if (size != mem_context->size) { -+ pr_warn("get_pages: Context size (0x%llx) is not the same\n", -+ mem_context->size); -+ return -EINVAL; -+ } -+ -+ ret = rdma_interface->get_pages(addr, -+ size, -+ mem_context->pid, -+ &mem_context->p2p_info, -+ free_callback, -+ mem_context); -+ -+ if (ret || !mem_context->p2p_info) { -+ pr_err("Could not rdma::get_pages failure: %d\n", ret); -+ return ret; -+ } -+ -+ mem_context->core_context = core_context; -+ -+ /* Note: At this stage it is OK not to fill sg_table */ -+ return 0; -+} -+ -+ -+static int amd_dma_map(struct sg_table *sg_head, void *client_context, -+ struct device *dma_device, int dmasync, int *nmap) -+{ -+ /* -+ * NOTE/TODO: -+ * We could have potentially three cases for real memory -+ * location: -+ * - all memory in the local -+ * - all memory in the system (RAM) -+ * - memory is spread (s/g) between local and system. -+ * -+ * In the case of all memory in the system we could use -+ * iommu driver to build DMA addresses but not in the case -+ * of local memory because currently iommu driver doesn't -+ * deal with local/device memory addresses (it requires "struct -+ * page"). -+ * -+ * Accordingly returning assumes that iommu funcutionality -+ * should be disabled so we can assume that sg_table already -+ * contains DMA addresses. -+ * -+ */ -+ struct amd_mem_context *mem_context = -+ (struct amd_mem_context *)client_context; -+ -+ pr_debug("dma_map: Context 0x%p, sg_head 0x%p\n", -+ client_context, sg_head); -+ -+ pr_debug("dma_map: pid 0x%p, address 0x%llx, size:0x%llx\n", -+ mem_context->pid, -+ mem_context->va, -+ mem_context->size); -+ -+ if (!mem_context->p2p_info) { -+ pr_err("dma_map: No sg table were allocated\n"); -+ return -EINVAL; -+ } -+ -+ /* Copy information about previosly allocated sg_table */ -+ *sg_head = *mem_context->p2p_info->pages; -+ -+ /* Return number of pages */ -+ *nmap = mem_context->p2p_info->pages->nents; -+ -+ return 0; -+} -+ -+static int amd_dma_unmap(struct sg_table *sg_head, void *client_context, -+ struct device *dma_device) -+{ -+ struct amd_mem_context *mem_context = -+ (struct amd_mem_context *)client_context; -+ -+ pr_debug("dma_unmap: Context 0x%p, sg_table 0x%p\n", -+ client_context, sg_head); -+ -+ pr_debug("dma_unmap: pid 0x%p, address 0x%llx, size:0x%llx\n", -+ mem_context->pid, -+ mem_context->va, -+ mem_context->size); -+ -+ /* Assume success */ -+ return 0; -+} -+static void amd_put_pages(struct sg_table *sg_head, void *client_context) -+{ -+ int ret = 0; -+ struct amd_mem_context *mem_context = -+ (struct amd_mem_context *)client_context; -+ -+ pr_debug("put_pages: sg_head %p client_context: 0x%p\n", -+ sg_head, client_context); -+ pr_debug("put_pages: pid 0x%p, address 0x%llx, size:0x%llx\n", -+ mem_context->pid, -+ mem_context->va, -+ mem_context->size); -+ -+ pr_debug("put_pages: mem_context->p2p_info %p\n", -+ mem_context->p2p_info); -+ -+ if (ACCESS_ONCE(mem_context->free_callback_called)) { -+ pr_debug("put_pages: free callback was called\n"); -+ return; -+ } -+ -+ if (mem_context->p2p_info) { -+ ret = rdma_interface->put_pages(&mem_context->p2p_info); -+ mem_context->p2p_info = NULL; -+ -+ if (ret) -+ pr_err("put_pages failure: %d (callback status %d)\n", -+ ret, mem_context->free_callback_called); -+ } else -+ pr_err("put_pages: Pointer to p2p info is null\n"); -+} -+static unsigned long amd_get_page_size(void *client_context) -+{ -+ unsigned long page_size; -+ int result; -+ struct amd_mem_context *mem_context = -+ (struct amd_mem_context *)client_context; -+ -+ pr_debug("get_page_size: context: %p\n", client_context); -+ pr_debug("get_page_size: pid 0x%p, address 0x%llx, size:0x%llx\n", -+ mem_context->pid, -+ mem_context->va, -+ mem_context->size); -+ -+ -+ result = rdma_interface->get_page_size( -+ mem_context->va, -+ mem_context->size, -+ mem_context->pid, -+ &page_size); -+ -+ if (result) { -+ pr_err("Could not get page size. %d\n", result); -+ /* If we failed to get page size then do not know what to do. -+ * Let's return some default value -+ */ -+ return PAGE_SIZE; -+ } -+ -+ return page_size; -+} -+ -+static void amd_release(void *client_context) -+{ -+ struct amd_mem_context *mem_context = -+ (struct amd_mem_context *)client_context; -+ -+ pr_debug("release: context: 0x%p\n", client_context); -+ pr_debug("release: pid 0x%p, address 0x%llx, size:0x%llx\n", -+ mem_context->pid, -+ mem_context->va, -+ mem_context->size); -+ -+ kfree(mem_context); -+} -+ -+ -+static struct peer_memory_client amd_mem_client = { -+ .acquire = amd_acquire, -+ .get_pages = amd_get_pages, -+ .dma_map = amd_dma_map, -+ .dma_unmap = amd_dma_unmap, -+ .put_pages = amd_put_pages, -+ .get_page_size = amd_get_page_size, -+ .release = amd_release, -+}; -+ -+ -+int kfd_init_peer_direct(void) -+{ -+ int result; -+ -+ pr_debug("Try to initialize PeerDirect support\n"); -+ -+ pfn_ib_register_peer_memory_client = -+ (void *(*)(struct peer_memory_client *, -+ invalidate_peer_memory *)) -+ symbol_request(ib_register_peer_memory_client); -+ -+ pfn_ib_unregister_peer_memory_client = (void (*)(void *)) -+ symbol_request(ib_unregister_peer_memory_client); -+ -+ if (!pfn_ib_register_peer_memory_client || -+ !pfn_ib_unregister_peer_memory_client) { -+ pr_warn("amdkfd: PeerDirect interface was not detected\n"); -+ return -EINVAL; -+ } -+ -+ result = amdkfd_query_rdma_interface(&rdma_interface); -+ -+ if (result < 0) { -+ pr_err("amdkfd: Cannot get RDMA Interface (result = %d)\n", -+ result); -+ return result; -+ } -+ -+ strcpy(amd_mem_client.name, AMD_PEER_BRIDGE_DRIVER_NAME); -+ strcpy(amd_mem_client.version, AMD_PEER_BRIDGE_DRIVER_VERSION); -+ -+ ib_reg_handle = pfn_ib_register_peer_memory_client(&amd_mem_client, -+ &ib_invalidate_callback); -+ -+ if (!ib_reg_handle) { -+ pr_err("amdkfd: Cannot register peer memory client\n"); -+ return -EINVAL; -+ } -+ -+ pr_info("amdkfd: PeerDirect support was initialized successfully\n"); -+ return 0; -+} -+ -+void kfd_close_peer_direct(void) -+{ -+ if (pfn_ib_unregister_peer_memory_client) { -+ if (ib_reg_handle) -+ pfn_ib_unregister_peer_memory_client(ib_reg_handle); -+ -+ symbol_put(ib_unregister_peer_memory_client); -+ } -+ -+ if (pfn_ib_register_peer_memory_client) -+ symbol_put(ib_register_peer_memory_client); -+ -+} -+ -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -index cfa2283..3ec7914 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -@@ -913,4 +913,9 @@ int restore(struct kfd_dev *kfd); - #define KFD_MULTI_PROC_MAPPING_HWS_SUPPORT 600 - #define KFD_CWSR_CZ_FW_VER 625 - -+/* PeerDirect support */ -+int kfd_init_peer_direct(void); -+void kfd_close_peer_direct(void); -+ -+ - #endif --- -2.7.4 - |