diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1424-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1424-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch | 573 |
1 files changed, 573 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1424-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1424-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch new file mode 100644 index 00000000..fb0f6fee --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1424-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch @@ -0,0 +1,573 @@ +From 05234bbc6870c6dc59dbf92e9d07e5ec1175451a Mon Sep 17 00:00:00 2001 +From: Serguei Sagalovitch <Serguei.Sagalovitch@amd.com> +Date: Tue, 26 Apr 2016 15:23:28 -0400 +Subject: [PATCH 1424/4131] drm/amdkfd: Automatically detect and enable + PeerDirect support v3 + +Normally PeerDirect is delivered as part of OFED package. Accordingly +we are not able to rely on the fact that PeerDirect is present +in the system. To improve user experience and to be "PeerDirect" +-ready the logic implemented to detect PeerDirect interface presence +and creating PeerDirect "bridge". + +v2: Switch to use "symbol_request"/"symbol_put" instead of relying on +kallsyms support. Use PAGE_SIZE macro instead of hardcoded value. +Fix grammar errors. Update comments. 
+v3: Fixed errors in kfd_close_peer_direct() function + +Change-Id: I49ea6b0a1d80d7189eef4a16e15e4b7237b00168 +Signed-off-by: Serguei Sagalovitch <Serguei.Sagalovitch@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_module.c | 4 + + drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c | 488 ++++++++++++++++++++++++++++ + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 + + 4 files changed, 499 insertions(+), 1 deletion(-) + create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c + +diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile +index 60c60c0..3d2acf9 100644 +--- a/drivers/gpu/drm/amd/amdkfd/Makefile ++++ b/drivers/gpu/drm/amd/amdkfd/Makefile +@@ -14,6 +14,7 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ + kfd_process_queue_manager.o kfd_device_queue_manager.o \ + kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ + kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ +- kfd_dbgdev.o kfd_dbgmgr.o kfd_flat_memory.o kfd_crat.o kfd_rdma.o ++ kfd_dbgdev.o kfd_dbgmgr.o kfd_flat_memory.o kfd_crat.o kfd_rdma.o \ ++ kfd_peerdirect.o + + obj-$(CONFIG_HSA_AMD) += amdkfd.o +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +index bb0b00d..3109273 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +@@ -137,6 +137,9 @@ static int __init kfd_module_init(void) + + amdkfd_init_completed = 1; + ++ if (!kfd_init_peer_direct()) ++ pr_info("PeerDirect support was enabled\n"); ++ + dev_info(kfd_device, "Initialized module\n"); + + return 0; +@@ -153,6 +156,7 @@ static void __exit kfd_module_exit(void) + { + amdkfd_init_completed = 0; + ++ kfd_close_peer_direct(); + kfd_process_destroy_wq(); + kfd_topology_shutdown(); + kfd_chardev_exit(); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c +new file mode 100644 +index 
0000000..ffbccb3 +--- /dev/null ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c +@@ -0,0 +1,488 @@ ++/* ++ * Copyright 2016 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++ ++/* NOTE: ++ * ++ * This file contains logic to dynamically detect and enable PeerDirect ++ * support. PeerDirect support is delivered e.g. as part of OFED ++ * from Mellanox. Because we are not able to rely on the fact that the ++ * corresponding OFED will be installed we should: ++ * - copy PeerDirect definitions locally to avoid dependency on ++ * corresponding header file ++ * - try to dynamically detect address of PeerDirect function ++ * pointers. 
++ * ++ * If dynamic detection failed then PeerDirect support should be ++ * enabled using the standard PeerDirect bridge driver from: ++ * https://github.com/RadeonOpenCompute/ROCnRDMA ++ * ++ * ++ * Logic to support PeerDirect relies only on official public API to be ++ * non-intrusive as much as possible. ++ * ++ **/ ++ ++#include <linux/device.h> ++#include <linux/export.h> ++#include <linux/pid.h> ++#include <linux/err.h> ++#include <linux/slab.h> ++#include <linux/scatterlist.h> ++#include <linux/module.h> ++#include "amd_rdma.h" ++ ++ ++ ++/* ----------------------- PeerDirect interface ------------------------------*/ ++ ++/* ++ * Copyright (c) 2013, Mellanox Technologies. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++#define IB_PEER_MEMORY_NAME_MAX 64 ++#define IB_PEER_MEMORY_VER_MAX 16 ++ ++struct peer_memory_client { ++ char name[IB_PEER_MEMORY_NAME_MAX]; ++ char version[IB_PEER_MEMORY_VER_MAX]; ++ /* acquire return code: 1-mine, 0-not mine */ ++ int (*acquire)(unsigned long addr, size_t size, ++ void *peer_mem_private_data, ++ char *peer_mem_name, ++ void **client_context); ++ int (*get_pages)(unsigned long addr, ++ size_t size, int write, int force, ++ struct sg_table *sg_head, ++ void *client_context, void *core_context); ++ int (*dma_map)(struct sg_table *sg_head, void *client_context, ++ struct device *dma_device, int dmasync, int *nmap); ++ int (*dma_unmap)(struct sg_table *sg_head, void *client_context, ++ struct device *dma_device); ++ void (*put_pages)(struct sg_table *sg_head, void *client_context); ++ unsigned long (*get_page_size)(void *client_context); ++ void (*release)(void *client_context); ++ ++}; ++ ++typedef int (*invalidate_peer_memory)(void *reg_handle, ++ void *core_context); ++ ++void *ib_register_peer_memory_client(struct peer_memory_client *peer_client, ++ invalidate_peer_memory *invalidate_callback); ++void ib_unregister_peer_memory_client(void *reg_handle); ++ ++ ++/*------------------- PeerDirect bridge driver ------------------------------*/ ++ ++#define AMD_PEER_BRIDGE_DRIVER_VERSION "1.0" ++#define AMD_PEER_BRIDGE_DRIVER_NAME "amdkfd" ++ ++ ++static void* (*pfn_ib_register_peer_memory_client)(struct peer_memory_client ++ *peer_client, ++ invalidate_peer_memory ++ *invalidate_callback); ++ ++static void (*pfn_ib_unregister_peer_memory_client)(void *reg_handle); ++ ++static const struct amd_rdma_interface *rdma_interface; ++ ++static invalidate_peer_memory 
ib_invalidate_callback; ++static void *ib_reg_handle; ++ ++struct amd_mem_context { ++ uint64_t va; ++ uint64_t size; ++ struct pid *pid; ++ ++ struct amd_p2p_info *p2p_info; ++ ++ /* Flag that free callback was called */ ++ int free_callback_called; ++ ++ /* Context received from PeerDirect call */ ++ void *core_context; ++}; ++ ++ ++static void free_callback(void *client_priv) ++{ ++ struct amd_mem_context *mem_context = ++ (struct amd_mem_context *)client_priv; ++ ++ pr_debug("free_callback: data 0x%p\n", mem_context); ++ ++ if (!mem_context) { ++ pr_warn("free_callback: Invalid client context\n"); ++ return; ++ } ++ ++ pr_debug("mem_context->core_context 0x%p\n", mem_context->core_context); ++ ++ /* Call back IB stack asking to invalidate memory */ ++ (*ib_invalidate_callback) (ib_reg_handle, mem_context->core_context); ++ ++ /* amdkfd will free resources when we return from this callback. ++ * Set flag to inform that there is nothing to do on "put_pages", etc. ++ */ ++ ACCESS_ONCE(mem_context->free_callback_called) = 1; ++} ++ ++ ++static int amd_acquire(unsigned long addr, size_t size, ++ void *peer_mem_private_data, ++ char *peer_mem_name, void **client_context) ++{ ++ int ret; ++ struct amd_mem_context *mem_context; ++ struct pid *pid; ++ ++ /* Get pointer to structure describing current process */ ++ pid = get_task_pid(current, PIDTYPE_PID); ++ ++ pr_debug("acquire: addr:0x%lx,size:0x%x, pid 0x%p\n", ++ addr, (unsigned int)size, pid); ++ ++ /* Check if address is handled by AMD GPU driver */ ++ ret = rdma_interface->is_gpu_address(addr, pid); ++ ++ if (!ret) { ++ pr_debug("acquire: Not GPU Address\n"); ++ /* This is not GPU address */ ++ return 0; ++ } ++ ++ pr_debug("acquire: GPU address\n"); ++ ++ /* Initialize context used for operation with given address */ ++ mem_context = kzalloc(sizeof(struct amd_mem_context), GFP_KERNEL); ++ ++ if (!mem_context) ++ return 0; /* Error case handled as not GPU address */ ++ ++ mem_context->free_callback_called = 0; ++ 
mem_context->va = addr; ++ mem_context->size = size; ++ ++ /* Save PID. It is guaranteed that the function will be ++ * called in the correct process context as opposite to others. ++ */ ++ mem_context->pid = pid; ++ ++ pr_debug("acquire: Client context %p\n", mem_context); ++ ++ /* Return pointer to allocated context */ ++ *client_context = mem_context; ++ ++ /* Return 1 to inform that this address which will be handled ++ * by AMD GPU driver ++ */ ++ return 1; ++} ++ ++static int amd_get_pages(unsigned long addr, size_t size, int write, int force, ++ struct sg_table *sg_head, ++ void *client_context, void *core_context) ++{ ++ int ret; ++ struct amd_mem_context *mem_context = ++ (struct amd_mem_context *)client_context; ++ ++ pr_debug("get_pages: addr:0x%lx,size:0x%x, core_context:%p\n", ++ addr, (unsigned int)size, core_context); ++ ++ if (!mem_context) { ++ pr_warn("get_pages: Invalid client context\n"); ++ return -EINVAL; ++ } ++ ++ pr_debug("get_pages: pid :0x%p\n", mem_context->pid); ++ ++ ++ if (addr != mem_context->va) { ++ pr_warn("get_pages: Context address (0x%llx) is not the same\n", ++ mem_context->va); ++ return -EINVAL; ++ } ++ ++ if (size != mem_context->size) { ++ pr_warn("get_pages: Context size (0x%llx) is not the same\n", ++ mem_context->size); ++ return -EINVAL; ++ } ++ ++ ret = rdma_interface->get_pages(addr, ++ size, ++ mem_context->pid, ++ &mem_context->p2p_info, ++ free_callback, ++ mem_context); ++ ++ if (ret || !mem_context->p2p_info) { ++ pr_err("Could not rdma::get_pages failure: %d\n", ret); ++ return ret ? ret : -EINVAL; /* never succeed without p2p_info */ ++ } ++ ++ mem_context->core_context = core_context; ++ ++ /* Note: At this stage it is OK not to fill sg_table */ ++ return 0; ++} ++ ++ ++static int amd_dma_map(struct sg_table *sg_head, void *client_context, ++ struct device *dma_device, int dmasync, int *nmap) ++{ ++ /* ++ * NOTE/TODO: ++ * We could have potentially three cases for real memory ++ * location: ++ * - all memory in the local ++ * - all memory in the system (RAM) ++ * 
- memory is spread (s/g) between local and system. ++ * ++ * In the case of all memory in the system we could use ++ * iommu driver to build DMA addresses but not in the case ++ * of local memory because currently iommu driver doesn't ++ * deal with local/device memory addresses (it requires "struct ++ * page"). ++ * ++ * Accordingly returning assumes that iommu functionality ++ * should be disabled so we can assume that sg_table already ++ * contains DMA addresses. ++ * ++ */ ++ struct amd_mem_context *mem_context = ++ (struct amd_mem_context *)client_context; ++ ++ pr_debug("dma_map: Context 0x%p, sg_head 0x%p\n", ++ client_context, sg_head); ++ ++ pr_debug("dma_map: pid 0x%p, address 0x%llx, size:0x%llx\n", ++ mem_context->pid, ++ mem_context->va, ++ mem_context->size); ++ ++ if (!mem_context->p2p_info) { ++ pr_err("dma_map: No sg table were allocated\n"); ++ return -EINVAL; ++ } ++ ++ /* Copy information about previously allocated sg_table */ ++ *sg_head = *mem_context->p2p_info->pages; ++ ++ /* Return number of sg entries */ ++ *nmap = mem_context->p2p_info->pages->nents; ++ ++ return 0; ++} ++ ++static int amd_dma_unmap(struct sg_table *sg_head, void *client_context, ++ struct device *dma_device) ++{ ++ struct amd_mem_context *mem_context = ++ (struct amd_mem_context *)client_context; ++ ++ pr_debug("dma_unmap: Context 0x%p, sg_table 0x%p\n", ++ client_context, sg_head); ++ ++ pr_debug("dma_unmap: pid 0x%p, address 0x%llx, size:0x%llx\n", ++ mem_context->pid, ++ mem_context->va, ++ mem_context->size); ++ ++ /* Assume success */ ++ return 0; ++} ++static void amd_put_pages(struct sg_table *sg_head, void *client_context) ++{ ++ int ret = 0; ++ struct amd_mem_context *mem_context = ++ (struct amd_mem_context *)client_context; ++ ++ pr_debug("put_pages: sg_head %p client_context: 0x%p\n", ++ sg_head, client_context); ++ pr_debug("put_pages: pid 0x%p, address 0x%llx, size:0x%llx\n", ++ mem_context->pid, ++ mem_context->va, ++ mem_context->size); ++ ++ 
pr_debug("put_pages: mem_context->p2p_info %p\n", ++ mem_context->p2p_info); ++ ++ if (ACCESS_ONCE(mem_context->free_callback_called)) { ++ pr_debug("put_pages: free callback was called\n"); ++ return; ++ } ++ ++ if (mem_context->p2p_info) { ++ ret = rdma_interface->put_pages(&mem_context->p2p_info); ++ mem_context->p2p_info = NULL; ++ ++ if (ret) ++ pr_err("put_pages failure: %d (callback status %d)\n", ++ ret, mem_context->free_callback_called); ++ } else ++ pr_err("put_pages: Pointer to p2p info is null\n"); ++} ++static unsigned long amd_get_page_size(void *client_context) ++{ ++ unsigned long page_size; ++ int result; ++ struct amd_mem_context *mem_context = ++ (struct amd_mem_context *)client_context; ++ ++ pr_debug("get_page_size: context: %p\n", client_context); ++ pr_debug("get_page_size: pid 0x%p, address 0x%llx, size:0x%llx\n", ++ mem_context->pid, ++ mem_context->va, ++ mem_context->size); ++ ++ ++ result = rdma_interface->get_page_size( ++ mem_context->va, ++ mem_context->size, ++ mem_context->pid, ++ &page_size); ++ ++ if (result) { ++ pr_err("Could not get page size. %d\n", result); ++ /* If we failed to get page size then do not know what to do. 
++ * Let's return some default value ++ */ ++ return PAGE_SIZE; ++ } ++ ++ return page_size; ++} ++ ++static void amd_release(void *client_context) ++{ ++ struct amd_mem_context *mem_context = ++ (struct amd_mem_context *)client_context; ++ ++ pr_debug("release: context: 0x%p\n", client_context); ++ pr_debug("release: pid 0x%p, address 0x%llx, size:0x%llx\n", ++ mem_context->pid, ++ mem_context->va, ++ mem_context->size); ++ put_pid(mem_context->pid); /* Drop the reference taken in amd_acquire() */ ++ kfree(mem_context); ++} ++ ++ ++static struct peer_memory_client amd_mem_client = { ++ .acquire = amd_acquire, ++ .get_pages = amd_get_pages, ++ .dma_map = amd_dma_map, ++ .dma_unmap = amd_dma_unmap, ++ .put_pages = amd_put_pages, ++ .get_page_size = amd_get_page_size, ++ .release = amd_release, ++}; ++ ++ ++int kfd_init_peer_direct(void) ++{ ++ int result; ++ ++ pr_debug("Try to initialize PeerDirect support\n"); ++ ++ pfn_ib_register_peer_memory_client = ++ (void *(*)(struct peer_memory_client *, ++ invalidate_peer_memory *)) ++ symbol_request(ib_register_peer_memory_client); ++ ++ pfn_ib_unregister_peer_memory_client = (void (*)(void *)) ++ symbol_request(ib_unregister_peer_memory_client); ++ ++ if (!pfn_ib_register_peer_memory_client || ++ !pfn_ib_unregister_peer_memory_client) { ++ pr_warn("amdkfd: PeerDirect interface was not detected\n"); ++ return -EINVAL; ++ } ++ ++ result = amdkfd_query_rdma_interface(&rdma_interface); ++ ++ if (result < 0) { ++ pr_err("amdkfd: Cannot get RDMA Interface (result = %d)\n", ++ result); ++ return result; ++ } ++ ++ strcpy(amd_mem_client.name, AMD_PEER_BRIDGE_DRIVER_NAME); ++ strcpy(amd_mem_client.version, AMD_PEER_BRIDGE_DRIVER_VERSION); ++ ++ ib_reg_handle = pfn_ib_register_peer_memory_client(&amd_mem_client, ++ &ib_invalidate_callback); ++ ++ if (!ib_reg_handle) { ++ pr_err("amdkfd: Cannot register peer memory client\n"); ++ return -EINVAL; ++ } ++ ++ pr_info("amdkfd: PeerDirect support was initialized successfully\n"); ++ return 0; ++} ++ ++void kfd_close_peer_direct(void) ++{ ++ if 
(pfn_ib_unregister_peer_memory_client) { ++ if (ib_reg_handle) ++ pfn_ib_unregister_peer_memory_client(ib_reg_handle); ++ ++ symbol_put(ib_unregister_peer_memory_client); ++ } ++ ++ if (pfn_ib_register_peer_memory_client) ++ symbol_put(ib_register_peer_memory_client); ++ ++} ++ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index 196ede7..f3ad0c8 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -913,4 +913,9 @@ int restore(struct kfd_dev *kfd); + #define KFD_MULTI_PROC_MAPPING_HWS_SUPPORT 600 + #define KFD_CWSR_CZ_FW_VER 625 + ++/* PeerDirect support */ ++int kfd_init_peer_direct(void); ++void kfd_close_peer_direct(void); ++ ++ + #endif +-- +2.7.4 + |