Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1132-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch')
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.14.71/1132-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch | 573
1 file changed, 573 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1132-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1132-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch
new file mode 100644
index 00000000..4b444798
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1132-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch
@@ -0,0 +1,573 @@
+From 35ed769b72218c7f3bcc4d36de2ca3ee68be1eb7 Mon Sep 17 00:00:00 2001
+From: Serguei Sagalovitch <Serguei.Sagalovitch@amd.com>
+Date: Tue, 26 Apr 2016 15:23:28 -0400
+Subject: [PATCH 1132/4131] drm/amdkfd: Automatically detect and enable
+ PeerDirect support v3
+
+Normally PeerDirect is delivered as part of the OFED package, so we
+cannot rely on PeerDirect being present in the system. To improve the
+user experience and to be "PeerDirect"-ready, logic is implemented to
+detect the presence of the PeerDirect interface and to create the
+PeerDirect "bridge".
+
+v2: Switch to use "symbol_request"/"symbol_put" instead of relying on
+kallsyms support. Use PAGE_SIZE macro instead of hardcoded value.
+Fix grammar errors. Update comments.
+v3: Fixed errors in kfd_close_peer_direct() function
+
+Change-Id: I49ea6b0a1d80d7189eef4a16e15e4b7237b00168
+Signed-off-by: Serguei Sagalovitch <Serguei.Sagalovitch@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/Makefile | 3 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_module.c | 4 +
+ drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c | 488 ++++++++++++++++++++++++++++
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +
+ 4 files changed, 499 insertions(+), 1 deletion(-)
+ create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
+index b5d3db9..59d284e 100644
+--- a/drivers/gpu/drm/amd/amdkfd/Makefile
++++ b/drivers/gpu/drm/amd/amdkfd/Makefile
+@@ -15,6 +15,7 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
+ kfd_process_queue_manager.o kfd_device_queue_manager.o \
+ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
+ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
+- kfd_dbgdev.o kfd_dbgmgr.o kfd_flat_memory.o kfd_crat.o kfd_rdma.o
++ kfd_dbgdev.o kfd_dbgmgr.o kfd_flat_memory.o kfd_crat.o kfd_rdma.o \
++ kfd_peerdirect.o
+
+ obj-$(CONFIG_HSA_AMD) += amdkfd.o
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+index bb0b00d..3109273 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+@@ -137,6 +137,9 @@ static int __init kfd_module_init(void)
+
+ amdkfd_init_completed = 1;
+
++ if (!kfd_init_peer_direct())
++ pr_info("PeerDirect support was enabled\n");
++
+ dev_info(kfd_device, "Initialized module\n");
+
+ return 0;
+@@ -153,6 +156,7 @@ static void __exit kfd_module_exit(void)
+ {
+ amdkfd_init_completed = 0;
+
++ kfd_close_peer_direct();
+ kfd_process_destroy_wq();
+ kfd_topology_shutdown();
+ kfd_chardev_exit();
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c
+new file mode 100644
+index 0000000..ffbccb3
+--- /dev/null
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c
+@@ -0,0 +1,488 @@
++/*
++ * Copyright 2016 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++
++/* NOTE:
++ *
++ * This file contains logic to dynamically detect and enable PeerDirect
++ * support. PeerDirect support is delivered e.g. as part of OFED
++ * from Mellanox. Because we cannot rely on the corresponding OFED
++ * being installed, we should:
++ * - copy the PeerDirect definitions locally, to avoid a dependency
++ * on the corresponding header file
++ * - try to dynamically detect the addresses of the PeerDirect
++ * function pointers.
++ *
++ * If dynamic detection fails, PeerDirect support can still be
++ * enabled using the standard PeerDirect bridge driver from:
++ * https://github.com/RadeonOpenCompute/ROCnRDMA
++ *
++ *
++ * The logic to support PeerDirect relies only on the official public
++ * API, in order to be as non-intrusive as possible.
++ *
++ **/
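++
++/* A minimal sketch of the detection pattern used in
++ * kfd_init_peer_direct() below, assuming only that the exporting
++ * module (e.g. ib_core from an OFED install) may or may not be
++ * present:
++ *
++ * pfn = symbol_request(ib_register_peer_memory_client);
++ * if (!pfn)
++ * return -EINVAL; (interface absent, no bridge)
++ * ...
++ * symbol_put(ib_register_peer_memory_client);
++ *
++ * symbol_request() resolves the exported symbol (requesting the
++ * module on demand if needed) and pins the module that provides it;
++ * symbol_put() drops that reference again.
++ */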
++
++#include <linux/device.h>
++#include <linux/export.h>
++#include <linux/pid.h>
++#include <linux/err.h>
++#include <linux/slab.h>
++#include <linux/scatterlist.h>
++#include <linux/module.h>
++#include "amd_rdma.h"
++
++/* ----------------------- PeerDirect interface ------------------------------*/
++
++/*
++ * Copyright (c) 2013, Mellanox Technologies. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++#define IB_PEER_MEMORY_NAME_MAX 64
++#define IB_PEER_MEMORY_VER_MAX 16
++
++struct peer_memory_client {
++ char name[IB_PEER_MEMORY_NAME_MAX];
++ char version[IB_PEER_MEMORY_VER_MAX];
++ /* acquire return code: 1-mine, 0-not mine */
++ int (*acquire)(unsigned long addr, size_t size,
++ void *peer_mem_private_data,
++ char *peer_mem_name,
++ void **client_context);
++ int (*get_pages)(unsigned long addr,
++ size_t size, int write, int force,
++ struct sg_table *sg_head,
++ void *client_context, void *core_context);
++ int (*dma_map)(struct sg_table *sg_head, void *client_context,
++ struct device *dma_device, int dmasync, int *nmap);
++ int (*dma_unmap)(struct sg_table *sg_head, void *client_context,
++ struct device *dma_device);
++ void (*put_pages)(struct sg_table *sg_head, void *client_context);
++ unsigned long (*get_page_size)(void *client_context);
++ void (*release)(void *client_context);
++
++};
++
++typedef int (*invalidate_peer_memory)(void *reg_handle,
++ void *core_context);
++
++void *ib_register_peer_memory_client(struct peer_memory_client *peer_client,
++ invalidate_peer_memory *invalidate_callback);
++void ib_unregister_peer_memory_client(void *reg_handle);
++
++
++/*------------------- PeerDirect bridge driver ------------------------------*/
++
++#define AMD_PEER_BRIDGE_DRIVER_VERSION "1.0"
++#define AMD_PEER_BRIDGE_DRIVER_NAME "amdkfd"
++
++
++static void* (*pfn_ib_register_peer_memory_client)(struct peer_memory_client
++ *peer_client,
++ invalidate_peer_memory
++ *invalidate_callback);
++
++static void (*pfn_ib_unregister_peer_memory_client)(void *reg_handle);
++
++static const struct amd_rdma_interface *rdma_interface;
++
++static invalidate_peer_memory ib_invalidate_callback;
++static void *ib_reg_handle;
++
++struct amd_mem_context {
++ uint64_t va;
++ uint64_t size;
++ struct pid *pid;
++
++ struct amd_p2p_info *p2p_info;
++
++ /* Flag that free callback was called */
++ int free_callback_called;
++
++ /* Context received from PeerDirect call */
++ void *core_context;
++};
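++
++/* Lifecycle: an amd_mem_context is allocated in amd_acquire(),
++ * filled in by amd_get_pages() and freed in amd_release(); see the
++ * callbacks below.
++ */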
++
++
++static void free_callback(void *client_priv)
++{
++ struct amd_mem_context *mem_context =
++ (struct amd_mem_context *)client_priv;
++
++ pr_debug("free_callback: data 0x%p\n", mem_context);
++
++ if (!mem_context) {
++ pr_warn("free_callback: Invalid client context\n");
++ return;
++ }
++
++ pr_debug("mem_context->core_context 0x%p\n", mem_context->core_context);
++
++ /* Call back IB stack asking to invalidate memory */
++ ib_invalidate_callback(ib_reg_handle, mem_context->core_context);
++
++ /* amdkfd will free resources when we return from this callback.
++ * Set flag to inform that there is nothing to do on "put_pages", etc.
++ */
++ ACCESS_ONCE(mem_context->free_callback_called) = 1;
++}
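++
++/* This write pairs with the ACCESS_ONCE() read in amd_put_pages():
++ * once the flag is set, put_pages becomes a no-op, because amdkfd
++ * reclaims the resources itself after this callback returns.
++ */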
++
++
++static int amd_acquire(unsigned long addr, size_t size,
++ void *peer_mem_private_data,
++ char *peer_mem_name, void **client_context)
++{
++ int ret;
++ struct amd_mem_context *mem_context;
++ struct pid *pid;
++
++ /* Get pointer to structure describing current process */
++ pid = get_task_pid(current, PIDTYPE_PID);
++
++ pr_debug("acquire: addr:0x%lx,size:0x%x, pid 0x%p\n",
++ addr, (unsigned int)size, pid);
++
++ /* Check if address is handled by AMD GPU driver */
++ ret = rdma_interface->is_gpu_address(addr, pid);
++
++ if (!ret) {
++ pr_debug("acquire: Not GPU Address\n");
++ /* This is not a GPU address */
++ return 0;
++ }
++
++ pr_debug("acquire: GPU address\n");
++
++ /* Initialize context used for operation with given address */
++ mem_context = kzalloc(sizeof(struct amd_mem_context), GFP_KERNEL);
++
++ if (!mem_context)
++ return 0; /* Error case is handled as "not a GPU address" */
++
++ mem_context->free_callback_called = 0;
++ mem_context->va = addr;
++ mem_context->size = size;
++
++ /* Save the PID. Unlike the other callbacks, this function is
++ * guaranteed to be called in the correct process context.
++ */
++ mem_context->pid = pid;
++
++ pr_debug("acquire: Client context %p\n", mem_context);
++
++ /* Return pointer to allocated context */
++ *client_context = mem_context;
++
++ /* Return 1 to indicate that this address will be handled
++ * by the AMD GPU driver
++ */
++ return 1;
++}
++
++static int amd_get_pages(unsigned long addr, size_t size, int write, int force,
++ struct sg_table *sg_head,
++ void *client_context, void *core_context)
++{
++ int ret;
++ struct amd_mem_context *mem_context =
++ (struct amd_mem_context *)client_context;
++
++ pr_debug("get_pages: addr:0x%lx,size:0x%x, core_context:%p\n",
++ addr, (unsigned int)size, core_context);
++
++ if (!mem_context) {
++ pr_warn("get_pages: Invalid client context");
++ return -EINVAL;
++ }
++
++ pr_debug("get_pages: pid :0x%p\n", mem_context->pid);
++
++ if (addr != mem_context->va) {
++ pr_warn("get_pages: Context address (0x%llx) is not the same\n",
++ mem_context->va);
++ return -EINVAL;
++ }
++
++ if (size != mem_context->size) {
++ pr_warn("get_pages: Context size (0x%llx) is not the same\n",
++ mem_context->size);
++ return -EINVAL;
++ }
++
++ ret = rdma_interface->get_pages(addr,
++ size,
++ mem_context->pid,
++ &mem_context->p2p_info,
++ free_callback,
++ mem_context);
++
++ if (ret || !mem_context->p2p_info) {
++ pr_err("Could not rdma::get_pages failure: %d\n", ret);
++ return ret;
++ }
++
++ mem_context->core_context = core_context;
++
++ /* Note: At this stage it is OK not to fill sg_table */
++ return 0;
++}
++
++
++static int amd_dma_map(struct sg_table *sg_head, void *client_context,
++ struct device *dma_device, int dmasync, int *nmap)
++{
++ /*
++ * NOTE/TODO:
++ * There are potentially three cases for the real memory
++ * location:
++ * - all memory is in local (device) memory
++ * - all memory is in system memory (RAM)
++ * - memory is spread (s/g) between local and system memory.
++ *
++ * In the case where all memory is in system memory we could use
++ * the iommu driver to build DMA addresses, but not in the case
++ * of local memory, because currently the iommu driver doesn't
++ * deal with local/device memory addresses (it requires "struct
++ * page").
++ *
++ * Accordingly, the code below assumes that iommu functionality
++ * is disabled, so the sg_table can be assumed to already
++ * contain DMA addresses.
++ *
++ */
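++
++ /* For comparison only, a hypothetical system-RAM path (not
++ * usable here, since device-local memory lacks struct page):
++ *
++ * *nmap = dma_map_sg(dma_device, sg_head->sgl,
++ * sg_head->nents, DMA_BIDIRECTIONAL);
++ */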
++ struct amd_mem_context *mem_context =
++ (struct amd_mem_context *)client_context;
++
++ pr_debug("dma_map: Context 0x%p, sg_head 0x%p\n",
++ client_context, sg_head);
++
++ pr_debug("dma_map: pid 0x%p, address 0x%llx, size:0x%llx\n",
++ mem_context->pid,
++ mem_context->va,
++ mem_context->size);
++
++ if (!mem_context->p2p_info) {
++ pr_err("dma_map: No sg table were allocated\n");
++ return -EINVAL;
++ }
++
++ /* Copy information about the previously allocated sg_table */
++ *sg_head = *mem_context->p2p_info->pages;
++
++ /* Return the number of sg entries */
++ *nmap = mem_context->p2p_info->pages->nents;
++
++ return 0;
++}
++
++static int amd_dma_unmap(struct sg_table *sg_head, void *client_context,
++ struct device *dma_device)
++{
++ struct amd_mem_context *mem_context =
++ (struct amd_mem_context *)client_context;
++
++ pr_debug("dma_unmap: Context 0x%p, sg_table 0x%p\n",
++ client_context, sg_head);
++
++ pr_debug("dma_unmap: pid 0x%p, address 0x%llx, size:0x%llx\n",
++ mem_context->pid,
++ mem_context->va,
++ mem_context->size);
++
++ /* Assume success */
++ return 0;
++}
++
++static void amd_put_pages(struct sg_table *sg_head, void *client_context)
++{
++ int ret = 0;
++ struct amd_mem_context *mem_context =
++ (struct amd_mem_context *)client_context;
++
++ pr_debug("put_pages: sg_head %p client_context: 0x%p\n",
++ sg_head, client_context);
++ pr_debug("put_pages: pid 0x%p, address 0x%llx, size:0x%llx\n",
++ mem_context->pid,
++ mem_context->va,
++ mem_context->size);
++
++ pr_debug("put_pages: mem_context->p2p_info %p\n",
++ mem_context->p2p_info);
++
++ if (ACCESS_ONCE(mem_context->free_callback_called)) {
++ pr_debug("put_pages: free callback was called\n");
++ return;
++ }
++
++ if (mem_context->p2p_info) {
++ ret = rdma_interface->put_pages(&mem_context->p2p_info);
++ mem_context->p2p_info = NULL;
++
++ if (ret)
++ pr_err("put_pages failure: %d (callback status %d)\n",
++ ret, mem_context->free_callback_called);
++ } else {
++ pr_err("put_pages: Pointer to p2p info is null\n");
++ }
++}
++
++static unsigned long amd_get_page_size(void *client_context)
++{
++ unsigned long page_size;
++ int result;
++ struct amd_mem_context *mem_context =
++ (struct amd_mem_context *)client_context;
++
++ pr_debug("get_page_size: context: %p\n", client_context);
++ pr_debug("get_page_size: pid 0x%p, address 0x%llx, size:0x%llx\n",
++ mem_context->pid,
++ mem_context->va,
++ mem_context->size);
++
++ result = rdma_interface->get_page_size(
++ mem_context->va,
++ mem_context->size,
++ mem_context->pid,
++ &page_size);
++
++ if (result) {
++ pr_err("Could not get page size. %d\n", result);
++ /* If we failed to get the page size then we do not know what
++ * to do. Return a sensible default value.
++ */
++ return PAGE_SIZE;
++ }
++
++ return page_size;
++}
++
++static void amd_release(void *client_context)
++{
++ struct amd_mem_context *mem_context =
++ (struct amd_mem_context *)client_context;
++
++ pr_debug("release: context: 0x%p\n", client_context);
++ pr_debug("release: pid 0x%p, address 0x%llx, size:0x%llx\n",
++ mem_context->pid,
++ mem_context->va,
++ mem_context->size);
++
++ kfree(mem_context);
++}
++
++
++static struct peer_memory_client amd_mem_client = {
++ .acquire = amd_acquire,
++ .get_pages = amd_get_pages,
++ .dma_map = amd_dma_map,
++ .dma_unmap = amd_dma_unmap,
++ .put_pages = amd_put_pages,
++ .get_page_size = amd_get_page_size,
++ .release = amd_release,
++};
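++
++/* Note: .name and .version are filled in by kfd_init_peer_direct()
++ * from AMD_PEER_BRIDGE_DRIVER_NAME/_VERSION before registration.
++ */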
++
++
++int kfd_init_peer_direct(void)
++{
++ int result;
++
++ pr_debug("Try to initialize PeerDirect support\n");
++
++ pfn_ib_register_peer_memory_client =
++ (void *(*)(struct peer_memory_client *,
++ invalidate_peer_memory *))
++ symbol_request(ib_register_peer_memory_client);
++
++ pfn_ib_unregister_peer_memory_client = (void (*)(void *))
++ symbol_request(ib_unregister_peer_memory_client);
++
++ if (!pfn_ib_register_peer_memory_client ||
++ !pfn_ib_unregister_peer_memory_client) {
++ pr_warn("amdkfd: PeerDirect interface was not detected\n");
++ return -EINVAL;
++ }
++
++ result = amdkfd_query_rdma_interface(&rdma_interface);
++
++ if (result < 0) {
++ pr_err("amdkfd: Cannot get RDMA Interface (result = %d)\n",
++ result);
++ return result;
++ }
++
++ strcpy(amd_mem_client.name, AMD_PEER_BRIDGE_DRIVER_NAME);
++ strcpy(amd_mem_client.version, AMD_PEER_BRIDGE_DRIVER_VERSION);
++
++ ib_reg_handle = pfn_ib_register_peer_memory_client(&amd_mem_client,
++ &ib_invalidate_callback);
++
++ if (!ib_reg_handle) {
++ pr_err("amdkfd: Cannot register peer memory client\n");
++ return -EINVAL;
++ }
++
++ pr_info("amdkfd: PeerDirect support was initialized successfully\n");
++ return 0;
++}
++
++void kfd_close_peer_direct(void)
++{
++ if (pfn_ib_unregister_peer_memory_client) {
++ if (ib_reg_handle)
++ pfn_ib_unregister_peer_memory_client(ib_reg_handle);
++
++ symbol_put(ib_unregister_peer_memory_client);
++ }
++
++ if (pfn_ib_register_peer_memory_client)
++ symbol_put(ib_register_peer_memory_client);
++}
++
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index cfa2283..3ec7914 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -913,4 +913,9 @@ int restore(struct kfd_dev *kfd);
+ #define KFD_MULTI_PROC_MAPPING_HWS_SUPPORT 600
+ #define KFD_CWSR_CZ_FW_VER 625
+
++/* PeerDirect support */
++int kfd_init_peer_direct(void);
++void kfd_close_peer_direct(void);
++
++
+ #endif
+--
+2.7.4
+