1 files changed, 573 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1424-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1424-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch
new file mode 100644
index 00000000..fb0f6fee
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1424-drm-amdkfd-Automatically-detect-and-enable-PeerDirec.patch
@@ -0,0 +1,573 @@
+From 05234bbc6870c6dc59dbf92e9d07e5ec1175451a Mon Sep 17 00:00:00 2001
+From: Serguei Sagalovitch <Serguei.Sagalovitch@amd.com>
+Date: Tue, 26 Apr 2016 15:23:28 -0400
+Subject: [PATCH 1424/4131] drm/amdkfd: Automatically detect and enable
+ PeerDirect support v3
+
+Normally PeerDirect is delivered  as part of OFED package. Accordingly
+we are not able to rely on the fact that PeerDirect is present
+in the system. To improve user experience and to be "PeerDirect"
+-ready the logic implemented to detect PeerDirect interface presence
+and creating  PeerDirect "bridge".
+
+v2: Switch to use "symbol_request"/"symbol_put" instead of relying on
+kallsyms support. Use PAGE_SIZE macro instead of hardcoded value.
+Fix grammar errors. Update comments.
+v3: Fixed errors in kfd_close_peer_direct() function
+
+Change-Id: I49ea6b0a1d80d7189eef4a16e15e4b7237b00168
+Signed-off-by: Serguei Sagalovitch <Serguei.Sagalovitch@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/Makefile         |   3 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_module.c     |   4 +
+ drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c | 488 ++++++++++++++++++++++++++++
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h       |   5 +
+ 4 files changed, 499 insertions(+), 1 deletion(-)
+ create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
+index 60c60c0..3d2acf9 100644
+--- a/drivers/gpu/drm/amd/amdkfd/Makefile
++++ b/drivers/gpu/drm/amd/amdkfd/Makefile
+@@ -14,6 +14,7 @@ amdkfd-y	:= kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
+ 		kfd_process_queue_manager.o kfd_device_queue_manager.o \
+ 		kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
+ 		kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
+-		kfd_dbgdev.o kfd_dbgmgr.o kfd_flat_memory.o kfd_crat.o kfd_rdma.o
++		kfd_dbgdev.o kfd_dbgmgr.o kfd_flat_memory.o kfd_crat.o kfd_rdma.o \
++		kfd_peerdirect.o
+ 
+ obj-$(CONFIG_HSA_AMD)	+= amdkfd.o
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+index bb0b00d..3109273 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+@@ -137,6 +137,9 @@ static int __init kfd_module_init(void)
+ 
+ 	amdkfd_init_completed = 1;
+ 
++	if (!kfd_init_peer_direct())
++		pr_info("PeerDirect support was enabled\n");
++
+ 	dev_info(kfd_device, "Initialized module\n");
+ 
+ 	return 0;
+@@ -153,6 +156,7 @@ static void __exit kfd_module_exit(void)
+ {
+ 	amdkfd_init_completed = 0;
+ 
++	kfd_close_peer_direct();
+ 	kfd_process_destroy_wq();
+ 	kfd_topology_shutdown();
+ 	kfd_chardev_exit();
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c
+new file mode 100644
+index 0000000..ffbccb3
+--- /dev/null
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c
+@@ -0,0 +1,488 @@
++/*
++ * Copyright 2016 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++
++/* NOTE:
++ *
++ * This file contains logic to dynamically detect and enable PeerDirect
++ * suppor. PeerDirect support is delivered e.g. as part of OFED
++ * from Mellanox. Because we are not able to rely on the fact that the
++ * corresponding OFED will be installed we should:
++ *  - copy PeerDirect definitions locally to avoid dependency on
++ *    corresponding header file
++ *  - try dynamically detect address of PeerDirect function
++ *    pointers.
++ *
++ * If dynamic detection failed then PeerDirect support should be
++ * enabled using the standard PeerDirect bridge driver from:
++ * https://github.com/RadeonOpenCompute/ROCnRDMA
++ *
++ *
++ * Logic to support PeerDirect relies only on official public API to be
++ * non-intrusive as much as possible.
++ *
++ **/
++
++#include <linux/device.h>
++#include <linux/export.h>
++#include <linux/pid.h>
++#include <linux/err.h>
++#include <linux/slab.h>
++#include <linux/scatterlist.h>
++#include <linux/module.h>
++#include "amd_rdma.h"
++
++
++
++/* ----------------------- PeerDirect interface ------------------------------*/
++
++/*
++ * Copyright (c) 2013,  Mellanox Technologies. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses.  You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ *     Redistribution and use in source and binary forms, with or
++ *     without modification, are permitted provided that the following
++ *     conditions are met:
++ *
++ *      - Redistributions of source code must retain the above
++ *        copyright notice, this list of conditions and the following
++ *        disclaimer.
++ *
++ *      - Redistributions in binary form must reproduce the above
++ *        copyright notice, this list of conditions and the following
++ *        disclaimer in the documentation and/or other materials
++ *        provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++#define IB_PEER_MEMORY_NAME_MAX 64
++#define IB_PEER_MEMORY_VER_MAX 16
++
++struct peer_memory_client {
++	char	name[IB_PEER_MEMORY_NAME_MAX];
++	char	version[IB_PEER_MEMORY_VER_MAX];
++	/* acquire return code: 1-mine, 0-not mine */
++	int (*acquire)(unsigned long addr, size_t size,
++			void *peer_mem_private_data,
++					char *peer_mem_name,
++					void **client_context);
++	int (*get_pages)(unsigned long addr,
++			  size_t size, int write, int force,
++			  struct sg_table *sg_head,
++			  void *client_context, void *core_context);
++	int (*dma_map)(struct sg_table *sg_head, void *client_context,
++			struct device *dma_device, int dmasync, int *nmap);
++	int (*dma_unmap)(struct sg_table *sg_head, void *client_context,
++			   struct device  *dma_device);
++	void (*put_pages)(struct sg_table *sg_head, void *client_context);
++	unsigned long (*get_page_size)(void *client_context);
++	void (*release)(void *client_context);
++
++};
++
++typedef int (*invalidate_peer_memory)(void *reg_handle,
++					  void *core_context);
++
++void *ib_register_peer_memory_client(struct peer_memory_client *peer_client,
++				  invalidate_peer_memory *invalidate_callback);
++void ib_unregister_peer_memory_client(void *reg_handle);
++
++
++/*------------------- PeerDirect bridge driver ------------------------------*/
++
++#define AMD_PEER_BRIDGE_DRIVER_VERSION	"1.0"
++#define AMD_PEER_BRIDGE_DRIVER_NAME	"amdkfd"
++
++
++static void* (*pfn_ib_register_peer_memory_client)(struct peer_memory_client
++							*peer_client,
++					invalidate_peer_memory
++							*invalidate_callback);
++
++static void (*pfn_ib_unregister_peer_memory_client)(void *reg_handle);
++
++static const struct amd_rdma_interface *rdma_interface;
++
++static invalidate_peer_memory ib_invalidate_callback;
++static void *ib_reg_handle;
++
++struct amd_mem_context {
++	uint64_t	va;
++	uint64_t	size;
++	struct pid	*pid;
++
++	struct amd_p2p_info  *p2p_info;
++
++	/* Flag that free callback was called */
++	int free_callback_called;
++
++	/* Context received from PeerDirect call */
++	void *core_context;
++};
++
++
++static void free_callback(void *client_priv)
++{
++	struct amd_mem_context *mem_context =
++		(struct amd_mem_context *)client_priv;
++
++	pr_debug("free_callback: data 0x%p\n", mem_context);
++
++	if (!mem_context) {
++		pr_warn("free_callback: Invalid client context\n");
++		return;
++	}
++
++	pr_debug("mem_context->core_context 0x%p\n", mem_context->core_context);
++
++	/* Call back IB stack asking to invalidate memory */
++	(*ib_invalidate_callback) (ib_reg_handle, mem_context->core_context);
++
++	/* amdkfd will free resources when we return from this callback.
++	 * Set flag to inform that there is nothing to do on "put_pages", etc.
++	 */
++	ACCESS_ONCE(mem_context->free_callback_called) = 1;
++}
++
++
++static int amd_acquire(unsigned long addr, size_t size,
++			void *peer_mem_private_data,
++			char *peer_mem_name, void **client_context)
++{
++	int ret;
++	struct amd_mem_context *mem_context;
++	struct pid *pid;
++
++	/* Get pointer to structure describing current process */
++	pid = get_task_pid(current, PIDTYPE_PID);
++
++	pr_debug("acquire: addr:0x%lx,size:0x%x, pid 0x%p\n",
++					addr, (unsigned int)size, pid);
++
++	/* Check if address is handled by AMD GPU driver */
++	ret = rdma_interface->is_gpu_address(addr, pid);
++
++	if (!ret) {
++		pr_debug("acquire: Not GPU Address\n");
++		/* This is not GPU address */
++		return 0;
++	}
++
++	pr_debug("acquire: GPU address\n");
++
++	/* Initialize context used for operation with given address */
++	mem_context = kzalloc(sizeof(struct amd_mem_context), GFP_KERNEL);
++
++	if (!mem_context)
++		return 0;	/* Error case handled as not GPU address  */
++
++	mem_context->free_callback_called = 0;
++	mem_context->va   = addr;
++	mem_context->size = size;
++
++	/* Save PID. It is guaranteed that the function will be
++	 * called in the correct process context as opposite to others.
++	 */
++	mem_context->pid  = pid;
++
++	pr_debug("acquire: Client context %p\n", mem_context);
++
++	/* Return pointer to allocated context */
++	*client_context = mem_context;
++
++	/* Return 1 to inform that this address which will be handled
++	 * by AMD GPU driver
++	 */
++	return 1;
++}
++
++static int amd_get_pages(unsigned long addr, size_t size, int write, int force,
++			  struct sg_table *sg_head,
++			  void *client_context, void *core_context)
++{
++	int ret;
++	struct amd_mem_context *mem_context =
++		(struct amd_mem_context *)client_context;
++
++	pr_debug("get_pages: addr:0x%lx,size:0x%x, core_context:%p\n",
++		addr, (unsigned int)size, core_context);
++
++	if (!mem_context) {
++		pr_warn("get_pages: Invalid client context");
++		return -EINVAL;
++	}
++
++	pr_debug("get_pages: pid :0x%p\n", mem_context->pid);
++
++
++	if (addr != mem_context->va) {
++		pr_warn("get_pages: Context address (0x%llx) is not the same\n",
++			mem_context->va);
++		return -EINVAL;
++	}
++
++	if (size != mem_context->size) {
++		pr_warn("get_pages: Context size (0x%llx) is not the same\n",
++			mem_context->size);
++		return -EINVAL;
++	}
++
++	ret = rdma_interface->get_pages(addr,
++					size,
++					mem_context->pid,
++					&mem_context->p2p_info,
++					free_callback,
++					mem_context);
++
++	if (ret || !mem_context->p2p_info) {
++		pr_err("Could not rdma::get_pages failure: %d\n", ret);
++		return ret;
++	}
++
++	mem_context->core_context = core_context;
++
++	/* Note: At this stage it is OK not to fill sg_table */
++	return 0;
++}
++
++
++static int amd_dma_map(struct sg_table *sg_head, void *client_context,
++			struct device *dma_device, int dmasync, int *nmap)
++{
++	/*
++	 * NOTE/TODO:
++	 * We could have potentially three cases for real memory
++	 *	location:
++	 *		- all memory in the local
++	 *		- all memory in the system (RAM)
++	 *		- memory is spread (s/g) between local and system.
++	 *
++	 *	In the case of all memory in the system we could use
++	 *	iommu driver to build DMA addresses but not in the case
++	 *	of local memory because currently iommu driver doesn't
++	 *	deal with local/device memory addresses (it requires "struct
++	 *	page").
++	 *
++	 *	Accordingly returning assumes that iommu funcutionality
++	 *	should be disabled so we can assume that sg_table already
++	 *	contains DMA addresses.
++	 *
++	 */
++	struct amd_mem_context *mem_context =
++		(struct amd_mem_context *)client_context;
++
++	pr_debug("dma_map: Context 0x%p, sg_head 0x%p\n",
++			client_context, sg_head);
++
++	pr_debug("dma_map: pid 0x%p, address 0x%llx, size:0x%llx\n",
++			mem_context->pid,
++			mem_context->va,
++			mem_context->size);
++
++	if (!mem_context->p2p_info) {
++		pr_err("dma_map: No sg table were allocated\n");
++		return -EINVAL;
++	}
++
++	/* Copy information about previosly allocated sg_table */
++	*sg_head = *mem_context->p2p_info->pages;
++
++	/* Return number of pages */
++	*nmap = mem_context->p2p_info->pages->nents;
++
++	return 0;
++}
++
++static int amd_dma_unmap(struct sg_table *sg_head, void *client_context,
++			   struct device  *dma_device)
++{
++	struct amd_mem_context *mem_context =
++		(struct amd_mem_context *)client_context;
++
++	pr_debug("dma_unmap: Context 0x%p, sg_table 0x%p\n",
++			client_context, sg_head);
++
++	pr_debug("dma_unmap: pid 0x%p, address 0x%llx, size:0x%llx\n",
++			mem_context->pid,
++			mem_context->va,
++			mem_context->size);
++
++	/* Assume success */
++	return 0;
++}
++static void amd_put_pages(struct sg_table *sg_head, void *client_context)
++{
++	int ret = 0;
++	struct amd_mem_context *mem_context =
++		(struct amd_mem_context *)client_context;
++
++	pr_debug("put_pages: sg_head %p client_context: 0x%p\n",
++			sg_head, client_context);
++	pr_debug("put_pages: pid 0x%p, address 0x%llx, size:0x%llx\n",
++			mem_context->pid,
++			mem_context->va,
++			mem_context->size);
++
++	pr_debug("put_pages: mem_context->p2p_info %p\n",
++				mem_context->p2p_info);
++
++	if (ACCESS_ONCE(mem_context->free_callback_called)) {
++		pr_debug("put_pages: free callback was called\n");
++		return;
++	}
++
++	if (mem_context->p2p_info) {
++		ret = rdma_interface->put_pages(&mem_context->p2p_info);
++		mem_context->p2p_info = NULL;
++
++		if (ret)
++			pr_err("put_pages failure: %d (callback status %d)\n",
++					ret, mem_context->free_callback_called);
++	} else
++		pr_err("put_pages: Pointer to p2p info is null\n");
++}
++static unsigned long amd_get_page_size(void *client_context)
++{
++	unsigned long page_size;
++	int result;
++	struct amd_mem_context *mem_context =
++		(struct amd_mem_context *)client_context;
++
++	pr_debug("get_page_size: context: %p\n", client_context);
++	pr_debug("get_page_size: pid 0x%p, address 0x%llx, size:0x%llx\n",
++			mem_context->pid,
++			mem_context->va,
++			mem_context->size);
++
++
++	result = rdma_interface->get_page_size(
++				mem_context->va,
++				mem_context->size,
++				mem_context->pid,
++				&page_size);
++
++	if (result) {
++		pr_err("Could not get page size. %d\n", result);
++		/* If we failed to get page size then do not know what to do.
++		 * Let's return some default value
++		 */
++		return PAGE_SIZE;
++	}
++
++	return page_size;
++}
++
++static void amd_release(void *client_context)
++{
++	struct amd_mem_context *mem_context =
++		(struct amd_mem_context *)client_context;
++
++	pr_debug("release: context: 0x%p\n", client_context);
++	pr_debug("release: pid 0x%p, address 0x%llx, size:0x%llx\n",
++			mem_context->pid,
++			mem_context->va,
++			mem_context->size);
++
++	kfree(mem_context);
++}
++
++
++static struct peer_memory_client amd_mem_client = {
++	.acquire = amd_acquire,
++	.get_pages = amd_get_pages,
++	.dma_map = amd_dma_map,
++	.dma_unmap = amd_dma_unmap,
++	.put_pages = amd_put_pages,
++	.get_page_size = amd_get_page_size,
++	.release = amd_release,
++};
++
++
++int kfd_init_peer_direct(void)
++{
++	int result;
++
++	pr_debug("Try to initialize PeerDirect support\n");
++
++	pfn_ib_register_peer_memory_client =
++		(void *(*)(struct peer_memory_client *,
++			  invalidate_peer_memory *))
++		symbol_request(ib_register_peer_memory_client);
++
++	pfn_ib_unregister_peer_memory_client = (void (*)(void *))
++		symbol_request(ib_unregister_peer_memory_client);
++
++	if (!pfn_ib_register_peer_memory_client ||
++		!pfn_ib_unregister_peer_memory_client) {
++		pr_warn("amdkfd: PeerDirect interface was not detected\n");
++		return -EINVAL;
++	}
++
++	result = amdkfd_query_rdma_interface(&rdma_interface);
++
++	if (result < 0) {
++		pr_err("amdkfd: Cannot get RDMA Interface (result = %d)\n",
++				result);
++		return result;
++	}
++
++	strcpy(amd_mem_client.name,    AMD_PEER_BRIDGE_DRIVER_NAME);
++	strcpy(amd_mem_client.version, AMD_PEER_BRIDGE_DRIVER_VERSION);
++
++	ib_reg_handle = pfn_ib_register_peer_memory_client(&amd_mem_client,
++						&ib_invalidate_callback);
++
++	if (!ib_reg_handle) {
++		pr_err("amdkfd: Cannot register peer memory client\n");
++		return -EINVAL;
++	}
++
++	pr_info("amdkfd: PeerDirect support was initialized successfully\n");
++	return 0;
++}
++
++void kfd_close_peer_direct(void)
++{
++	if (pfn_ib_unregister_peer_memory_client) {
++		if (ib_reg_handle)
++			pfn_ib_unregister_peer_memory_client(ib_reg_handle);
++
++		symbol_put(ib_unregister_peer_memory_client);
++	}
++
++	if (pfn_ib_register_peer_memory_client)
++		symbol_put(ib_register_peer_memory_client);
++
++}
++
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index 196ede7..f3ad0c8 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -913,4 +913,9 @@ int restore(struct kfd_dev *kfd);
+ #define KFD_MULTI_PROC_MAPPING_HWS_SUPPORT 600
+ #define KFD_CWSR_CZ_FW_VER 625
+ 
++/* PeerDirect support */
++int kfd_init_peer_direct(void);
++void kfd_close_peer_direct(void);
++
++
+ #endif
+-- 
+2.7.4
+