aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch156
1 files changed, 156 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch
new file mode 100644
index 00000000..12b1efa9
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch
@@ -0,0 +1,156 @@
+From ce44d7b079aa3a4fc06aa08a452c2edef1c38af0 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Mon, 27 Nov 2017 18:29:45 -0500
+Subject: [PATCH 3404/4131] drm/amdkfd: map multiple processes to HW scheduler
+
+Allow HWS to execute multiple processes on the hardware
+concurrently. The number of concurrent processes is limited by
+the number of VMIDs allocated to the HWS.
+
+A module parameter can be used to limit this further or to turn
+it off altogether (mainly for debugging purposes).
+
+Signed-off-by: Yong Zhao <yong.zhao@amd.com>
+Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
+Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 +++++++++
+ drivers/gpu/drm/amd/amdkfd/kfd_module.c | 5 +++++
+ drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 30 +++++++++++++++++++++++--
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 ++++++++
+ 4 files changed, 53 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index 4f05eac..a8fa33a 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -238,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
+ - kfd->vm_info.first_vmid_kfd + 1;
+
++ /* Verify module parameters regarding mapped process number*/
++ if ((hws_max_conc_proc < 0)
++ || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
++ dev_err(kfd_device,
++ "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
++ hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
++ kfd->vm_info.vmid_num_kfd);
++ kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
++ } else
++ kfd->max_proc_per_quantum = hws_max_conc_proc;
++
+ /* calculate max size of mqds needed for queues */
+ size = max_num_of_queues_per_device *
+ kfd->device_info->mqd_size_aligned;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+index ee8adf6..4e060c8 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+@@ -50,6 +50,11 @@ module_param(sched_policy, int, 0444);
+ MODULE_PARM_DESC(sched_policy,
+ "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
+
++int hws_max_conc_proc = 8;
++module_param(hws_max_conc_proc, int, 0444);
++MODULE_PARM_DESC(hws_max_conc_proc,
++ "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
++
+ int cwsr_enable = 1;
+ module_param(cwsr_enable, int, 0444);
+ MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+index 69c147a..0b7092e 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+@@ -57,13 +57,24 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
+ {
+ unsigned int process_count, queue_count;
+ unsigned int map_queue_size;
++ unsigned int max_proc_per_quantum = 1;
++ struct kfd_dev *dev = pm->dqm->dev;
+
+ process_count = pm->dqm->processes_count;
+ queue_count = pm->dqm->queue_count;
+
+- /* check if there is over subscription*/
++ /* check if there is over subscription
++ * Note: the arbitration between the number of VMIDs and
++ * hws_max_conc_proc has been done in
++ * kgd2kfd_device_init().
++ */
+ *over_subscription = false;
+- if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
++
++ if (dev->max_proc_per_quantum > 1)
++ max_proc_per_quantum = dev->max_proc_per_quantum;
++
++ if ((process_count > max_proc_per_quantum) ||
++ queue_count > get_queues_num(pm->dqm)) {
+ *over_subscription = true;
+ pr_debug("Over subscribed runlist\n");
+ }
+@@ -116,10 +127,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain)
+ {
+ struct pm4_mes_runlist *packet;
++ int concurrent_proc_cnt = 0;
++ struct kfd_dev *kfd = pm->dqm->dev;
+
+ if (WARN_ON(!ib))
+ return -EFAULT;
+
++ /* Determine the number of processes to map together to HW:
++ * it can not exceed the number of VMIDs available to the
++ * scheduler, and it is determined by the smaller of the number
++ * of processes in the runlist and kfd module parameter
++ * hws_max_conc_proc.
++ * Note: the arbitration between the number of VMIDs and
++ * hws_max_conc_proc has been done in
++ * kgd2kfd_device_init().
++ */
++ concurrent_proc_cnt = min(pm->dqm->processes_count,
++ kfd->max_proc_per_quantum);
++
+ packet = (struct pm4_mes_runlist *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+@@ -130,6 +155,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
+ packet->bitfields4.chain = chain ? 1 : 0;
+ packet->bitfields4.offload_polling = 0;
+ packet->bitfields4.valid = 1;
++ packet->bitfields4.process_cnt = concurrent_proc_cnt;
+ packet->ordinal2 = lower_32_bits(ib);
+ packet->bitfields3.ib_base_hi = upper_32_bits(ib);
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index a668764..1edab21 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -88,6 +88,12 @@ extern int max_num_of_queues_per_device;
+ /* Kernel module parameter to specify the scheduling policy */
+ extern int sched_policy;
+
++/*
++ * Kernel module parameter to specify the maximum process
++ * number per HW scheduler
++ */
++extern int hws_max_conc_proc;
++
+ extern int cwsr_enable;
+
+ /*
+@@ -214,6 +220,9 @@ struct kfd_dev {
+ /* Debug manager */
+ struct kfd_dbgmgr *dbgmgr;
+
++ /* Maximum process number mapped to HW scheduler */
++ unsigned int max_proc_per_quantum;
++
+ /* CWSR */
+ bool cwsr_enabled;
+ const void *cwsr_isa;
+--
+2.7.4
+