diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch new file mode 100644 index 00000000..12b1efa9 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3404-drm-amdkfd-map-multiple-processes-to-HW-scheduler.patch @@ -0,0 +1,156 @@ +From ce44d7b079aa3a4fc06aa08a452c2edef1c38af0 Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Mon, 27 Nov 2017 18:29:45 -0500 +Subject: [PATCH 3404/4131] drm/amdkfd: map multiple processes to HW scheduler + +Allow HWS to execute multiple processes on the hardware +concurrently. The number of concurrent processes is limited by +the number of VMIDs allocated to the HWS. + +A module parameter can be used for limiting this further or turn +it off altogether (mainly for debugging purposes). + +Signed-off-by: Yong Zhao <yong.zhao@amd.com> +Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com> +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +Acked-by: Oded Gabbay <oded.gabbay@gmail.com> +Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 +++++++++ + drivers/gpu/drm/amd/amdkfd/kfd_module.c | 5 +++++ + drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 30 +++++++++++++++++++++++-- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 ++++++++ + 4 files changed, 53 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index 4f05eac..a8fa33a 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -238,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, + kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd + - kfd->vm_info.first_vmid_kfd + 1; + ++ /* Verify module parameters regarding mapped process number*/ ++ if 
((hws_max_conc_proc < 0) ++ || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { ++ dev_err(kfd_device, ++ "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", ++ hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, ++ kfd->vm_info.vmid_num_kfd); ++ kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; ++ } else ++ kfd->max_proc_per_quantum = hws_max_conc_proc; ++ + /* calculate max size of mqds needed for queues */ + size = max_num_of_queues_per_device * + kfd->device_info->mqd_size_aligned; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +index ee8adf6..4e060c8 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +@@ -50,6 +50,11 @@ module_param(sched_policy, int, 0444); + MODULE_PARM_DESC(sched_policy, + "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); + ++int hws_max_conc_proc = 8; ++module_param(hws_max_conc_proc, int, 0444); ++MODULE_PARM_DESC(hws_max_conc_proc, ++ "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); ++ + int cwsr_enable = 1; + module_param(cwsr_enable, int, 0444); + MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +index 69c147a..0b7092e 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +@@ -57,13 +57,24 @@ static void pm_calc_rlib_size(struct packet_manager *pm, + { + unsigned int process_count, queue_count; + unsigned int map_queue_size; ++ unsigned int max_proc_per_quantum = 1; ++ struct kfd_dev *dev = pm->dqm->dev; + + process_count = pm->dqm->processes_count; + queue_count = pm->dqm->queue_count; + +- /* check if there is over subscription*/ ++ /* check if there is over subscription ++ * Note: the 
arbitration between the number of VMIDs and ++ * hws_max_conc_proc has been done in ++ * kgd2kfd_device_init(). ++ */ + *over_subscription = false; +- if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) { ++ ++ if (dev->max_proc_per_quantum > 1) ++ max_proc_per_quantum = dev->max_proc_per_quantum; ++ ++ if ((process_count > max_proc_per_quantum) || ++ queue_count > get_queues_num(pm->dqm)) { + *over_subscription = true; + pr_debug("Over subscribed runlist\n"); + } +@@ -116,10 +127,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) + { + struct pm4_mes_runlist *packet; ++ int concurrent_proc_cnt = 0; ++ struct kfd_dev *kfd = pm->dqm->dev; + + if (WARN_ON(!ib)) + return -EFAULT; + ++ /* Determine the number of processes to map together to HW: ++ * it can not exceed the number of VMIDs available to the ++ * scheduler, and it is determined by the smaller of the number ++ * of processes in the runlist and kfd module parameter ++ * hws_max_conc_proc. ++ * Note: the arbitration between the number of VMIDs and ++ * hws_max_conc_proc has been done in ++ * kgd2kfd_device_init(). ++ */ ++ concurrent_proc_cnt = min(pm->dqm->processes_count, ++ kfd->max_proc_per_quantum); ++ + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); +@@ -130,6 +155,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, + packet->bitfields4.chain = chain ? 
1 : 0; + packet->bitfields4.offload_polling = 0; + packet->bitfields4.valid = 1; ++ packet->bitfields4.process_cnt = concurrent_proc_cnt; + packet->ordinal2 = lower_32_bits(ib); + packet->bitfields3.ib_base_hi = upper_32_bits(ib); + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index a668764..1edab21 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -88,6 +88,12 @@ extern int max_num_of_queues_per_device; + /* Kernel module parameter to specify the scheduling policy */ + extern int sched_policy; + ++/* ++ * Kernel module parameter to specify the maximum process ++ * number per HW scheduler ++ */ ++extern int hws_max_conc_proc; ++ + extern int cwsr_enable; + + /* +@@ -214,6 +220,9 @@ struct kfd_dev { + /* Debug manager */ + struct kfd_dbgmgr *dbgmgr; + ++ /* Maximum process number mapped to HW scheduler */ ++ unsigned int max_proc_per_quantum; ++ + /* CWSR */ + bool cwsr_enabled; + const void *cwsr_isa; +-- +2.7.4 + |