aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3921-Enable-over-subscription-with-1-GWS-queue.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3921-Enable-over-subscription-with-1-GWS-queue.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3921-Enable-over-subscription-with-1-GWS-queue.patch314
1 files changed, 314 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3921-Enable-over-subscription-with-1-GWS-queue.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3921-Enable-over-subscription-with-1-GWS-queue.patch
new file mode 100644
index 00000000..2a841667
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3921-Enable-over-subscription-with-1-GWS-queue.patch
@@ -0,0 +1,314 @@
+From bfdaf131a7afc8226419aa8f5176648833329105 Mon Sep 17 00:00:00 2001
+From: Joseph Greathouse <Joseph.Greathouse@amd.com>
+Date: Wed, 18 Sep 2019 14:49:57 -0500
+Subject: [PATCH 3921/4256] Enable over-subscription with >1 GWS queue
+
+The current GWS usage model only allows a single GWS-enabled
+process to be active on the GPU at once. This ensures that a
+barrier-using kernel gets a known amount of GPU hardware, to
+prevent deadlock due to inability to go beyond the GWS barrier.
+
+The HWS watches how many GWS entries are assigned to each process,
+and goes into over-subscription mode when two processes need more
+than the 64 that are available. The current KFD method for working
+with this is to allocate all 64 GWS entries to each GWS-capable
+process.
+
+When more than one GWS-enabled process is in the runlist, we must
+make sure the runlist is in over-subscription mode, so that the
+HWS gets a chained RUN_LIST packet and continues scheduling
+kernels.
+
+Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1 +
+ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 58 ++++++++++++++++++-
+ .../drm/amd/amdkfd/kfd_device_queue_manager.h | 1 +
+ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 1 +
+ .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 2 +-
+ .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 6 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 13 +++++
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 +
+ 8 files changed, 78 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index 3362b4516089..838a8d46ba47 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -219,6 +219,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
+ }
+
+ q_properties->is_interop = false;
++ q_properties->is_gws = false;
+ q_properties->queue_percent = args->queue_percentage;
+ q_properties->priority = args->queue_priority;
+ q_properties->queue_address = args->ring_base_address;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index eb7e1aaf54a4..3aec5046d26d 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -504,8 +504,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
+ deallocate_vmid(dqm, qpd, q);
+ }
+ qpd->queue_count--;
+- if (q->properties.is_active)
++ if (q->properties.is_active) {
+ dqm->queue_count--;
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count--;
++ qpd->mapped_gws_queue = false;
++ }
++ }
+
+ return retval;
+ }
+@@ -577,6 +582,20 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+ else if (!q->properties.is_active && prev_active)
+ dqm->queue_count--;
+
++ if (q->gws && !q->properties.is_gws) {
++ if (q->properties.is_active) {
++ dqm->gws_queue_count++;
++ pdd->qpd.mapped_gws_queue = true;
++ }
++ q->properties.is_gws = true;
++ } else if (!q->gws && q->properties.is_gws) {
++ if (q->properties.is_active) {
++ dqm->gws_queue_count--;
++ pdd->qpd.mapped_gws_queue = false;
++ }
++ q->properties.is_gws = false;
++ }
++
+ if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
+ retval = map_queues_cpsch(dqm);
+ else if (q->properties.is_active &&
+@@ -619,6 +638,10 @@ static int suspend_single_queue(struct device_queue_manager *dqm,
+ if (q->properties.is_active) {
+ dqm->queue_count--;
+ q->properties.is_active = false;
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count--;
++ pdd->qpd.mapped_gws_queue = false;
++ }
+ }
+
+ return retval;
+@@ -653,6 +676,10 @@ static int resume_single_queue(struct device_queue_manager *dqm,
+ if (QUEUE_IS_ACTIVE(q->properties)) {
+ q->properties.is_active = true;
+ dqm->queue_count++;
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count++;
++ qpd->mapped_gws_queue = true;
++ }
+ }
+
+ return retval;
+@@ -693,6 +720,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
+ */
+ ret = retval;
+ dqm->queue_count--;
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count--;
++ qpd->mapped_gws_queue = false;
++ }
+ }
+
+ out:
+@@ -725,6 +756,10 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
+
+ q->properties.is_active = false;
+ dqm->queue_count--;
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count--;
++ qpd->mapped_gws_queue = false;
++ }
+ }
+ retval = execute_queues_cpsch(dqm,
+ qpd->is_debug ?
+@@ -802,6 +837,10 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
+ */
+ ret = retval;
+ dqm->queue_count++;
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count++;
++ qpd->mapped_gws_queue = true;
++ }
+ }
+ qpd->evicted = 0;
+ out:
+@@ -846,6 +885,10 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
+
+ q->properties.is_active = true;
+ dqm->queue_count++;
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count++;
++ qpd->mapped_gws_queue = true;
++ }
+ }
+ retval = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+@@ -952,6 +995,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
+ dqm->queue_count = dqm->next_pipe_to_allocate = 0;
+ dqm->sdma_queue_count = 0;
+ dqm->xgmi_sdma_queue_count = 0;
++ dqm->gws_queue_count = 0;
+ dqm->trap_debug_vmid = 0;
+
+ for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
+@@ -1108,6 +1152,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
+ dqm->queue_count = dqm->processes_count = 0;
+ dqm->sdma_queue_count = 0;
+ dqm->xgmi_sdma_queue_count = 0;
++ dqm->gws_queue_count = 0;
+ dqm->active_runlist = false;
+ dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
+ dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
+@@ -1492,6 +1537,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+ USE_DEFAULT_GRACE_PERIOD);
+ if (retval == -ETIME)
+ qpd->reset_wavefronts = true;
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count--;
++ qpd->mapped_gws_queue = false;
++ }
+ }
+
+ /*
+@@ -1704,8 +1753,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
+ deallocate_sdma_queue(dqm, q);
+ }
+
+- if (q->properties.is_active)
++ if (q->properties.is_active) {
+ dqm->queue_count--;
++ if (q->properties.is_gws) {
++ dqm->gws_queue_count--;
++ qpd->mapped_gws_queue = false;
++ }
++ }
+
+ dqm->total_queue_count--;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+index 54f4fad61359..eed8f950b663 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+@@ -182,6 +182,7 @@ struct device_queue_manager {
+ unsigned int queue_count;
+ unsigned int sdma_queue_count;
+ unsigned int xgmi_sdma_queue_count;
++ unsigned int gws_queue_count;
+ unsigned int total_queue_count;
+ unsigned int next_pipe_to_allocate;
+ unsigned int *allocated_queues;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+index 9ec62435326e..ac031dc09d66 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+@@ -115,6 +115,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
+
+ prop.queue_size = queue_size;
+ prop.is_interop = false;
++ prop.is_gws = false;
+ prop.priority = 1;
+ prop.queue_percent = 100;
+ prop.type = type;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+index c3f39ef4de56..f7d9dac26485 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+@@ -84,7 +84,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
+ packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
+ packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
+- packet->bitfields14.num_gws = qpd->num_gws;
++ packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
+ packet->bitfields14.num_oac = qpd->num_oac;
+ packet->bitfields14.sdma_enable = 1;
+ packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+index 08d3b38117b5..43e8e0258188 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+@@ -41,7 +41,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
+ unsigned int *rlib_size,
+ bool *over_subscription)
+ {
+- unsigned int process_count, queue_count, compute_queue_count;
++ unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
+ unsigned int map_queue_size;
+ unsigned int max_proc_per_quantum = 1;
+ struct kfd_dev *dev = pm->dqm->dev;
+@@ -50,6 +50,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
+ queue_count = pm->dqm->queue_count;
+ compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
+ pm->dqm->xgmi_sdma_queue_count;
++ gws_queue_count = pm->dqm->gws_queue_count;
+
+ /* check if there is over subscription
+ * Note: the arbitration between the number of VMIDs and
+@@ -62,7 +63,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
+ max_proc_per_quantum = dev->max_proc_per_quantum;
+
+ if ((process_count > max_proc_per_quantum) ||
+- compute_queue_count > get_queues_num(pm->dqm)) {
++ compute_queue_count > get_queues_num(pm->dqm) ||
++ gws_queue_count > 1) {
+ *over_subscription = true;
+ pr_debug("Over subscribed runlist\n");
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index 6bf5be992303..9ac50a4eb294 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -466,6 +466,10 @@ enum KFD_QUEUE_PRIORITY {
+ * @is_active: Defines if the queue is active or not. @is_active and
+ * @is_evicted are protected by the DQM lock.
+ *
++ * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
++ * @is_gws should be protected by the DQM lock, since changing it can yield the
++ * possibility of updating DQM state on number of GWS queues.
++ *
+ * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
+ * of the queue.
+ *
+@@ -490,6 +494,7 @@ struct queue_properties {
+ bool is_suspended;
+ bool is_active;
+ bool is_new;
++ bool is_gws;
+ /* Not relevant for user mode queues in cp scheduling */
+ unsigned int vmid;
+ /* Relevant only for sdma queues*/
+@@ -628,6 +633,14 @@ struct qcm_process_device {
+ */
+ bool reset_wavefronts;
+
++ /* This flag tells us if this process has a GWS-capable
++ * queue that will be mapped into the runlist. It's
++ * possible to request a GWS BO, but not have the queue
++ * currently mapped, and this changes how the MAP_PROCESS
++ * PM4 packet is configured.
++ */
++ bool mapped_gws_queue;
++
+ /*
+ * All the memory management data should be here too
+ */
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index c14fdf3bda75..d7e057376d8f 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -824,6 +824,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+ pdd->qpd.dqm = dev->dqm;
+ pdd->qpd.pqm = &p->pqm;
+ pdd->qpd.evicted = 0;
++ pdd->qpd.mapped_gws_queue = false;
+ mutex_init(&pdd->qpd.doorbell_lock);
+ pdd->process = p;
+ pdd->bound = PDD_UNBOUND;
+--
+2.17.1
+