aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/5680-drm-amdkfd-Vega20-bring-up-on-amdkfd-side.patch
blob: 9d81a54b2f972db79bbdbf5054bcb50772d16652 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
From 640741018c6037ba919804ef9ee61688c1755f99 Mon Sep 17 00:00:00 2001
From: Shaoyun Liu <Shaoyun.Liu@amd.com>
Date: Tue, 31 Oct 2017 13:32:53 -0400
Subject: [PATCH 5680/5725] drm/amdkfd: Vega20 bring up on amdkfd side

Change-Id: I6a2572ad6caf92e3feaf29a7b14fd4f0daa16dbc
Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c              |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            | 22 ++++++++++++++++++++++
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c       |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c      |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c       |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c    |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c          |  1 +
 8 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 6688882..c540b65 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -642,6 +642,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
 		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
 		break;
 	case CHIP_VEGA10:
+	case CHIP_VEGA20:
 		pcache_info = vega10_cache_info;
 		num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
 		break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4db30d2..895f82f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -253,6 +253,22 @@ static const struct kfd_device_info vega10_vf_device_info = {
 	.num_sdma_queues_per_engine = 2,
 };
 
+static const struct kfd_device_info vega20_device_info = {
+	.asic_family = CHIP_VEGA20,
+	.max_pasid_bits = 16,
+	.max_no_of_hqd	= 24,
+	.doorbell_size	= 8,
+	.ih_ring_entry_size = 8 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_v9,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = true,
+	.needs_iommu_device = false,
+	.needs_pci_atomics = true,
+	.num_sdma_engines = 2,
+	.num_sdma_queues_per_engine = 8,
+};
+
 struct kfd_deviceid {
 	unsigned short did;
 	const struct kfd_device_info *device_info;
@@ -341,6 +357,12 @@ static const struct kfd_deviceid supported_devices[] = {
 	{ 0x6868, &vega10_device_info },	/* Vega10 */
 	{ 0x686C, &vega10_vf_device_info },	/* Vega10  vf*/
 	{ 0x687F, &vega10_device_info },	/* Vega10 */
+	{ 0x66a0, &vega20_device_info },	/* Vega20 */
+	{ 0x66a1, &vega20_device_info },	/* Vega20 */
+	{ 0x66a2, &vega20_device_info },	/* Vega20 */
+	{ 0x66a3, &vega20_device_info },	/* Vega20 */
+	{ 0x66a7, &vega20_device_info },	/* Vega20 */
+	{ 0x66af, &vega20_device_info }		/* Vega20 */
 };
 
 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 91b88d2..8ea9a29 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1727,6 +1727,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		break;
 
 	case CHIP_VEGA10:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		device_queue_manager_init_v9(&dqm->asic_ops);
 		break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index f7de732..8f123a2 100755
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -400,6 +400,7 @@ int kfd_init_apertures(struct kfd_process *process)
 				kfd_init_apertures_vi(pdd, id);
 				break;
 			case CHIP_VEGA10:
+			case CHIP_VEGA20:
 			case CHIP_RAVEN:
 				kfd_init_apertures_v9(pdd, id);
 				break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 51b976d..be038c5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -355,6 +355,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 		break;
 
 	case CHIP_VEGA10:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		kernel_queue_init_v9(&kq->ops_asic_specific);
 		break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 8279b74..d39e81c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -81,6 +81,7 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
 	case CHIP_POLARIS11:
 		return mqd_manager_init_vi_tonga(type, dev);
 	case CHIP_VEGA10:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		return mqd_manager_init_v9(type, dev);
 	default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 1092631..c6080ed3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -229,6 +229,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
 		pm->pmf = &kfd_vi_pm_funcs;
 		break;
 	case CHIP_VEGA10:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		pm->pmf = &kfd_v9_pm_funcs;
 		break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 82cff10..4fe5ebc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1308,6 +1308,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
 		break;
 	case CHIP_VEGA10:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
-- 
2.7.4
#define get_byte_2 lsr #8 #define get_byte_3 lsl #0 #define put_byte_0 lsl #24 #define put_byte_1 lsl #16 #define put_byte_2 lsl #8 #define put_byte_3 lsl #0 #endif /* * Data preload for architectures that support it */ #if __LINUX_ARM_ARCH__ >= 5 #define PLD(code...) code #else #define PLD(code...) #endif /* * This can be used to enable code to cacheline align the destination * pointer when bulk writing to memory. Experiments on StrongARM and * XScale didn't show this a worthwhile thing to do when the cache is not * set to write-allocate (this would need further testing on XScale when WA * is used). * * On Feroceon there is much to gain however, regardless of cache mode. */ #ifdef CONFIG_CPU_FEROCEON #define CALGN(code...) code #else #define CALGN(code...) #endif /* * Enable and disable interrupts */ #if __LINUX_ARM_ARCH__ >= 6 .macro disable_irq_notrace cpsid i .endm .macro enable_irq_notrace cpsie i .endm #else .macro disable_irq_notrace msr cpsr_c, #PSR_I_BIT | SVC_MODE .endm .macro enable_irq_notrace msr cpsr_c, #SVC_MODE .endm #endif .macro asm_trace_hardirqs_off #if defined(CONFIG_TRACE_IRQFLAGS) stmdb sp!, {r0-r3, ip, lr} bl trace_hardirqs_off ldmia sp!, {r0-r3, ip, lr} #endif .endm .macro asm_trace_hardirqs_on_cond, cond #if defined(CONFIG_TRACE_IRQFLAGS) /* * actually the registers should be pushed and pop'd conditionally, but * after bl the flags are certainly clobbered */ stmdb sp!, {r0-r3, ip, lr} bl\cond trace_hardirqs_on ldmia sp!, {r0-r3, ip, lr} #endif .endm .macro asm_trace_hardirqs_on asm_trace_hardirqs_on_cond al .endm .macro disable_irq disable_irq_notrace asm_trace_hardirqs_off .endm .macro enable_irq asm_trace_hardirqs_on enable_irq_notrace .endm /* * Save the current IRQ state and disable IRQs. Note that this macro * assumes FIQs are enabled, and that the processor is in SVC mode. */ .macro save_and_disable_irqs, oldcpsr mrs \oldcpsr, cpsr disable_irq .endm .macro save_and_disable_irqs_notrace, oldcpsr mrs \oldcpsr, cpsr disable_irq_notrace .endm /* * Restore interrupt state previously stored in a register. We don't * guarantee that this will preserve the flags. */ .macro restore_irqs_notrace, oldcpsr msr cpsr_c, \oldcpsr .endm .macro restore_irqs, oldcpsr tst \oldcpsr, #PSR_I_BIT asm_trace_hardirqs_on_cond eq restore_irqs_notrace \oldcpsr .endm #define USER(x...) \ 9999: x; \ .pushsection __ex_table,"a"; \ .align 3; \ .long 9999b,9001f; \ .popsection #ifdef CONFIG_SMP #define ALT_SMP(instr...) \ 9998: instr /* * Note: if you get assembler errors from ALT_UP() when building with * CONFIG_THUMB2_KERNEL, you almost certainly need to use * ALT_SMP( W(instr) ... ) */ #define ALT_UP(instr...) \ .pushsection ".alt.smp.init", "a" ;\ .long 9998b ;\ 9997: instr ;\ .if . - 9997b != 4 ;\ .error "ALT_UP() content must assemble to exactly 4 bytes";\ .endif ;\ .popsection #define ALT_UP_B(label) \ .equ up_b_offset, label - 9998b ;\ .pushsection ".alt.smp.init", "a" ;\ .long 9998b ;\ W(b) . + up_b_offset ;\ .popsection #else #define ALT_SMP(instr...) #define ALT_UP(instr...) instr #define ALT_UP_B(label) b label #endif /* * Instruction barrier */ .macro instr_sync #if __LINUX_ARM_ARCH__ >= 7 isb #elif __LINUX_ARM_ARCH__ == 6 mcr p15, 0, r0, c7, c5, 4 #endif .endm /* * SMP data memory barrier */ .macro smp_dmb mode #ifdef CONFIG_SMP #if __LINUX_ARM_ARCH__ >= 7 .ifeqs "\mode","arm" ALT_SMP(dmb) .else ALT_SMP(W(dmb)) .endif #elif __LINUX_ARM_ARCH__ == 6 ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb #else #error Incompatible SMP platform #endif .ifeqs "\mode","arm" ALT_UP(nop) .else ALT_UP(W(nop)) .endif #endif .endm #ifdef CONFIG_THUMB2_KERNEL .macro setmode, mode, reg mov \reg, #\mode msr cpsr_c, \reg .endm #else .macro setmode, mode, reg msr cpsr_c, #\mode .endm #endif /* * STRT/LDRT access macros with ARM and Thumb-2 variants */ #ifdef CONFIG_THUMB2_KERNEL .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=TUSER() 9999: .if \inc == 1 \instr\cond\()b\()\t\().w \reg, [\ptr, #\off] .elseif \inc == 4 \instr\cond\()\t\().w \reg, [\ptr, #\off] .else .error "Unsupported inc macro argument" .endif .pushsection __ex_table,"a" .align 3 .long 9999b, \abort .popsection .endm .macro usracc, instr, reg, ptr, inc, cond, rept, abort @ explicit IT instruction needed because of the label @ introduced by the USER macro .ifnc \cond,al .if \rept == 1 itt \cond .elseif \rept == 2 ittt \cond .else .error "Unsupported rept macro argument" .endif .endif @ Slightly optimised to avoid incrementing the pointer twice usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort .if \rept == 2 usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort .endif add\cond \ptr, #\rept * \inc .endm #else /* !CONFIG_THUMB2_KERNEL */ .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=TUSER() .rept \rept 9999: .if \inc == 1 \instr\cond\()b\()\t \reg, [\ptr], #\inc .elseif \inc == 4 \instr\cond\()\t \reg, [\ptr], #\inc .else .error "Unsupported inc macro argument" .endif .pushsection __ex_table,"a" .align 3 .long 9999b, \abort .popsection .endr .endm #endif /* CONFIG_THUMB2_KERNEL */ .macro strusr, reg, ptr, inc, cond=al, rept=1, abort=9001f usracc str, \reg, \ptr, \inc, \cond, \rept, \abort .endm .macro ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f usracc ldr, \reg, \ptr, \inc, \cond, \rept, \abort .endm /* Utility macro for declaring string literals */ .macro string name:req, string .type \name , #object \name: .asciz "\string" .size \name , . - \name .endm #endif /* __ASM_ASSEMBLER_H__ */