diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
31 files changed, 967 insertions, 319 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index d1caaf0e6a7c..5a0308d26b53 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -678,7 +678,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { }; static const uint32_t cwsr_trap_nv1x_hex[] = { - 0xbf820001, 0xbf8201f5, + 0xbf820001, 0xbf820394, 0xb0804004, 0xb978f802, 0x8a78ff78, 0x00020006, 0xb97bf803, 0x876eff78, @@ -769,13 +769,90 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820002, 0xbeff03c1, - 0xbf82000b, 0xbef603ff, - 0x01000000, 0xe0704000, - 0x705d0000, 0xe0704080, - 0x705d0100, 0xe0704100, - 0x705d0200, 0xe0704180, - 0x705d0300, 0xbf82000a, - 0xbef603ff, 0x01000000, + 0xbf820058, 0xbef603ff, + 0x01000000, 0xb97af803, + 0x8a7a7aff, 0x10000000, + 0xbf850049, 0xbe840380, + 0xd7600000, 0x00000900, + 0x80048104, 0xd7600001, + 0x00000900, 0x80048104, + 0xd7600002, 0x00000900, + 0x80048104, 0xd7600003, + 0x00000900, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06a004, + 0xbf84ffef, 0xbe840380, + 0xd7600000, 0x00000901, + 0x80048104, 0xd7600001, + 0x00000901, 0x80048104, + 0xd7600002, 0x00000901, + 0x80048104, 0xd7600003, + 0x00000901, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06a004, + 0xbf84ffef, 0xbe840380, + 0xd7600000, 0x00000902, + 0x80048104, 0xd7600001, + 0x00000902, 0x80048104, + 0xd7600002, 0x00000902, + 0x80048104, 0xd7600003, + 0x00000902, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06a004, + 0xbf84ffef, 0xbe840380, + 0xd7600000, 0x00000903, + 0x80048104, 0xd7600001, + 0x00000903, 0x80048104, + 0xd7600002, 0x00000903, + 0x80048104, 0xd7600003, + 0x00000903, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06a004, + 0xbf84ffef, 0xbf820060, + 0xe0704000, 0x705d0000, + 0xe0704080, 0x705d0100, + 0xe0704100, 0x705d0200, + 0xe0704180, 0x705d0300, + 0xbf820057, 0xbef603ff, + 0x01000000, 0xb97af803, + 0x8a7a7aff, 0x10000000, + 0xbf850049, 0xbe840380, + 0xd7600000, 0x00000900, + 0x80048104, 0xd7600001, + 0x00000900, 0x80048104, + 0xd7600002, 0x00000900, + 0x80048104, 0xd7600003, + 0x00000900, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06c004, + 0xbf84ffef, 0xbe840380, + 0xd7600000, 0x00000901, + 0x80048104, 0xd7600001, + 0x00000901, 0x80048104, + 0xd7600002, 0x00000901, + 0x80048104, 0xd7600003, + 0x00000901, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06c004, + 0xbf84ffef, 0xbe840380, + 0xd7600000, 0x00000902, + 0x80048104, 0xd7600001, + 0x00000902, 0x80048104, + 0xd7600002, 0x00000902, + 0x80048104, 0xd7600003, + 0x00000902, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06c004, + 0xbf84ffef, 0xbe840380, + 0xd7600000, 0x00000903, + 0x80048104, 0xd7600001, + 0x00000903, 0x80048104, + 0xd7600002, 0x00000903, + 0x80048104, 0xd7600003, + 0x00000903, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06c004, + 0xbf84ffef, 0xbf820008, 0xe0704000, 0x705d0000, 0xe0704100, 0x705d0100, 0xe0704200, 0x705d0200, @@ -855,9 +932,9 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbf850002, 0xbeff0380, 0xbf820001, 0xbeff03c1, 0xb97b4306, 0x877bc17b, - 0xbf840044, 0xbf8a0000, + 0xbf840086, 0xbf8a0000, 0x877aff6d, 0x80000000, - 0xbf840040, 0x8f7b867b, + 0xbf840082, 0x8f7b867b, 0x8f7b827b, 0xbef6037b, 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, @@ -871,16 +948,49 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xd7660000, 0x000200c1, 0x16000084, 0x907c9973, 0x877c817c, 0xbf06817c, - 0xbefc0380, 0xbf850012, - 0xbe8303ff, 0x00000080, + 0xbefc0380, 0xbf850033, + 0xb97af803, 0x8a7a7aff, + 0x10000000, 0xbf85001d, + 0xd8d80000, 0x01000000, + 0xbf8c0000, 0xbe840380, + 0xd7600000, 0x00000901, + 0x80048104, 0xd7600001, + 0x00000901, 0x80048104, + 0xd7600002, 0x00000901, + 0x80048104, 0xd7600003, + 0x00000901, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06a004, + 0xbf84ffef, 0x807cff7c, + 0x00000080, 0xd5250000, + 0x0001ff00, 0x00000080, + 0xbf0a7b7c, 0xbf85ffe4, + 0xbf820044, 0xbe8303ff, + 0x00000080, 0xbf800000, 0xbf800000, 0xbf800000, - 0xbf800000, 0xd8d80000, + 0xd8d80000, 0x01000000, + 0xbf8c0000, 0xe0704000, + 0x705d0100, 0x807c037c, + 0x80700370, 0xd5250000, + 0x0001ff00, 0x00000080, + 0xbf0a7b7c, 0xbf85fff4, + 0xbf820032, 0xb97af803, + 0x8a7a7aff, 0x10000000, + 0xbf85001d, 0xd8d80000, 0x01000000, 0xbf8c0000, - 0xe0704000, 0x705d0100, - 0x807c037c, 0x80700370, + 0xbe840380, 0xd7600000, + 0x00000901, 0x80048104, + 0xd7600001, 0x00000901, + 0x80048104, 0xd7600002, + 0x00000901, 0x80048104, + 0xd7600003, 0x00000901, + 0x80048104, 0xf469003a, + 0xe0000000, 0x80709070, + 0xbf06c004, 0xbf84ffef, + 0x807cff7c, 0x00000100, 0xd5250000, 0x0001ff00, - 0x00000080, 0xbf0a7b7c, - 0xbf85fff4, 0xbf820011, + 0x00000100, 0xbf0a7b7c, + 0xbf85ffe4, 0xbf820011, 0xbe8303ff, 0x00000100, 0xbf800000, 0xbf800000, 0xbf800000, 0xd8d80000, @@ -898,10 +1008,52 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbeff03c1, 0xb97b3a05, 0x807b817b, 0x8f7b827b, 0x907c9973, 0x877c817c, - 0xbf06817c, 0xbf850017, + 0xbf06817c, 0xbf85006b, 0xbef603ff, 0x01000000, 0xbefc0384, 0xbf0a7b7c, - 0xbf840037, 0x7e008700, + 0xbf8400fa, 0xb97af803, + 0x8a7a7aff, 0x10000000, + 0xbf850050, 0x7e008700, + 0x7e028701, 0x7e048702, + 0x7e068703, 0xbe840380, + 0xd7600000, 0x00000900, + 0x80048104, 0xd7600001, + 0x00000900, 0x80048104, + 0xd7600002, 0x00000900, + 0x80048104, 0xd7600003, + 0x00000900, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06a004, + 0xbf84ffef, 0xbe840380, + 0xd7600000, 0x00000901, + 0x80048104, 0xd7600001, + 0x00000901, 0x80048104, + 0xd7600002, 0x00000901, + 0x80048104, 0xd7600003, + 0x00000901, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06a004, + 0xbf84ffef, 0xbe840380, + 0xd7600000, 0x00000902, + 0x80048104, 0xd7600001, + 0x00000902, 0x80048104, + 0xd7600002, 0x00000902, + 0x80048104, 0xd7600003, + 0x00000902, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06a004, + 0xbf84ffef, 0xbe840380, + 0xd7600000, 0x00000903, + 0x80048104, 0xd7600001, + 0x00000903, 0x80048104, + 0xd7600002, 0x00000903, + 0x80048104, 0xd7600003, + 0x00000903, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06a004, + 0xbf84ffef, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf8200a6, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0704000, 0x705d0000, 0xe0704080, @@ -910,9 +1062,51 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x705d0300, 0x807c847c, 0x8070ff70, 0x00000200, 0xbf0a7b7c, 0xbf85ffef, - 0xbf820025, 0xbef603ff, + 0xbf820094, 0xbef603ff, 0x01000000, 0xbefc0384, - 0xbf0a7b7c, 0xbf840011, + 0xbf0a7b7c, 0xbf840065, + 0xb97af803, 0x8a7a7aff, + 0x10000000, 0xbf850050, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, + 0xbe840380, 0xd7600000, + 0x00000900, 0x80048104, + 0xd7600001, 0x00000900, + 0x80048104, 0xd7600002, + 0x00000900, 0x80048104, + 0xd7600003, 0x00000900, + 0x80048104, 0xf469003a, + 0xe0000000, 0x80709070, + 0xbf06c004, 0xbf84ffef, + 0xbe840380, 0xd7600000, + 0x00000901, 0x80048104, + 0xd7600001, 0x00000901, + 0x80048104, 0xd7600002, + 0x00000901, 0x80048104, + 0xd7600003, 0x00000901, + 0x80048104, 0xf469003a, + 0xe0000000, 0x80709070, + 0xbf06c004, 0xbf84ffef, + 0xbe840380, 0xd7600000, + 0x00000902, 0x80048104, + 0xd7600001, 0x00000902, + 0x80048104, 0xd7600002, + 0x00000902, 0x80048104, + 0xd7600003, 0x00000902, + 0x80048104, 0xf469003a, + 0xe0000000, 0x80709070, + 0xbf06c004, 0xbf84ffef, + 0xbe840380, 0xd7600000, + 0x00000903, 0x80048104, + 0xd7600001, 0x00000903, + 0x80048104, 0xd7600002, + 0x00000903, 0x80048104, + 0xd7600003, 0x00000903, + 0x80048104, 0xf469003a, + 0xe0000000, 0x80709070, + 0xbf06c004, 0xbf84ffef, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffb1, 0xbf82003b, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0704000, 0x705d0000, @@ -922,179 +1116,192 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, 0xbf85ffef, 0xb97b1e06, - 0x877bc17b, 0xbf84000c, + 0x877bc17b, 0xbf840027, 0x8f7b837b, 0x807b7c7b, 0xbefe03c1, 0xbeff0380, - 0x7e008700, 0xe0704000, - 0x705d0000, 0x807c817c, - 0x8070ff70, 0x00000080, - 0xbf0a7b7c, 0xbf85fff8, - 0xbf820144, 0xbef4037e, - 0x8775ff7f, 0x0000ffff, - 0x8875ff75, 0x00040000, - 0xbef60380, 0xbef703ff, - 0x10807fac, 0xb97202dc, - 0x8f729972, 0x876eff7f, - 0x04000000, 0xbf840034, + 0xb97af803, 0x8a7a7aff, + 0x10000000, 0xbf850017, + 0x7e008700, 0xbe840380, + 0xd7600000, 0x00000900, + 0x80048104, 0xd7600001, + 0x00000900, 0x80048104, + 0xd7600002, 0x00000900, + 0x80048104, 0xd7600003, + 0x00000900, 0x80048104, + 0xf469003a, 0xe0000000, + 0x80709070, 0xbf06c004, + 0xbf84ffef, 0x807c817c, + 0xbf0a7b7c, 0xbf85ffea, + 0xbf820008, 0x7e008700, + 0xe0704000, 0x705d0000, + 0x807c817c, 0x8070ff70, + 0x00000080, 0xbf0a7b7c, + 0xbf85fff8, 0xbf820144, + 0xbef4037e, 0x8775ff7f, + 0x0000ffff, 0x8875ff75, + 0x00040000, 0xbef60380, + 0xbef703ff, 0x10807fac, + 0xb97202dc, 0x8f729972, + 0x876eff7f, 0x04000000, + 0xbf840034, 0xbefe03c1, + 0x907c9972, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0xbeff0380, 0xbf820001, + 0xbeff03c1, 0xb96f4306, + 0x876fc16f, 0xbf840029, + 0x8f6f866f, 0x8f6f826f, + 0xbef6036f, 0xb9783a05, + 0x80788178, 0xbf0d9972, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb96e1e06, 0x8f6e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0x8078ff78, + 0x00000080, 0xbef603ff, + 0x01000000, 0x907c9972, + 0x877c817c, 0xbf06817c, + 0xbefc0380, 0xbf850009, + 0xe0310000, 0x781d0000, + 0x807cff7c, 0x00000080, + 0x8078ff78, 0x00000080, + 0xbf0a6f7c, 0xbf85fff8, + 0xbf820008, 0xe0310000, + 0x781d0000, 0x807cff7c, + 0x00000100, 0x8078ff78, + 0x00000100, 0xbf0a6f7c, + 0xbf85fff8, 0xbef80380, 0xbefe03c1, 0x907c9972, 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, 0xbeff03c1, - 0xb96f4306, 0x876fc16f, - 0xbf840029, 0x8f6f866f, - 0x8f6f826f, 0xbef6036f, - 0xb9783a05, 0x80788178, - 0xbf0d9972, 0xbf850002, - 0x8f788978, 0xbf820001, - 0x8f788a78, 0xb96e1e06, - 0x8f6e8a6e, 0x80786e78, + 0xb96f3a05, 0x806f816f, + 0x8f6f826f, 0x907c9972, + 0x877c817c, 0xbf06817c, + 0xbf850024, 0xbef603ff, + 0x01000000, 0xbeee0378, 0x8078ff78, 0x00000200, - 0x8078ff78, 0x00000080, - 0xbef603ff, 0x01000000, - 0x907c9972, 0x877c817c, - 0xbf06817c, 0xbefc0380, - 0xbf850009, 0xe0310000, - 0x781d0000, 0x807cff7c, - 0x00000080, 0x8078ff78, - 0x00000080, 0xbf0a6f7c, - 0xbf85fff8, 0xbf820008, - 0xe0310000, 0x781d0000, - 0x807cff7c, 0x00000100, - 0x8078ff78, 0x00000100, - 0xbf0a6f7c, 0xbf85fff8, - 0xbef80380, 0xbefe03c1, - 0x907c9972, 0x877c817c, - 0xbf06817c, 0xbf850002, - 0xbeff0380, 0xbf820001, - 0xbeff03c1, 0xb96f3a05, - 0x806f816f, 0x8f6f826f, - 0x907c9972, 0x877c817c, - 0xbf06817c, 0xbf850024, - 0xbef603ff, 0x01000000, - 0xbeee0378, 0x8078ff78, - 0x00000200, 0xbefc0384, - 0xbf0a6f7c, 0xbf840050, + 0xbefc0384, 0xbf0a6f7c, + 0xbf840050, 0xe0304000, + 0x785d0000, 0xe0304080, + 0x785d0100, 0xe0304100, + 0x785d0200, 0xe0304180, + 0x785d0300, 0xbf8c3f70, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807c847c, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85ffee, 0xe0304000, + 0x6e5d0000, 0xe0304080, + 0x6e5d0100, 0xe0304100, + 0x6e5d0200, 0xe0304180, + 0x6e5d0300, 0xbf8c3f70, + 0xbf820034, 0xbef603ff, + 0x01000000, 0xbeee0378, + 0x8078ff78, 0x00000400, + 0xbefc0384, 0xbf0a6f7c, + 0xbf840012, 0xe0304000, + 0x785d0000, 0xe0304100, + 0x785d0100, 0xe0304200, + 0x785d0200, 0xe0304300, + 0x785d0300, 0xbf8c3f70, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xb96f1e06, + 0x876fc16f, 0xbf84000e, + 0x8f6f836f, 0x806f7c6f, + 0xbefe03c1, 0xbeff0380, 0xe0304000, 0x785d0000, - 0xe0304080, 0x785d0100, - 0xe0304100, 0x785d0200, - 0xe0304180, 0x785d0300, 0xbf8c3f70, 0x7e008500, - 0x7e028501, 0x7e048502, - 0x7e068503, 0x807c847c, - 0x8078ff78, 0x00000200, - 0xbf0a6f7c, 0xbf85ffee, + 0x807c817c, 0x8078ff78, + 0x00000080, 0xbf0a6f7c, + 0xbf85fff7, 0xbeff03c1, 0xe0304000, 0x6e5d0000, - 0xe0304080, 0x6e5d0100, - 0xe0304100, 0x6e5d0200, - 0xe0304180, 0x6e5d0300, - 0xbf8c3f70, 0xbf820034, - 0xbef603ff, 0x01000000, - 0xbeee0378, 0x8078ff78, - 0x00000400, 0xbefc0384, - 0xbf0a6f7c, 0xbf840012, - 0xe0304000, 0x785d0000, - 0xe0304100, 0x785d0100, - 0xe0304200, 0x785d0200, - 0xe0304300, 0x785d0300, - 0xbf8c3f70, 0x7e008500, - 0x7e028501, 0x7e048502, - 0x7e068503, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xb96f1e06, 0x876fc16f, - 0xbf84000e, 0x8f6f836f, - 0x806f7c6f, 0xbefe03c1, - 0xbeff0380, 0xe0304000, - 0x785d0000, 0xbf8c3f70, - 0x7e008500, 0x807c817c, - 0x8078ff78, 0x00000080, - 0xbf0a6f7c, 0xbf85fff7, - 0xbeff03c1, 0xe0304000, - 0x6e5d0000, 0xe0304100, - 0x6e5d0100, 0xe0304200, - 0x6e5d0200, 0xe0304300, - 0x6e5d0300, 0xbf8c3f70, + 0xe0304100, 0x6e5d0100, + 0xe0304200, 0x6e5d0200, + 0xe0304300, 0x6e5d0300, + 0xbf8c3f70, 0xb9783a05, + 0x80788178, 0xbf0d9972, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb96e1e06, 0x8f6e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0x80f8ff78, + 0x00000050, 0xbef603ff, + 0x01000000, 0xbefc03ff, + 0x0000006c, 0x80f89078, + 0xf429003a, 0xf0000000, + 0xbf8cc07f, 0x80fc847c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0x80f8a078, + 0xf42d003a, 0xf0000000, + 0xbf8cc07f, 0x80fc887c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0xbe843104, + 0xbe863106, 0x80f8c078, + 0xf431003a, 0xf0000000, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0xbe843104, + 0xbe863106, 0xbe883108, + 0xbe8a310a, 0xbe8c310c, + 0xbe8e310e, 0xbf06807c, + 0xbf84fff0, 0xba80f801, + 0x00000000, 0xbf8a0000, 0xb9783a05, 0x80788178, 0xbf0d9972, 0xbf850002, 0x8f788978, 0xbf820001, 0x8f788a78, 0xb96e1e06, 0x8f6e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0x80f8ff78, 0x00000050, 0xbef603ff, 0x01000000, - 0xbefc03ff, 0x0000006c, - 0x80f89078, 0xf429003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc847c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0x80f8a078, 0xf42d003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc887c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0xbe843104, 0xbe863106, - 0x80f8c078, 0xf431003a, - 0xf0000000, 0xbf8cc07f, - 0x80fc907c, 0xbf800000, - 0xbe803100, 0xbe823102, - 0xbe843104, 0xbe863106, - 0xbe883108, 0xbe8a310a, - 0xbe8c310c, 0xbe8e310e, - 0xbf06807c, 0xbf84fff0, - 0xba80f801, 0x00000000, - 0xbf8a0000, 0xb9783a05, - 0x80788178, 0xbf0d9972, - 0xbf850002, 0x8f788978, - 0xbf820001, 0x8f788a78, - 0xb96e1e06, 0x8f6e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0xbef603ff, - 0x01000000, 0xf4211bfa, + 0xf4211bfa, 0xf0000000, + 0x80788478, 0xf4211b3a, 0xf0000000, 0x80788478, - 0xf4211b3a, 0xf0000000, - 0x80788478, 0xf4211b7a, + 0xf4211b7a, 0xf0000000, + 0x80788478, 0xf4211c3a, 0xf0000000, 0x80788478, - 0xf4211c3a, 0xf0000000, - 0x80788478, 0xf4211c7a, + 0xf4211c7a, 0xf0000000, + 0x80788478, 0xf4211eba, 0xf0000000, 0x80788478, - 0xf4211eba, 0xf0000000, - 0x80788478, 0xf4211efa, + 0xf4211efa, 0xf0000000, + 0x80788478, 0xf4211e7a, 0xf0000000, 0x80788478, - 0xf4211e7a, 0xf0000000, - 0x80788478, 0xf4211cfa, + 0xf4211cfa, 0xf0000000, + 0x80788478, 0xf4211bba, 0xf0000000, 0x80788478, + 0xbf8cc07f, 0xb9eef814, 0xf4211bba, 0xf0000000, 0x80788478, 0xbf8cc07f, - 0xb9eef814, 0xf4211bba, - 0xf0000000, 0x80788478, - 0xbf8cc07f, 0xb9eef815, - 0xbefc036f, 0xbefe0370, - 0xbeff0371, 0x876f7bff, - 0x000003ff, 0xb9ef4803, - 0xb9f9f816, 0x876f7bff, - 0xfffff800, 0x906f8b6f, - 0xb9efa2c3, 0xb9f3f801, - 0xb96e3a05, 0x806e816e, - 0xbf0d9972, 0xbf850002, - 0x8f6e896e, 0xbf820001, - 0x8f6e8a6e, 0xb96f1e06, - 0x8f6f8a6f, 0x806e6f6e, - 0x806eff6e, 0x00000200, - 0x806e746e, 0x826f8075, - 0x876fff6f, 0x0000ffff, - 0xf4091c37, 0xfa000050, - 0xf4091d37, 0xfa000060, - 0xf4011e77, 0xfa000074, - 0xbf8cc07f, 0x906e8977, - 0x876fff6e, 0x003f8000, - 0x906e8677, 0x876eff6e, - 0x02000000, 0x886e6f6e, - 0xb9eef807, 0x876dff6d, - 0x0000ffff, 0x87fe7e7e, - 0x87ea6a6a, 0xb9faf802, - 0xbe80226c, 0xbf9b0000, + 0xb9eef815, 0xbefc036f, + 0xbefe0370, 0xbeff0371, + 0x876f7bff, 0x000003ff, + 0xb9ef4803, 0xb9f9f816, + 0x876f7bff, 0xfffff800, + 0x906f8b6f, 0xb9efa2c3, + 0xb9f3f801, 0xb96e3a05, + 0x806e816e, 0xbf0d9972, + 0xbf850002, 0x8f6e896e, + 0xbf820001, 0x8f6e8a6e, + 0xb96f1e06, 0x8f6f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x876fff6f, + 0x0000ffff, 0xf4091c37, + 0xfa000050, 0xf4091d37, + 0xfa000060, 0xf4011e77, + 0xfa000074, 0xbf8cc07f, + 0x906e8977, 0x876fff6e, + 0x003f8000, 0x906e8677, + 0x876eff6e, 0x02000000, + 0x886e6f6e, 0xb9eef807, + 0x876dff6d, 0x0000ffff, + 0x87fe7e7e, 0x87ea6a6a, + 0xb9faf802, 0xbe80226c, + 0xbf9b0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, - 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_arcturus_hex[] = { @@ -2518,7 +2725,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { 0x8b6eff7b, 0x00000400, 0xbfa20045, 0xbf830010, 0xb8fbf803, 0xbfa0fffa, - 0x8b6eff7b, 0x00000900, + 0x8b6eff7b, 0x00160900, 0xbfa20015, 0x8b6eff7b, 0x000071ff, 0xbfa10008, 0x8b6fff7b, 0x00007080, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 71b3dc0c7363..e1aaa5ce0784 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -44,6 +44,7 @@ #define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO) #define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO) #define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO) +#define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised @@ -81,6 +82,12 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000 +#if ASIC_FAMILY >= CHIP_PLUM_BONITO +var SQ_WAVE_TRAPSTS_WAVE_START_MASK = 0x20000 +var SQ_WAVE_TRAPSTS_WAVE_END_MASK = 0x40000 +var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK = 0x100000 +#endif +var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000 var SQ_WAVE_MODE_EXCP_EN_SHIFT = 12 var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT = 19 @@ -92,6 +99,16 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000 var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 +#if ASIC_FAMILY < CHIP_PLUM_BONITO +var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK +#else +var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK |\ + SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK |\ + SQ_WAVE_TRAPSTS_WAVE_START_MASK |\ + SQ_WAVE_TRAPSTS_WAVE_END_MASK |\ + SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK +#endif + // bits [31:24] unused by SPI debug data var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31 var TTMP11_SAVE_REPLAY_W64H_MASK = 0x80000000 @@ -224,7 +241,7 @@ L_NOT_HALTED: // Check non-maskable exceptions. memory_violation, illegal_instruction // and xnack_error exceptions always cause the wave to enter the trap // handler. - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_and_b32 ttmp2, s_save_trapsts, S_TRAPSTS_NON_MASKABLE_EXCP_MASK s_cbranch_scc1 L_FETCH_2ND_TRAP // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. @@ -460,6 +477,16 @@ L_SAVE_4VGPR_WAVE32: // VGPR Allocated in 4-GPR granularity +#if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_FIRST_VGPRS32_WITH_TCP + + write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset) + s_branch L_SAVE_HWREG + +L_SAVE_FIRST_VGPRS32_WITH_TCP: +#endif + #if !NO_SQC_STORE buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 #endif @@ -473,6 +500,16 @@ L_SAVE_4VGPR_WAVE64: // VGPR Allocated in 4-GPR granularity +#if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_FIRST_VGPRS64_WITH_TCP + + write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset) + s_branch L_SAVE_HWREG + +L_SAVE_FIRST_VGPRS64_WITH_TCP: +#endif + #if !NO_SQC_STORE buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 #endif @@ -645,6 +682,26 @@ L_SAVE_LDS_NORMAL: s_cbranch_scc1 L_SAVE_LDS_W64 L_SAVE_LDS_W32: +#if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_LDS_WITH_TCP_W32 + +L_SAVE_LDS_LOOP_SQC_W32: + ds_read_b32 v1, v0 + s_waitcnt 0 + + write_vgprs_to_mem_with_sqc_w32(v1, 1, s_save_buf_rsrc0, s_save_mem_offset) + + s_add_u32 m0, m0, 128 //every buffer_store_lds does 128 bytes + v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP_SQC_W32 //LDS save is complete? + + s_branch L_SAVE_LDS_DONE + +L_SAVE_LDS_WITH_TCP_W32: +#endif + s_mov_b32 s3, 128 s_nop 0 s_nop 0 @@ -654,7 +711,7 @@ L_SAVE_LDS_LOOP_W32: s_waitcnt 0 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes + s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 @@ -663,6 +720,26 @@ L_SAVE_LDS_LOOP_W32: s_branch L_SAVE_LDS_DONE L_SAVE_LDS_W64: +#if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_LDS_WITH_TCP_W64 + +L_SAVE_LDS_LOOP_SQC_W64: + ds_read_b32 v1, v0 + s_waitcnt 0 + + write_vgprs_to_mem_with_sqc_w64(v1, 1, s_save_buf_rsrc0, s_save_mem_offset) + + s_add_u32 m0, m0, 256 //every buffer_store_lds does 256 bytes + v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP_SQC_W64 //LDS save is complete? + + s_branch L_SAVE_LDS_DONE + +L_SAVE_LDS_WITH_TCP_W64: +#endif + s_mov_b32 s3, 256 s_nop 0 s_nop 0 @@ -712,6 +789,25 @@ L_SAVE_VGPR_NORMAL: s_cmp_lt_u32 m0, s_save_alloc_size s_cbranch_scc0 L_SAVE_VGPR_END +#if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP + +L_SAVE_VGPR_LOOP_SQC_W32: + v_movrels_b32 v0, v0 //v0 = v[0+m0] + v_movrels_b32 v1, v1 //v1 = v[1+m0] + v_movrels_b32 v2, v2 //v2 = v[2+m0] + v_movrels_b32 v3, v3 //v3 = v[3+m0] + + write_vgprs_to_mem_with_sqc_w32(v0, 4, s_save_buf_rsrc0, s_save_mem_offset) + + s_add_u32 m0, m0, 4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC_W32 + + s_branch L_SAVE_VGPR_END +#endif + L_SAVE_VGPR_W32_LOOP: v_movrels_b32 v0, v0 //v0 = v[0+m0] v_movrels_b32 v1, v1 //v1 = v[1+m0] @@ -738,6 +834,25 @@ L_SAVE_VGPR_WAVE64: s_cmp_lt_u32 m0, s_save_alloc_size s_cbranch_scc0 L_SAVE_SHARED_VGPR +#if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP + +L_SAVE_VGPR_LOOP_SQC_W64: + v_movrels_b32 v0, v0 //v0 = v[0+m0] + v_movrels_b32 v1, v1 //v1 = v[1+m0] + v_movrels_b32 v2, v2 //v2 = v[2+m0] + v_movrels_b32 v3, v3 //v3 = v[3+m0] + + write_vgprs_to_mem_with_sqc_w64(v0, 4, s_save_buf_rsrc0, s_save_mem_offset) + + s_add_u32 m0, m0, 4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC_W64 + + s_branch L_SAVE_VGPR_END +#endif + L_SAVE_VGPR_W64_LOOP: v_movrels_b32 v0, v0 //v0 = v[0+m0] v_movrels_b32 v1, v1 //v1 = v[1+m0] @@ -765,6 +880,23 @@ L_SAVE_SHARED_VGPR: s_add_u32 s_save_alloc_size, s_save_alloc_size, m0 s_mov_b32 exec_lo, 0xFFFFFFFF s_mov_b32 exec_hi, 0x00000000 + +#if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP + +L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC: + v_movrels_b32 v0, v0 + + write_vgprs_to_mem_with_sqc_w64(v0, 1, s_save_buf_rsrc0, s_save_mem_offset) + + s_add_u32 m0, m0, 1 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC + + s_branch L_SAVE_VGPR_END +#endif + L_SAVE_SHARED_VGPR_WAVE64_LOOP: v_movrels_b32 v0, v0 //v0 = v[0+m0] buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 @@ -1175,6 +1307,43 @@ function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset) s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset glc:1 end +#if SAVE_AFTER_XNACK_ERROR +function check_if_tcp_store_ok + // If TRAPSTS.XNACK_ERROR=1 then TCP stores will fail. + s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS) + s_andn2_b32 s_save_tmp, SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK, s_save_tmp + +L_TCP_STORE_CHECK_DONE: +end + +function write_vgpr_to_mem_with_sqc(vgpr, n_lanes, s_rsrc, s_mem_offset) + s_mov_b32 s4, 0 + +L_WRITE_VGPR_LANE_LOOP: + for var lane = 0; lane < 4; ++lane + v_readlane_b32 s[lane], vgpr, s4 + s_add_u32 s4, s4, 1 + end + + s_buffer_store_dwordx4 s[0:3], s_rsrc, s_mem_offset glc:1 + + s_add_u32 s_mem_offset, s_mem_offset, 0x10 + s_cmp_eq_u32 s4, n_lanes + s_cbranch_scc0 L_WRITE_VGPR_LANE_LOOP +end + +function write_vgprs_to_mem_with_sqc_w32(vgpr0, n_vgprs, s_rsrc, s_mem_offset) + for var vgpr = 0; vgpr < n_vgprs; ++vgpr + write_vgpr_to_mem_with_sqc(vgpr0[vgpr], 32, s_rsrc, s_mem_offset) + end +end + +function write_vgprs_to_mem_with_sqc_w64(vgpr0, n_vgprs, s_rsrc, s_mem_offset) + for var vgpr = 0; vgpr < n_vgprs; ++vgpr + write_vgpr_to_mem_with_sqc(vgpr0[vgpr], 64, s_rsrc, s_mem_offset) + end +end +#endif function get_lds_size_bytes(s_lds_size_byte) s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 80e90fdef291..fdf171ad4a3c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -63,8 +63,10 @@ static const struct file_operations kfd_fops = { }; static int kfd_char_dev_major = -1; -static struct class *kfd_class; struct device *kfd_device; +static const struct class kfd_class = { + .name = kfd_dev_name, +}; static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id) { @@ -94,14 +96,13 @@ int kfd_chardev_init(void) if (err < 0) goto err_register_chrdev; - kfd_class = class_create(kfd_dev_name); - err = PTR_ERR(kfd_class); - if (IS_ERR(kfd_class)) + err = class_register(&kfd_class); + if (err) goto err_class_create; - kfd_device = device_create(kfd_class, NULL, - MKDEV(kfd_char_dev_major, 0), - NULL, kfd_dev_name); + kfd_device = device_create(&kfd_class, NULL, + MKDEV(kfd_char_dev_major, 0), + NULL, kfd_dev_name); err = PTR_ERR(kfd_device); if (IS_ERR(kfd_device)) goto err_device_create; @@ -109,7 +110,7 @@ int kfd_chardev_init(void) return 0; err_device_create: - class_destroy(kfd_class); + class_unregister(&kfd_class); err_class_create: unregister_chrdev(kfd_char_dev_major, kfd_dev_name); err_register_chrdev: @@ -118,8 +119,8 @@ err_register_chrdev: void kfd_chardev_exit(void) { - device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0)); - class_destroy(kfd_class); + device_destroy(&kfd_class, MKDEV(kfd_char_dev_major, 0)); + class_unregister(&kfd_class); unregister_chrdev(kfd_char_dev_major, kfd_dev_name); kfd_device = NULL; } @@ -370,8 +371,13 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, err = -EINVAL; goto err_wptr_map_gart; } + if (dev->adev != amdgpu_ttm_adev(wptr_bo->tbo.bdev)) { + pr_err("Queue memory allocated to wrong device\n"); + err = -EINVAL; + goto err_wptr_map_gart; + } - err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo); + err = amdgpu_amdkfd_map_gtt_bo_to_gart(wptr_bo); if (err) { pr_err("Failed to map wptr bo to GART\n"); goto err_wptr_map_gart; @@ -778,8 +784,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, * nodes, but not more than args->num_of_nodes as that is * the amount of memory allocated by user */ - pa = kzalloc((sizeof(struct kfd_process_device_apertures) * - args->num_of_nodes), GFP_KERNEL); + pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures), + GFP_KERNEL); if (!pa) return -ENOMEM; @@ -1138,7 +1144,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_unlock; } offset = dev->adev->rmmio_remap.bus_addr; - if (!offset) { + if (!offset || (PAGE_SIZE > 4096)) { err = -ENOMEM; goto err_unlock; } @@ -1522,7 +1528,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, /* Find a KFD GPU device that supports the get_dmabuf_info query */ for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++) - if (dev) + if (dev && !kfd_devcgroup_check_permission(dev)) break; if (!dev) return -EINVAL; @@ -1544,7 +1550,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, if (xcp_id >= 0) args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id; else - args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id; + args->gpu_id = dev->id; args->flags = flags; /* Copy metadata buffer to user mode */ @@ -2306,7 +2312,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, return -EINVAL; } offset = pdd->dev->adev->rmmio_remap.bus_addr; - if (!offset) { + if (!offset || (PAGE_SIZE > 4096)) { pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n"); return -ENOMEM; } @@ -2935,6 +2941,7 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v if (IS_ERR_OR_NULL(target)) { pr_debug("Cannot find process PID %i to debug\n", args->pid); r = target ? PTR_ERR(target) : -ESRCH; + target = NULL; goto out; } @@ -3347,6 +3354,9 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process, if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; + if (PAGE_SIZE > 4096) + return -EINVAL; + address = dev->adev->rmmio_remap.bus_addr; vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cd8e459201f1..7f2ae0d15d4a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -55,6 +55,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -64,6 +65,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = { /* Scalar L1 Instruction Cache (in SQC module) per bank */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -73,6 +75,7 @@ static struct kfd_gpu_cache_info kaveri_cache_info[] = { /* Scalar L1 Data Cache (in SQC module) per bank */ .cache_size = 8, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -88,6 +91,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -95,8 +99,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { }, { /* Scalar L1 Instruction Cache (in SQC module) per bank */ - .cache_size = 8, + .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -104,8 +109,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { }, { /* Scalar L1 Data Cache (in SQC module) per bank. */ - .cache_size = 4, + .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -135,6 +141,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -144,6 +151,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -153,6 +161,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -162,6 +171,7 @@ static struct kfd_gpu_cache_info vega10_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 4096, .cache_level = 2, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -174,6 +184,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -183,6 +194,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -192,6 +204,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -201,6 +214,7 @@ static struct kfd_gpu_cache_info raven_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 1024, .cache_level = 2, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -213,6 +227,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -222,6 +237,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -231,6 +247,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -240,6 +257,7 @@ static struct kfd_gpu_cache_info renoir_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 1024, .cache_level = 2, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -252,6 +270,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -261,6 +280,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -270,6 +290,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -279,6 +300,7 @@ static struct kfd_gpu_cache_info vega12_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 2048, .cache_level = 2, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -291,6 +313,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -300,6 +323,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -309,6 +333,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -318,6 +343,7 @@ static struct kfd_gpu_cache_info vega20_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 8192, .cache_level = 2, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -330,6 +356,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -339,6 +366,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -348,6 +376,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -357,6 +386,7 @@ static struct kfd_gpu_cache_info aldebaran_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 8192, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -369,6 +399,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -378,6 +409,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -387,6 +419,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -396,6 +429,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -405,6 +439,7 @@ static struct kfd_gpu_cache_info navi10_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 4096, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -417,6 +452,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -426,6 +462,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -435,6 +472,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -444,6 +482,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -453,6 +492,7 @@ static struct kfd_gpu_cache_info vangogh_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 1024, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -465,6 +505,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -474,6 +515,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -483,6 +525,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -492,6 +535,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -501,6 +545,7 @@ static struct kfd_gpu_cache_info navi14_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 2048, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -513,6 +558,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -522,6 +568,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -531,6 +578,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -540,6 +588,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -549,6 +598,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 4096, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -558,6 +608,7 @@ static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { /* L3 Data Cache per GPU */ .cache_size = 128*1024, .cache_level = 3, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -570,6 +621,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -579,6 +631,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -588,6 +641,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -597,6 +651,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -606,6 +661,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 3072, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -615,6 +671,7 @@ static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { /* L3 Data Cache per GPU */ .cache_size = 96*1024, .cache_level = 3, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -627,6 +684,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -636,6 +694,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -645,6 +704,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -654,6 +714,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -663,6 +724,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 2048, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -672,6 +734,7 @@ static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { /* L3 Data Cache per GPU */ .cache_size = 32*1024, .cache_level = 3, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -684,6 +747,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -693,6 +757,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -702,6 +767,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -711,6 +777,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -720,6 +787,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 1024, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -729,6 +797,7 @@ static struct kfd_gpu_cache_info beige_goby_cache_info[] = { /* L3 Data Cache per GPU */ .cache_size = 16*1024, .cache_level = 3, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -741,6 +810,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -750,6 +820,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -759,6 +830,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -768,6 +840,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -777,6 +850,7 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 2048, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -789,6 +863,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -798,6 +873,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -807,6 +883,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -816,6 +893,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -825,6 +903,7 @@ static struct kfd_gpu_cache_info gfx1037_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 256, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -837,6 +916,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -846,6 +926,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -855,6 +936,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -864,6 +946,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -873,6 +956,7 @@ static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 256, .cache_level = 2, + .cache_line_size = 128, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -885,6 +969,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = { /* TCP L1 Cache per CU */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -894,6 +979,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = { /* Scalar L1 Instruction Cache per SQC */ .cache_size = 32, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -903,6 +989,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = { /* Scalar L1 Data Cache per SQC */ .cache_size = 16, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -912,6 +999,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = { /* GL1 Data Cache per SA */ .cache_size = 128, .cache_level = 1, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -921,6 +1009,7 @@ static struct kfd_gpu_cache_info dummy_cache_info[] = { /* L2 Data Cache per GPU (Total Tex Cache) */ .cache_size = 2048, .cache_level = 2, + .cache_line_size = 64, .flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE), @@ -1587,6 +1676,7 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): num_of_cache_types = kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 74c2d7a0d628..300634b9f668 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -303,6 +303,7 @@ struct kfd_node; struct kfd_gpu_cache_info { uint32_t cache_size; uint32_t cache_level; + uint32_t cache_line_size; uint32_t flags; /* Indicates how many Compute Units share this cache * within a SA. Value = 1 indicates the cache is not shared diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 9ec750666382..d889e3545120 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -1018,12 +1018,14 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target, uint32_t *entry_size) { struct kfd_dbg_device_info_entry device_info; - uint32_t tmp_entry_size = *entry_size, tmp_num_devices; + uint32_t tmp_entry_size, tmp_num_devices; int i, r = 0; if (!(target && user_info && number_of_device_infos && entry_size)) return -EINVAL; + tmp_entry_size = *entry_size; + tmp_num_devices = min_t(size_t, *number_of_device_infos, target->n_pdds); *number_of_device_infos = target->n_pdds; *entry_size = min_t(size_t, *entry_size, sizeof(device_info)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0a9cf9dfc224..9596bca57212 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -96,6 +96,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) case IP_VERSION(6, 0, 2): case IP_VERSION(6, 0, 3): case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): kfd->device_info.num_sdma_queues_per_engine = 8; break; default: @@ -113,6 +114,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) case IP_VERSION(6, 0, 2): case IP_VERSION(6, 0, 3): case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): /* Reserve 1 for paging and 1 for gfx */ kfd->device_info.num_reserved_sdma_queues_per_engine = 2; /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */ @@ -165,6 +167,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; break; default: @@ -420,6 +423,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) gfx_target_version = 110500; f2g = &gfx_v11_kfd2kgd; break; + case IP_VERSION(11, 5, 1): + gfx_target_version = 110501; + f2g = &gfx_v11_kfd2kgd; + break; default: break; } @@ -428,12 +435,12 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) if (!f2g) { if (amdgpu_ip_version(adev, GC_HWIP, 0)) - dev_err(kfd_device, + dev_info(kfd_device, "GC IP %06x %s not supported in kfd\n", amdgpu_ip_version(adev, GC_HWIP, 0), vf ? "VF" : ""); else - dev_err(kfd_device, "%s %s not supported in kfd\n", + dev_info(kfd_device, "%s %s not supported in kfd\n", amdgpu_asic_name[adev->asic_type], vf ? "VF" : ""); return NULL; } @@ -459,34 +466,43 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) { if (cwsr_enable && kfd->device_info.supports_cwsr) { if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) { - BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) + > KFD_CWSR_TMA_OFFSET); kfd->cwsr_isa = cwsr_trap_gfx8_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) { - BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) + > KFD_CWSR_TMA_OFFSET); kfd->cwsr_isa = cwsr_trap_arcturus_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) { - BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) + > KFD_CWSR_TMA_OFFSET); kfd->cwsr_isa = cwsr_trap_aldebaran_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) { - BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) + > KFD_CWSR_TMA_OFFSET); kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex); } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) { - BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) + > KFD_CWSR_TMA_OFFSET); kfd->cwsr_isa = cwsr_trap_gfx9_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) { - BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) + > KFD_CWSR_TMA_OFFSET); kfd->cwsr_isa = cwsr_trap_nv1x_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) { - BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) + > KFD_CWSR_TMA_OFFSET); kfd->cwsr_isa = cwsr_trap_gfx10_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); } else { + /* The gfx11 cwsr trap handler must fit inside a single + page. */ BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx11_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex); @@ -944,7 +960,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) { struct kfd_node *node; int i; - int count; if (!kfd->init_complete) return; @@ -952,12 +967,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) /* for runtime suspend, skip locking kfd */ if (!run_pm) { mutex_lock(&kfd_processes_mutex); - count = ++kfd_locked; - mutex_unlock(&kfd_processes_mutex); - /* For first KFD device suspend all the KFD processes */ - if (count == 1) + if (++kfd_locked == 1) kfd_suspend_all_processes(); + mutex_unlock(&kfd_processes_mutex); } for (i = 0; i < kfd->num_nodes; i++) { @@ -968,7 +981,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) { - int ret, count, i; + int ret, i; if (!kfd->init_complete) return 0; @@ -982,12 +995,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) /* for runtime resume, skip unlocking kfd */ if (!run_pm) { mutex_lock(&kfd_processes_mutex); - count = --kfd_locked; - mutex_unlock(&kfd_processes_mutex); - - WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); - if (count == 0) + if (--kfd_locked == 0) ret = kfd_resume_all_processes(); + WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error"); + mutex_unlock(&kfd_processes_mutex); } return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c0e71543389a..c08b6ee25289 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1903,6 +1903,10 @@ int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, uint64_t *fence_addr = dqm->fence_addr; while (*fence_addr != fence_value) { + /* Fatal err detected, this response won't come */ + if (amdgpu_amdkfd_is_fed(dqm->dev->adev)) + return -EIO; + if (time_after(jiffies, end_jiffies)) { dev_err(dev, "qcm fence wait loop timeout expired\n"); /* In HWS case, this is used to halt the driver thread @@ -1993,10 +1997,10 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, * check those fields */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; - if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) { - dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); + if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { while (halt_if_hws_hang) schedule(); + kfd_hws_hang(dqm); return -ETIME; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 739721254a5d..9b33d9d2c9ad 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -1285,8 +1285,10 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid) uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID; int user_gpu_id; - if (!p) + if (!p) { + dev_warn(dev->adev->dev, "Not find process with pasid:%d\n", pasid); return; /* Presumably process exited. */ + } user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id); if (unlikely(user_gpu_id == -EINVAL)) { @@ -1322,6 +1324,8 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid) } } + dev_warn(dev->adev->dev, "Send SIGBUS to process %s(pasid:%d)\n", + p->lead_thread->comm, pasid); rcu_read_unlock(); /* user application will handle SIGBUS signal */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 6604a3f99c5e..4a64307bc438 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -36,6 +36,7 @@ #include <linux/mm.h> #include <linux/mman.h> #include <linux/processor.h> +#include "amdgpu_vm.h" /* * The primary memory I/O features being added for revisions of gfxip @@ -326,10 +327,16 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) * with small reserved space for kernel. * Set them to CANONICAL addresses. */ - pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_base = max(SVM_USER_BASE, AMDGPU_VA_RESERVED_BOTTOM); pdd->gpuvm_limit = pdd->dev->kfd->shared_resources.gpuvm_size - 1; + /* dGPUs: the reserved space for kernel + * before SVM + */ + pdd->qpd.cwsr_base = SVM_CWSR_BASE; + pdd->qpd.ib_base = SVM_IB_BASE; + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI(); pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); } @@ -339,18 +346,18 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) pdd->lds_base = MAKE_LDS_APP_BASE_V9(); pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); - /* Raven needs SVM to support graphic handle, etc. Leave the small - * reserved space before SVM on Raven as well, even though we don't - * have to. - * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they - * are used in Thunk to reserve SVM. - */ - pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_base = AMDGPU_VA_RESERVED_BOTTOM; pdd->gpuvm_limit = pdd->dev->kfd->shared_resources.gpuvm_size - 1; pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9(); pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + + /* + * Place TBA/TMA on opposite side of VM hole to prevent + * stray faults from triggering SVM on these pages. + */ + pdd->qpd.cwsr_base = AMDGPU_VA_RESERVED_TRAP_START(pdd->dev->adev); } int kfd_init_apertures(struct kfd_process *process) @@ -407,12 +414,6 @@ int kfd_init_apertures(struct kfd_process *process) return -EINVAL; } } - - /* dGPUs: the reserved space for kernel - * before SVM - */ - pdd->qpd.cwsr_base = SVM_CWSR_BASE; - pdd->qpd.ib_base = SVM_IB_BASE; } dev_dbg(kfd_device, "node id %u\n", id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index a7697ec8188e..8e0d0356e810 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -132,7 +132,9 @@ enum SQ_INTERRUPT_ERROR_TYPE { static void event_interrupt_poison_consumption(struct kfd_node *dev, uint16_t pasid, uint16_t client_id) { + enum amdgpu_ras_block block = 0; int old_poison, ret = -EINVAL; + uint32_t reset = 0; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); if (!p) @@ -151,12 +153,17 @@ static void event_interrupt_poison_consumption(struct kfd_node *dev, case SOC15_IH_CLIENTID_SE3SH: case SOC15_IH_CLIENTID_UTCL2: ret = kfd_dqm_evict_pasid(dev->dqm, pasid); + block = AMDGPU_RAS_BLOCK__GFX; + if (ret) + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; break; case SOC15_IH_CLIENTID_SDMA0: case SOC15_IH_CLIENTID_SDMA1: case SOC15_IH_CLIENTID_SDMA2: case SOC15_IH_CLIENTID_SDMA3: case SOC15_IH_CLIENTID_SDMA4: + block = AMDGPU_RAS_BLOCK__SDMA; + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; break; default: break; @@ -167,17 +174,16 @@ static void event_interrupt_poison_consumption(struct kfd_node *dev, /* resetting queue passes, do page retirement without gpu reset * resetting queue fails, fallback to gpu reset solution */ - if (!ret) { + if (!ret) dev_warn(dev->adev->dev, "RAS poison consumption, unmap queue flow succeeded: client id %d\n", client_id); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false); - } else { + else dev_warn(dev->adev->dev, "RAS poison consumption, fall back to gpu reset flow: client id %d\n", client_id); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true); - } + + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); } static bool event_interrupt_isr_v10(struct kfd_node *dev, @@ -336,7 +342,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), @@ -364,10 +371,25 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); + uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); + uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); + int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; - if (client_id == SOC15_IH_CLIENTID_UTCL2 && - amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) { + /* gfxhub */ + if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { + hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, + node_id); + if (hub_inst < 0) + hub_inst = 0; + } + + /* mmhub */ + if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) + hub_inst = node_id / 4; + + if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, + hub_inst, vmid_type)) { event_interrupt_poison_consumption(dev, pasid, client_id); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index 2a65792fd116..f524a55eee11 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -191,7 +191,9 @@ static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1) static void event_interrupt_poison_consumption_v11(struct kfd_node *dev, uint16_t pasid, uint16_t source_id) { + enum amdgpu_ras_block block = 0; int ret = -EINVAL; + uint32_t reset = 0; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); if (!p) @@ -210,9 +212,14 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev, case SOC15_INTSRC_SQ_INTERRUPT_MSG: if (dev->dqm->ops.reset_queues) ret = dev->dqm->ops.reset_queues(dev->dqm, pasid); + block = AMDGPU_RAS_BLOCK__GFX; + if (ret) + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; break; case SOC21_INTSRC_SDMA_ECC: default: + block = AMDGPU_RAS_BLOCK__GFX; + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; break; } @@ -220,10 +227,7 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev, /* resetting queue passes, do page retirement without gpu reset resetting queue fails, fallback to gpu reset solution */ - if (!ret) - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false); - else - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true); + amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); } static bool event_interrupt_isr_v11(struct kfd_node *dev, @@ -325,7 +329,8 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, /* CP */ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); - else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_CTXID0_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 27cdaea40501..e1c21d250611 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -143,7 +143,9 @@ enum SQ_INTERRUPT_ERROR_TYPE { static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, uint16_t pasid, uint16_t client_id) { - int old_poison, ret = -EINVAL; + enum amdgpu_ras_block block = 0; + int old_poison; + uint32_t reset = 0; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); if (!p) @@ -161,34 +163,35 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, case SOC15_IH_CLIENTID_SE2SH: case SOC15_IH_CLIENTID_SE3SH: case SOC15_IH_CLIENTID_UTCL2: - ret = kfd_dqm_evict_pasid(dev->dqm, pasid); + block = AMDGPU_RAS_BLOCK__GFX; + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + break; + case SOC15_IH_CLIENTID_VMC: + case SOC15_IH_CLIENTID_VMC1: + block = AMDGPU_RAS_BLOCK__MMHUB; + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; break; case SOC15_IH_CLIENTID_SDMA0: case SOC15_IH_CLIENTID_SDMA1: case SOC15_IH_CLIENTID_SDMA2: case SOC15_IH_CLIENTID_SDMA3: case SOC15_IH_CLIENTID_SDMA4: + block = AMDGPU_RAS_BLOCK__SDMA; + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; break; default: - break; + dev_warn(dev->adev->dev, + "client %d does not support poison consumption\n", client_id); + return; } kfd_signal_poison_consumed_event(dev, pasid); - /* resetting queue passes, do page retirement without gpu reset - * resetting queue fails, fallback to gpu reset solution - */ - if (!ret) { - dev_warn(dev->adev->dev, - "RAS poison consumption, unmap queue flow succeeded: client id %d\n", - client_id); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false); - } else { - dev_warn(dev->adev->dev, - "RAS poison consumption, fall back to gpu reset flow: client id %d\n", - client_id); - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true); - } + dev_warn(dev->adev->dev, + "poison is consumed by client %d, kick off gpu reset flow\n", client_id); + + amdgpu_amdkfd_ras_pasid_poison_consumption_handler(dev->adev, + block, pasid, NULL, NULL, reset); } static bool context_id_expected(struct kfd_dev *dev) @@ -385,7 +388,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, sq_int_data, 24); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && + KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) { kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), @@ -410,10 +414,25 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); + uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); + uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); + int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; - if (client_id == SOC15_IH_CLIENTID_UTCL2 && - amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) { + /* gfxhub */ + if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { + hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, + node_id); + if (hub_inst < 0) + hub_inst = 0; + } + + /* mmhub */ + if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) + hub_inst = node_id / 4; + + if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, + hub_inst, vmid_type)) { event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index dd3c43c1ad70..9b6b6e882593 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -104,6 +104,8 @@ void kfd_interrupt_exit(struct kfd_node *node) */ flush_workqueue(node->ih_wq); + destroy_workqueue(node->ih_wq); + kfifo_free(&node->ih_fifo); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 1bea629c49ca..32c926986dbb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -286,7 +286,7 @@ err_no_space: return -ENOMEM; } -void kq_submit_packet(struct kernel_queue *kq) +int kq_submit_packet(struct kernel_queue *kq) { #ifdef DEBUG int i; @@ -298,6 +298,10 @@ void kq_submit_packet(struct kernel_queue *kq) } pr_debug("\n"); #endif + /* Fatal err detected, packet submission won't go through */ + if (amdgpu_amdkfd_is_fed(kq->dev->adev)) + return -EIO; + if (kq->dev->kfd->device_info.doorbell_size == 8) { *kq->wptr64_kernel = kq->pending_wptr64; write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, @@ -307,6 +311,8 @@ void kq_submit_packet(struct kernel_queue *kq) write_kernel_doorbell(kq->queue->properties.doorbell_ptr, kq->pending_wptr); } + + return 0; } void kq_rollback_packet(struct kernel_queue *kq) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index 9a6244430845..e24ee50acdf0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -47,7 +47,7 @@ int kq_acquire_packet_buffer(struct kernel_queue *kq, size_t packet_size_in_dwords, unsigned int **buffer_ptr); -void kq_submit_packet(struct kernel_queue *kq); +int kq_submit_packet(struct kernel_queue *kq); void kq_rollback_packet(struct kernel_queue *kq); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index bdc01ca9609a..4816fcb9803a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -77,7 +77,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, - dst_addr, num_bytes, false); + dst_addr, num_bytes, 0); amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); @@ -153,7 +153,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, } r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE, - NULL, &next, false, true, false); + NULL, &next, false, true, 0); if (r) { dev_err(adev->dev, "fail %d to copy memory\n", r); goto out_unlock; @@ -509,10 +509,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = start_mgr << PAGE_SHIFT; end = (last_mgr + 1) << PAGE_SHIFT; + r = amdgpu_amdkfd_reserve_mem_limit(node->adev, + prange->npages * PAGE_SIZE, + KFD_IOC_ALLOC_MEM_FLAGS_VRAM, + node->xcp ? node->xcp->id : 0); + if (r) { + dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r); + return -ENOSPC; + } + r = svm_range_vram_node_new(node, prange, true); if (r) { dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r); - return r; + goto out; } ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT; @@ -545,6 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, svm_range_vram_node_free(prange); } +out: + amdgpu_amdkfd_unreserve_mem_limit(node->adev, + prange->npages * PAGE_SIZE, + KFD_IOC_ALLOC_MEM_FLAGS_VRAM, + node->xcp ? node->xcp->id : 0); return r < 0 ? r : 0; } @@ -1009,7 +1023,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1)) return -EINVAL; - if (adev->gmc.is_app_apu) + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) return 0; pgmap = &kfddev->pgmap; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 050a6936ff84..8746a61a852d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -290,3 +290,21 @@ uint64_t kfd_mqd_stride(struct mqd_manager *mm, { return mm->mqd_size; } + +bool kfd_check_hiq_mqd_doorbell_id(struct kfd_node *node, uint32_t doorbell_id, + uint32_t inst) +{ + if (doorbell_id) { + struct device *dev = node->adev->dev; + + if (node->adev->xcp_mgr && node->adev->xcp_mgr->num_xcps > 0) + dev_err(dev, "XCC %d: Queue preemption failed for queue with doorbell_id: %x\n", + inst, doorbell_id); + else + dev_err(dev, "Queue preemption failed for queue with doorbell_id: %x\n", + doorbell_id); + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 57bf5e513f4d..17cc1f25c8d0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -119,7 +119,7 @@ struct mqd_manager { #if defined(CONFIG_DEBUG_FS) int (*debugfs_show_mqd)(struct seq_file *m, void *data); #endif - uint32_t (*read_doorbell_id)(void *mqd); + bool (*check_preemption_failed)(struct mqd_manager *mm, void *mqd); uint64_t (*mqd_stride)(struct mqd_manager *mm, struct queue_properties *p); @@ -128,6 +128,31 @@ struct mqd_manager { uint32_t mqd_size; }; +struct mqd_user_context_save_area_header { + /* Byte offset from start of user context + * save area to the last saved top (lowest + * address) of control stack data. Must be + * 4 byte aligned. + */ + uint32_t control_stack_offset; + + /* Byte size of the last saved control stack + * data. Must be 4 byte aligned. + */ + uint32_t control_stack_size; + + /* Byte offset from start of user context save + * area to the last saved base (lowest address) + * of wave state data. Must be 4 byte aligned. + */ + uint32_t wave_state_offset; + + /* Byte size of the last saved wave state data. + * Must be 4 byte aligned. + */ + uint32_t wave_state_size; +}; + struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev, struct queue_properties *q); @@ -173,4 +198,6 @@ void kfd_get_hiq_xcc_mqd(struct kfd_node *dev, uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev); uint64_t kfd_mqd_stride(struct mqd_manager *mm, struct queue_properties *q); +bool kfd_check_hiq_mqd_doorbell_id(struct kfd_node *node, uint32_t doorbell_id, + uint32_t inst); #endif /* KFD_MQD_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 1a4a69943c71..05f3ac2eaef9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -206,11 +206,11 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd, q->is_active = QUEUE_IS_ACTIVE(*q); } -static uint32_t read_doorbell_id(void *mqd) +static bool check_preemption_failed(struct mqd_manager *mm, void *mqd) { struct cik_mqd *m = (struct cik_mqd *)mqd; - return m->queue_doorbell_id0; + return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0); } static void update_mqd(struct mqd_manager *mm, void *mqd, @@ -423,7 +423,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif - mqd->read_doorbell_id = read_doorbell_id; + mqd->check_preemption_failed = check_preemption_failed; break; case KFD_MQD_TYPE_DIQ: mqd->allocate_mqd = allocate_mqd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 22cbfa1bdadd..2eff37aaf827 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -224,11 +224,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, q->is_active = QUEUE_IS_ACTIVE(*q); } -static uint32_t read_doorbell_id(void *mqd) +static bool check_preemption_failed(struct mqd_manager *mm, void *mqd) { struct v10_compute_mqd *m = (struct v10_compute_mqd *)mqd; - return m->queue_doorbell_id0; + return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0); } static int get_wave_state(struct mqd_manager *mm, void *mqd, @@ -488,7 +488,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif - mqd->read_doorbell_id = read_doorbell_id; + mqd->check_preemption_failed = check_preemption_failed; pr_debug("%s@%i\n", __func__, __LINE__); break; case KFD_MQD_TYPE_DIQ: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 826bc4f6c8a7..68dbc0399c87 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -278,11 +278,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, q->is_active = QUEUE_IS_ACTIVE(*q); } -static uint32_t read_doorbell_id(void *mqd) +static bool check_preemption_failed(struct mqd_manager *mm, void *mqd) { struct v11_compute_mqd *m = (struct v11_compute_mqd *)mqd; - return m->queue_doorbell_id0; + return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0); } static int get_wave_state(struct mqd_manager *mm, void *mqd, @@ -517,7 +517,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type, #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif - mqd->read_doorbell_id = read_doorbell_id; + mqd->check_preemption_failed = check_preemption_failed; pr_debug("%s@%i\n", __func__, __LINE__); break; case KFD_MQD_TYPE_DIQ: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 697b6d530d12..6bddc16808d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -316,11 +316,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, } -static uint32_t read_doorbell_id(void *mqd) +static bool check_preemption_failed(struct mqd_manager *mm, void *mqd) { struct v9_mqd *m = (struct v9_mqd *)mqd; - return m->queue_doorbell_id0; + return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0); } static int get_wave_state(struct mqd_manager *mm, void *mqd, @@ -607,6 +607,24 @@ static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, return err; } +static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd) +{ + uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev); + uint32_t xcc_mask = mm->dev->xcc_mask; + int inst = 0, xcc_id; + struct v9_mqd *m; + bool ret = false; + + for_each_inst(xcc_id, xcc_mask) { + m = get_mqd(mqd + hiq_mqd_size * inst); + ret |= kfd_check_hiq_mqd_doorbell_id(mm->dev, + m->queue_doorbell_id0, inst); + ++inst; + } + + return ret; +} + static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj, struct kfd_mem_obj *xcc_mqd_mem_obj, uint64_t offset) @@ -881,15 +899,16 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif - mqd->read_doorbell_id = read_doorbell_id; if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) { mqd->init_mqd = init_mqd_hiq_v9_4_3; mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3; mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3; + mqd->check_preemption_failed = check_preemption_failed_v9_4_3; } else { mqd->init_mqd = init_mqd_hiq; mqd->load_mqd = kfd_hiq_load_mqd_kiq; mqd->destroy_mqd = destroy_hiq_mqd; + mqd->check_preemption_failed = check_preemption_failed; } break; case KFD_MQD_TYPE_DIQ: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 3e1a574d4ea6..c1fafc502515 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -237,11 +237,11 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd, q->is_active = QUEUE_IS_ACTIVE(*q); } -static uint32_t read_doorbell_id(void *mqd) +static bool check_preemption_failed(struct mqd_manager *mm, void *mqd) { struct vi_mqd *m = (struct vi_mqd *)mqd; - return m->queue_doorbell_id0; + return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0); } static void update_mqd(struct mqd_manager *mm, void *mqd, @@ -482,7 +482,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif - mqd->read_doorbell_id = read_doorbell_id; + mqd->check_preemption_failed = check_preemption_failed; break; case KFD_MQD_TYPE_DIQ: mqd->allocate_mqd = allocate_mqd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 401096c103b2..d6f65f39072b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -288,7 +288,7 @@ int pm_send_set_resources(struct packet_manager *pm, retval = pm->pmf->set_resources(pm, buffer, res); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); @@ -325,7 +325,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval) goto fail_create_runlist; - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); mutex_unlock(&pm->lock); @@ -361,7 +361,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); @@ -392,7 +392,7 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) retval = pm->pmf->set_grace_period(pm, buffer, grace_period); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); } @@ -421,7 +421,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset); if (!retval) - kq_submit_packet(pm->priv_queue); + retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 80320b8603fc..a81ef232fdef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -99,11 +99,11 @@ /* * Size of the per-process TBA+TMA buffer: 2 pages * - * The first page is the TBA used for the CWSR ISA code. The second - * page is used as TMA for user-mode trap handler setup in daisy-chain mode. + * The first chunk is the TBA used for the CWSR ISA code. The second + * chunk is used as TMA for user-mode trap handler setup in daisy-chain mode. */ #define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) -#define KFD_CWSR_TMA_OFFSET PAGE_SIZE +#define KFD_CWSR_TMA_OFFSET (PAGE_SIZE + 2048) #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ (KFD_MAX_NUM_OF_PROCESSES * \ @@ -1473,7 +1473,7 @@ static inline void kfd_flush_tlb(struct kfd_process_device *pdd, static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) { - return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) || + return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) || (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 717a60d7a4ea..451bb058cc62 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -819,9 +819,9 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) mutex_lock(&kfd_processes_mutex); if (kfd_is_locked()) { - mutex_unlock(&kfd_processes_mutex); pr_debug("KFD is locked! Cannot create process"); - return ERR_PTR(-EINVAL); + process = ERR_PTR(-EINVAL); + goto out; } /* A prior open of /dev/kfd could have already created the process. */ @@ -829,6 +829,14 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) if (process) { pr_debug("Process already found\n"); } else { + /* If the process just called exec(3), it is possible that the + * cleanup of the kfd_process (following the release of the mm + * of the old process image) is still in the cleanup work queue. + * Make sure to drain any job before trying to recreate any + * resource for this process. + */ + flush_workqueue(kfd_process_wq); + process = create_process(thread); if (IS_ERR(process)) goto out; @@ -1922,6 +1930,8 @@ static int signal_eviction_fence(struct kfd_process *p) rcu_read_lock(); ef = dma_fence_get_rcu_safe(&p->ef); rcu_read_unlock(); + if (!ef) + return -EINVAL; ret = dma_fence_signal(ef); dma_fence_put(ef); @@ -1949,10 +1959,9 @@ static void evict_process_worker(struct work_struct *work) * they are responsible stopping the queues and scheduling * the restore work. */ - if (!signal_eviction_fence(p)) - queue_delayed_work(kfd_restore_wq, &p->restore_work, - msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); - else + if (signal_eviction_fence(p) || + mod_delayed_work(kfd_restore_wq, &p->restore_work, + msecs_to_jiffies(PROCESS_RESTORE_TIME_MS))) kfd_process_restore_queues(p); pr_debug("Finished evicting pasid 0x%x\n", p->pasid); @@ -2011,9 +2020,9 @@ static void restore_process_worker(struct work_struct *work) if (ret) { pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n", p->pasid, PROCESS_BACK_OFF_TIME_MS); - ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, - msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); - WARN(!ret, "reschedule restore work failed\n"); + if (mod_delayed_work(kfd_restore_wq, &p->restore_work, + msecs_to_jiffies(PROCESS_RESTORE_TIME_MS))) + kfd_process_restore_queues(p); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index d9953c2b2661..06ac835190f9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -238,16 +238,16 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev, void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid) { - struct amdgpu_task_info task_info; - - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(dev->adev, pasid, &task_info); - /* Report VM faults from user applications, not retry from kernel */ - if (!task_info.pid) - return; - - kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n", - task_info.pid, task_info.task_name); + struct amdgpu_task_info *task_info; + + task_info = amdgpu_vm_get_task_info_pasid(dev->adev, pasid); + if (task_info) { + /* Report VM faults from user applications, not retry from kernel */ + if (task_info->pid) + kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n", + task_info->pid, task_info->task_name); + amdgpu_vm_put_task_info(task_info); + } } void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c50a0dc9c9c0..069b81eeea03 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1515,9 +1515,9 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr) goto unreserve_out; } - r = amdgpu_vm_validate_pt_bos(pdd->dev->adev, - drm_priv_to_vm(pdd->drm_priv), - svm_range_bo_validate, NULL); + r = amdgpu_vm_validate(pdd->dev->adev, + drm_priv_to_vm(pdd->drm_priv), NULL, + svm_range_bo_validate, NULL); if (r) { pr_debug("failed %d validate pt bos\n", r); goto unreserve_out; @@ -1641,7 +1641,9 @@ static int svm_range_validate_and_map(struct mm_struct *mm, goto free_ctx; } - svm_range_reserve_bos(ctx, intr); + r = svm_range_reserve_bos(ctx, intr); + if (r) + goto free_ctx; p = container_of(prange->svms, struct kfd_process, svms); owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap, @@ -2617,7 +2619,8 @@ svm_range_best_restore_location(struct svm_range *prange, return -1; } - if (node->adev->gmc.is_app_apu) + if (node->adev->gmc.is_app_apu || + node->adev->flags & AMD_IS_APU) return 0; if (prange->preferred_loc == gpuid || @@ -3335,7 +3338,8 @@ svm_range_best_prefetch_location(struct svm_range *prange) goto out; } - if (bo_node->adev->gmc.is_app_apu) { + if (bo_node->adev->gmc.is_app_apu || + bo_node->adev->flags & AMD_IS_APU) { best_loc = 0; goto out; } @@ -3424,7 +3428,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH); *migrated = !r; - return r; + return 0; } int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 026863a0abcd..9c37bd0567ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -201,7 +201,8 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s * is initialized to not 0 when page migration register device memory. */ #define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\ - (adev)->gmc.is_app_apu) + (adev)->gmc.is_app_apu ||\ + ((adev)->flags & AMD_IS_APU)) void svm_range_bo_unref_async(struct svm_range_bo *svm_bo); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 6ed2ec381aaa..bc9eb847ecfe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1564,6 +1564,7 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext, pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); pcache->cache_level = pcache_info[cache_type].cache_level; pcache->cache_size = pcache_info[cache_type].cache_size; + pcache->cacheline_size = pcache_info[cache_type].cache_line_size; if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) pcache->cache_type |= HSA_CACHE_TYPE_DATA; @@ -1632,6 +1633,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, pcache->processor_id_low = cu_processor_id + (first_active_cu - 1); pcache->cache_level = pcache_info[cache_type].cache_level; + pcache->cacheline_size = pcache_info[cache_type].cache_line_size; if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3)) mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); @@ -1703,6 +1705,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct gpu_processor_id = dev->node_props.simd_id_base; + memset(cache_info, 0, sizeof(cache_info)); pcache_info = cache_info; num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info); if (!num_of_cache_types) { |