diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau')
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_drm.c | 63 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_drv.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_svm.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 26 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c | 16 |
6 files changed, 116 insertions, 0 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index b65ae817eabf..2d4c899e1f8b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -618,6 +618,64 @@ nouveau_drm_device_fini(struct drm_device *dev) kfree(drm); } +/* + * On some Intel PCIe bridge controllers doing a + * D0 -> D3hot -> D3cold -> D0 sequence causes Nvidia GPUs to not reappear. + * Skipping the intermediate D3hot step seems to make it work again. This is + * probably caused by not meeting the expectation the involved AML code has + * when the GPU is put into D3hot state before invoking it. + * + * This leads to various manifestations of this issue: + * - AML code execution to power on the GPU hits an infinite loop (as the + * code waits on device memory to change). + * - kernel crashes, as all PCI reads return -1, which most code isn't able + * to handle well enough. + * + * In all cases dmesg will contain at least one line like this: + * 'nouveau 0000:01:00.0: Refused to change power state, currently in D3' + * followed by a lot of nouveau timeouts. + * + * In the \_SB.PCI0.PEG0.PG00._OFF code deeper down writes bit 0x80 to the not + * documented PCI config space register 0x248 of the Intel PCIe bridge + * controller (0x1901) in order to change the state of the PCIe link between + * the PCIe port and the GPU. There are alternative code paths using other + * registers, which seem to work fine (executed pre Windows 8): + * - 0xbc bit 0x20 (publicly available documentation claims 'reserved') + * - 0xb0 bit 0x10 (link disable) + * Changing the conditions inside the firmware by poking into the relevant + * addresses does resolve the issue, but it seemed to be ACPI private memory + * and not any device accessible memory at all, so there is no portable way of + * changing the conditions. + * On a XPS 9560 that means bits [0,3] on \CPEX need to be cleared. + * + * The only systems where this behavior can be seen are hybrid graphics laptops + * with a secondary Nvidia Maxwell, Pascal or Turing GPU. It's unclear whether + * this issue only occurs in combination with listed Intel PCIe bridge + * controllers and the mentioned GPUs or other devices as well. + * + * documentation on the PCIe bridge controller can be found in the + * "7th Generation IntelĀ® Processor Families for H Platforms Datasheet Volume 2" + * Section "12 PCI Express* Controller (x16) Registers" + */ + +static void quirk_broken_nv_runpm(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + struct nouveau_drm *drm = nouveau_drm(dev); + struct pci_dev *bridge = pci_upstream_bridge(pdev); + + if (!bridge || bridge->vendor != PCI_VENDOR_ID_INTEL) + return; + + switch (bridge->device) { + case 0x1901: + drm->old_pm_cap = pdev->pm_cap; + pdev->pm_cap = 0; + NV_INFO(drm, "Disabling PCI power management to avoid bug\n"); + break; + } +} + static int nouveau_drm_probe(struct pci_dev *pdev, const struct pci_device_id *pent) { @@ -699,6 +757,7 @@ static int nouveau_drm_probe(struct pci_dev *pdev, if (ret) goto fail_drm_dev_init; + quirk_broken_nv_runpm(pdev); return 0; fail_drm_dev_init: @@ -734,7 +793,11 @@ static void nouveau_drm_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); + struct nouveau_drm *drm = nouveau_drm(dev); + /* revert our workaround */ + if (drm->old_pm_cap) + pdev->pm_cap = drm->old_pm_cap; nouveau_drm_device_remove(dev); pci_disable_device(pdev); } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index c2c332fbde97..2a6519737800 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -140,6 +140,8 @@ struct nouveau_drm { struct list_head clients; + u8 old_pm_cap; + struct { struct agp_bridge_data *bridge; u32 base; diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index df9bf1fd1bc0..c567526b75b8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -171,6 +171,11 @@ nouveau_svmm_bind(struct drm_device *dev, void *data, mm = get_task_mm(current); down_read(&mm->mmap_sem); + if (!cli->svm.svmm) { + up_read(&mm->mmap_sem); + return -EINVAL; + } + for (addr = args->va_start, end = args->va_start + size; addr < end;) { struct vm_area_struct *vma; unsigned long next; @@ -179,6 +184,7 @@ nouveau_svmm_bind(struct drm_device *dev, void *data, if (!vma) break; + addr = max(addr, vma->vm_start); next = min(vma->vm_end, end); /* This is a best effort so we ignore errors */ nouveau_dmem_migrate_vma(cli->drm, vma, addr, next); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index dd8f85b8b3a7..f2f5636efac4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1981,8 +1981,34 @@ gf100_gr_init_(struct nvkm_gr *base) { struct gf100_gr *gr = gf100_gr(base); struct nvkm_subdev *subdev = &base->engine.subdev; + struct nvkm_device *device = subdev->device; + bool reset = device->chipset == 0x137 || device->chipset == 0x138; u32 ret; + /* On certain GP107/GP108 boards, we trigger a weird issue where + * GR will stop responding to PRI accesses after we've asked the + * SEC2 RTOS to boot the GR falcons. This happens with far more + * frequency when cold-booting a board (ie. returning from D3). + * + * The root cause for this is not known and has proven difficult + * to isolate, with many avenues being dead-ends. + * + * A workaround was discovered by Karol, whereby putting GR into + * reset for an extended period right before initialisation + * prevents the problem from occuring. + * + * XXX: As RM does not require any such workaround, this is more + * of a hack than a true fix. + */ + reset = nvkm_boolopt(device->cfgopt, "NvGrResetWar", reset); + if (reset) { + nvkm_mask(device, 0x000200, 0x00001000, 0x00000000); + nvkm_rd32(device, 0x000200); + msleep(50); + nvkm_mask(device, 0x000200, 0x00001000, 0x00001000); + nvkm_rd32(device, 0x000200); + } + nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false); ret = nvkm_falcon_get(&gr->fecs.falcon, subdev); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c index 232a9d7c51e5..e770c9497871 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c @@ -25,6 +25,9 @@ MODULE_FIRMWARE("nvidia/gp108/sec2/desc.bin"); MODULE_FIRMWARE("nvidia/gp108/sec2/image.bin"); MODULE_FIRMWARE("nvidia/gp108/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/gv100/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/gv100/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/gv100/sec2/sig.bin"); static const struct nvkm_sec2_fwif gp108_sec2_fwif[] = { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c index b6ebd95c9ba1..a8295653ceab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c @@ -56,6 +56,22 @@ tu102_sec2_nofw(struct nvkm_sec2 *sec2, int ver, return 0; } +MODULE_FIRMWARE("nvidia/tu102/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/tu102/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/tu102/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/tu104/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/tu104/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/tu104/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/tu106/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/tu106/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/tu106/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/tu116/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/tu116/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/tu116/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/tu117/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/tu117/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/tu117/sec2/sig.bin"); + static const struct nvkm_sec2_fwif tu102_sec2_fwif[] = { { 0, gp102_sec2_load, &tu102_sec2, &gp102_sec2_acr_1 }, |