Diffstat (limited to 'Documentation/gpu')
-rw-r--r--  Documentation/gpu/amdgpu/apu-asic-info-table.csv        |   5
-rw-r--r--  Documentation/gpu/amdgpu/display/dc-debug.rst           |  41
-rw-r--r--  Documentation/gpu/amdgpu/display/trace-groups-table.csv |  29
-rw-r--r--  Documentation/gpu/automated_testing.rst                 |   7
-rw-r--r--  Documentation/gpu/driver-uapi.rst                       |   5
-rw-r--r--  Documentation/gpu/drivers.rst                           |   3
-rw-r--r--  Documentation/gpu/drm-kms-helpers.rst                   |   6
-rw-r--r--  Documentation/gpu/drm-kms.rst                           |   8
-rw-r--r--  Documentation/gpu/drm-mm.rst                            |  10
-rw-r--r--  Documentation/gpu/drm-vm-bind-locking.rst               | 582
-rw-r--r--  Documentation/gpu/imagination/index.rst                 |  13
-rw-r--r--  Documentation/gpu/imagination/uapi.rst                  | 171
-rw-r--r--  Documentation/gpu/implementation_guidelines.rst         |   1
-rw-r--r--  Documentation/gpu/rfc/xe.rst                            | 132
-rw-r--r--  Documentation/gpu/todo.rst                              |  47
-rw-r--r--  Documentation/gpu/xe/index.rst                          |  25
-rw-r--r--  Documentation/gpu/xe/xe_cs.rst                          |   8
-rw-r--r--  Documentation/gpu/xe/xe_debugging.rst                   |   7
-rw-r--r--  Documentation/gpu/xe/xe_firmware.rst                    |  37
-rw-r--r--  Documentation/gpu/xe/xe_gt_mcr.rst                      |  13
-rw-r--r--  Documentation/gpu/xe/xe_map.rst                         |   8
-rw-r--r--  Documentation/gpu/xe/xe_migrate.rst                     |   8
-rw-r--r--  Documentation/gpu/xe/xe_mm.rst                          |  14
-rw-r--r--  Documentation/gpu/xe/xe_pcode.rst                       |  14
-rw-r--r--  Documentation/gpu/xe/xe_pm.rst                          |  14
-rw-r--r--  Documentation/gpu/xe/xe_rtp.rst                         |  20
-rw-r--r--  Documentation/gpu/xe/xe_tile.rst                        |  14
-rw-r--r--  Documentation/gpu/xe/xe_wa.rst                          |  14
28 files changed, 1182 insertions(+), 74 deletions(-)
diff --git a/Documentation/gpu/amdgpu/apu-asic-info-table.csv b/Documentation/gpu/amdgpu/apu-asic-info-table.csv
index 2e76b427ba1e..18868abe2a91 100644
--- a/Documentation/gpu/amdgpu/apu-asic-info-table.csv
+++ b/Documentation/gpu/amdgpu/apu-asic-info-table.csv
@@ -7,6 +7,7 @@ SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0
Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1
Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembrandt / Rembrandt-R, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
-Ryzen 7x45 series (FL1), / Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
+Ryzen 7x45 series (FL1), Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
-Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11 \ No newline at end of file
+Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
+Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
diff --git a/Documentation/gpu/amdgpu/display/dc-debug.rst b/Documentation/gpu/amdgpu/display/dc-debug.rst
index 40c55a618918..817631b1dbf3 100644
--- a/Documentation/gpu/amdgpu/display/dc-debug.rst
+++ b/Documentation/gpu/amdgpu/display/dc-debug.rst
@@ -75,3 +75,44 @@ change in real-time by using something like::
When reporting a bug related to DC, consider attaching this log before and
after you reproduce the bug.
+
+DMUB Firmware Debug
+===================
+
+Sometimes, dmesg logs aren't enough. This is especially true if a feature is
+implemented primarily in DMUB firmware. In such cases, all we see in dmesg when
+an issue arises is some generic timeout error. So, to get more relevant
+information, we can trace DMUB commands by enabling the relevant bits in
+`amdgpu_dm_dmub_trace_mask`.
+
+Currently, we support the tracing of the following groups:
+
+Trace Groups
+------------
+
+.. csv-table::
+ :header-rows: 1
+ :widths: 1, 1
+ :file: ./trace-groups-table.csv
+
+**Note: Not all ASICs support all of the listed trace groups**
+
+So, to enable just PSR tracing (the ``PSR`` and ``PSR STATE`` groups,
+``0x20 | 0x8000``), you can use the following command::
+
+ # echo 0x8020 > /sys/kernel/debug/dri/0/amdgpu_dm_dmub_trace_mask
+
+Then, you need to enable logging trace events to the buffer, which you can do
+using the following::
+
+ # echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
+
+Lastly, after you are able to reproduce the issue you are trying to debug,
+you can disable tracing and read the trace log by using the following::
+
+ # echo 0 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en
+ # cat /sys/kernel/debug/dri/0/amdgpu_dm_dmub_tracebuffer
+
+So, when reporting bugs related to features such as PSR and ABM, consider
+enabling the relevant bits in the mask before reproducing the issue and
+attaching the log obtained from the trace buffer to any bug report that you
+create.
diff --git a/Documentation/gpu/amdgpu/display/trace-groups-table.csv b/Documentation/gpu/amdgpu/display/trace-groups-table.csv
new file mode 100644
index 000000000000..3f6a50d1d883
--- /dev/null
+++ b/Documentation/gpu/amdgpu/display/trace-groups-table.csv
@@ -0,0 +1,29 @@
+Name, Mask Value
+INFO, 0x1
+IRQ SVC, 0x2
+VBIOS, 0x4
+REGISTER, 0x8
+PHY DBG, 0x10
+PSR, 0x20
+AUX, 0x40
+SMU, 0x80
+MALL, 0x100
+ABM, 0x200
+ALPM, 0x400
+TIMER, 0x800
+HW LOCK MGR, 0x1000
+INBOX1, 0x2000
+PHY SEQ, 0x4000
+PSR STATE, 0x8000
+ZSTATE, 0x10000
+TRANSMITTER CTL, 0x20000
+PANEL CNTL, 0x40000
+FAMS, 0x80000
+DPIA, 0x100000
+SUBVP, 0x200000
+INBOX0, 0x400000
+SDP, 0x4000000
+REPLAY, 0x8000000
+REPLAY RESIDENCY, 0x20000000
+CURSOR INFO, 0x80000000
+IPS, 0x100000000
diff --git a/Documentation/gpu/automated_testing.rst b/Documentation/gpu/automated_testing.rst
index 240e29d5ba68..2d5a28866afe 100644
--- a/Documentation/gpu/automated_testing.rst
+++ b/Documentation/gpu/automated_testing.rst
@@ -69,14 +69,15 @@ the result. They will still be run.
Each new flake entry must be associated with a link to the email reporting the
bug to the author of the affected driver, the board name or Device Tree name of
-the board, the first kernel version affected, and an approximation of the
-failure rate.
+the board, the first kernel version affected, the IGT version used for tests,
+and an approximation of the failure rate.
They should be provided under the following format::
# Bug Report: $LORE_OR_PATCHWORK_URL
# Board Name: broken-board.dtb
- # Version: 6.6-rc1
+ # Linux Version: 6.6-rc1
+ # IGT Version: 1.28-gd2af13d9f
# Failure Rate: 100
flaky-test
diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst
index c08bcbb95fb3..e5070a0e95ab 100644
--- a/Documentation/gpu/driver-uapi.rst
+++ b/Documentation/gpu/driver-uapi.rst
@@ -17,3 +17,8 @@ VM_BIND / EXEC uAPI
:doc: Overview
.. kernel-doc:: include/uapi/drm/nouveau_drm.h
+
+drm/xe uAPI
+===========
+
+.. kernel-doc:: include/uapi/drm/xe_drm.h
diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst
index 45a12e552091..b899cbc5c2b4 100644
--- a/Documentation/gpu/drivers.rst
+++ b/Documentation/gpu/drivers.rst
@@ -3,9 +3,11 @@ GPU Driver Documentation
========================
.. toctree::
+ :maxdepth: 3
amdgpu/index
i915
+ imagination/index
mcde
meson
pl111
@@ -16,6 +18,7 @@ GPU Driver Documentation
vkms
bridge/dw-hdmi
xen-front
+ xe/index
afbc
komeda-kms
panfrost
diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst
index b748b8ae70b2..59cfe8a7a8ba 100644
--- a/Documentation/gpu/drm-kms-helpers.rst
+++ b/Documentation/gpu/drm-kms-helpers.rst
@@ -363,6 +363,12 @@ EDID Helper Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_edid.c
:export:
+.. kernel-doc:: include/drm/drm_eld.h
+ :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_eld.c
+ :export:
+
SCDC Helper Functions Reference
===============================
diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst
index 270d320407c7..13d3627d8bc0 100644
--- a/Documentation/gpu/drm-kms.rst
+++ b/Documentation/gpu/drm-kms.rst
@@ -548,6 +548,8 @@ Plane Composition Properties
.. kernel-doc:: drivers/gpu/drm/drm_blend.c
:doc: overview
+.. _damage_tracking_properties:
+
Damage Tracking Properties
--------------------------
@@ -579,6 +581,12 @@ Variable Refresh Properties
.. kernel-doc:: drivers/gpu/drm/drm_connector.c
:doc: Variable refresh properties
+Cursor Hotspot Properties
+-------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_plane.c
+ :doc: hotspot properties
+
Existing KMS Properties
-----------------------
diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index 602010cb6894..d55751cad67c 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -466,6 +466,8 @@ DRM MM Range Allocator Function References
.. kernel-doc:: drivers/gpu/drm/drm_mm.c
:export:
+.. _drm_gpuvm:
+
DRM GPUVM
=========
@@ -481,6 +483,8 @@ Split and Merge
.. kernel-doc:: drivers/gpu/drm/drm_gpuvm.c
:doc: Split and Merge
+.. _drm_gpuvm_locking:
+
Locking
-------
@@ -552,6 +556,12 @@ Overview
.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Overview
+Flow Control
+------------
+
+.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
+ :doc: Flow Control
+
Scheduler Function References
-----------------------------
diff --git a/Documentation/gpu/drm-vm-bind-locking.rst b/Documentation/gpu/drm-vm-bind-locking.rst
new file mode 100644
index 000000000000..a345aa513d12
--- /dev/null
+++ b/Documentation/gpu/drm-vm-bind-locking.rst
@@ -0,0 +1,582 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+===============
+VM_BIND locking
+===============
+
+This document attempts to describe what's needed to get VM_BIND locking right,
+including the userptr mmu_notifier locking. It also discusses some
+optimizations to get rid of the looping through all userptr mappings and
+external / shared object mappings that is needed in the simplest
+implementation. In addition, there is a section describing the VM_BIND locking
+required for implementing recoverable pagefaults.
+
+The DRM GPUVM set of helpers
+============================
+
+There is a set of helpers for drivers implementing VM_BIND, and this
+set of helpers implements much, but not all, of the locking described
+in this document. In particular, it is currently lacking a userptr
+implementation. This document does not intend to describe the DRM GPUVM
+implementation in detail, but it is covered in :ref:`its own
+documentation <drm_gpuvm>`. It is highly recommended for any driver
+implementing VM_BIND to use the DRM GPUVM helpers and to extend them if
+common functionality is missing.
+
+Nomenclature
+============
+
+* ``gpu_vm``: Abstraction of a virtual GPU address space with
+ meta-data. Typically one per client (DRM file-private), or one per
+ execution context.
+* ``gpu_vma``: Abstraction of a GPU address range within a gpu_vm with
+ associated meta-data. The backing storage of a gpu_vma can either be
+ a GEM object or anonymous or page-cache pages mapped also into the CPU
+ address space for the process.
+* ``gpu_vm_bo``: Abstracts the association of a GEM object and
+ a VM. The GEM object maintains a list of gpu_vm_bos, where each gpu_vm_bo
+ maintains a list of gpu_vmas.
+* ``userptr gpu_vma or just userptr``: A gpu_vma, whose backing store
+ is anonymous or page-cache pages as described above.
+* ``revalidating``: Revalidating a gpu_vma means making the latest version
+ of the backing store resident and making sure the gpu_vma's
+ page-table entries point to that backing store.
+* ``dma_fence``: A struct dma_fence that is similar to a struct completion
+ and which tracks GPU activity. When the GPU activity is finished,
+ the dma_fence signals. Please refer to the ``DMA Fences`` section of
+ the :doc:`dma-buf doc </driver-api/dma-buf>`.
+* ``dma_resv``: A struct dma_resv (a.k.a reservation object) that is used
+ to track GPU activity in the form of multiple dma_fences on a
+ gpu_vm or a GEM object. The dma_resv contains an array / list
+ of dma_fences and a lock that needs to be held when adding
+ additional dma_fences to the dma_resv. The lock is of a type that
+ allows deadlock-safe locking of multiple dma_resvs in arbitrary
+ order. Please refer to the ``Reservation Objects`` section of the
+ :doc:`dma-buf doc </driver-api/dma-buf>`.
+* ``exec function``: An exec function is a function that revalidates all
+ affected gpu_vmas, submits a GPU command batch and registers the
+ dma_fence representing the GPU command's activity with all affected
+ dma_resvs. For completeness, although not covered by this document,
+ it's worth mentioning that an exec function may also be the
+ revalidation worker that is used by some drivers in compute /
+ long-running mode.
+* ``local object``: A GEM object which is only mapped within a
+ single VM. Local GEM objects share the gpu_vm's dma_resv.
+* ``external object``: a.k.a shared object: A GEM object which may be shared
+ by multiple gpu_vms and whose backing storage may be shared with
+ other drivers.
+
+Locks and locking order
+=======================
+
+One of the benefits of VM_BIND is that local GEM objects share the gpu_vm's
+dma_resv object and hence the dma_resv lock. So, even with a huge
+number of local GEM objects, only one lock is needed to make the exec
+sequence atomic.
+
+The following locks and locking orders are used:
+
+* The ``gpu_vm->lock`` (optionally an rwsem). Protects the gpu_vm's
+ data structure keeping track of gpu_vmas. It can also protect the
+ gpu_vm's list of userptr gpu_vmas. With a CPU mm analogy this would
+ correspond to the mmap_lock. An rwsem allows several readers to walk
+ the VM tree concurrently, but the benefit of that concurrency most
+ likely varies from driver to driver.
+* The ``userptr_seqlock``. This lock is taken in read mode for each
+ userptr gpu_vma on the gpu_vm's userptr list, and in write mode during mmu
+ notifier invalidation. This is not a real seqlock but described in
+ ``mm/mmu_notifier.c`` as a "Collision-retry read-side/write-side
+ 'lock' a lot like a seqcount. However this allows multiple
+ write-sides to hold it at once...". The read side critical section
+ is enclosed by ``mmu_interval_read_begin() /
+ mmu_interval_read_retry()`` with ``mmu_interval_read_begin()``
+ sleeping if the write side is held.
+ The write side is held by the core mm while calling mmu interval
+ invalidation notifiers.
+* The ``gpu_vm->resv`` lock. Protects the gpu_vm's list of gpu_vmas needing
+ rebinding, as well as the residency state of all the gpu_vm's local
+ GEM objects.
+ Furthermore, it typically protects the gpu_vm's list of evicted and
+ external GEM objects.
+* The ``gpu_vm->userptr_notifier_lock``. This is an rwsem that is
+ taken in read mode during exec and write mode during a mmu notifier
+ invalidation. The userptr notifier lock is per gpu_vm.
+* The ``gem_object->gpuva_lock``. This lock protects the GEM object's
+ list of gpu_vm_bos. This is usually the same lock as the GEM
+ object's dma_resv, but some drivers protect this list differently;
+ see below.
+* The ``gpu_vm list spinlocks``. Some implementations need these to
+ update the gpu_vm's evicted- and external-object lists. For those
+ implementations, the spinlocks are grabbed when the lists are
+ manipulated. However, to avoid locking order violations with the
+ dma_resv locks, a special scheme is needed when iterating over the
+ lists. The nominal nesting of all the locks above is sketched below.
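+
+As a rough orientation only, the nesting used by the exec examples later in
+this document can be summarized as follows. This is a sketch, not a
+prescription, and all names are the ones introduced above:
+
+.. code-block:: C
+
+ // Outermost: the gpu_vm->lock, protecting the VM tree and the
+ // userptr list. Read mode is often sufficient; see the userptr
+ // section for when write mode is needed.
+ down_write(&gpu_vm->lock);
+
+ // Then the dma_resv locks: the gpu_vm's own dma_resv and, for
+ // external objects, their individual dma_resvs (locked with the
+ // usual ww_mutex deadlock avoidance, elided here).
+ dma_resv_lock(gpu_vm->resv);
+
+ // Innermost sleeping lock: the userptr notifier lock, taken in read
+ // mode around the final userptr seqno check and job submission.
+ down_read(&gpu_vm->userptr_notifier_lock);
+
+ // ... revalidate, submit, add dma_fences ...
+
+ up_read(&gpu_vm->userptr_notifier_lock);
+ dma_resv_unlock(gpu_vm->resv);
+ up_write(&gpu_vm->lock);
+
+The gpu_vm list spinlocks, when used, nest inside the sleeping locks above
+and are never held while acquiring any of them; see the
+:ref:`spinlock iteration scheme <Spinlock iteration>` below.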
+
+.. _gpu_vma lifetime:
+
+Protection and lifetime of gpu_vm_bos and gpu_vmas
+==================================================
+
+The GEM object's list of gpu_vm_bos, and the gpu_vm_bo's list of gpu_vmas
+is protected by the ``gem_object->gpuva_lock``, which is typically the
+same as the GEM object's dma_resv, but if the driver
+needs to access these lists from within a dma_fence signalling
+critical section, it can instead choose to protect it with a
+separate lock, which can be locked from within the dma_fence signalling
+critical section. Such drivers then need to pay additional attention
+to what locks need to be taken from within the loop when iterating
+over the gpu_vm_bo and gpu_vma lists to avoid locking-order violations.
+
+The DRM GPUVM set of helpers provide lockdep asserts that this lock is
+held in relevant situations and also provides a means of making itself
+aware of which lock is actually used: :c:func:`drm_gem_gpuva_set_lock`.
+
+Each gpu_vm_bo holds a reference counted pointer to the underlying GEM
+object, and each gpu_vma holds a reference counted pointer to the
+gpu_vm_bo. When iterating over the GEM object's list of gpu_vm_bos and
+over the gpu_vm_bo's list of gpu_vmas, the ``gem_object->gpuva_lock`` must
+not be dropped, otherwise, gpu_vmas attached to a gpu_vm_bo may
+disappear without notice since those are not reference-counted. A
+driver may implement its own scheme to allow this at the expense of
+additional complexity, but this is outside the scope of this document.
+
+In the DRM GPUVM implementation, each gpu_vm_bo and each gpu_vma
+holds a reference count on the gpu_vm itself. Due to this, and to avoid circular
+reference counting, cleanup of the gpu_vm's gpu_vmas must not be done from the
+gpu_vm's destructor. Drivers typically implement a gpu_vm close
+function for this cleanup. The gpu_vm close function will abort gpu
+execution using this VM, unmap all gpu_vmas and release page-table memory.
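+
+A minimal sketch of such a close function, using the pseudo-code
+conventions of the examples below and with purely illustrative helper
+names, might look like:
+
+.. code-block:: C
+
+ void gpu_vm_close(struct gpu_vm *gpu_vm)
+ {
+         // Stop GPU execution using this VM first, so that no new
+         // dma_fences referencing its gpu_vmas can be created.
+         abort_gpu_execution(gpu_vm);
+
+         down_write(&gpu_vm->lock);
+         dma_resv_lock(gpu_vm->resv);
+         for_each_gpu_vma_of_gpu_vm(gpu_vm, &gpu_vma) {
+                 // For external objects, the object's own dma_resv /
+                 // gpuva_lock also needs to be taken here.
+                 unmap_gpu_vma(&gpu_vma);
+                 unlink_and_destroy_gpu_vma(&gpu_vma);
+         }
+         dma_resv_unlock(gpu_vm->resv);
+         up_write(&gpu_vm->lock);
+
+         release_page_table_memory(gpu_vm);
+ }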
+
+Revalidation and eviction of local objects
+==========================================
+
+Note that in all the code examples given below we use simplified
+pseudo-code. In particular, the dma_resv deadlock avoidance algorithm
+as well as reserving memory for dma_resv fences is left out.
+
+Revalidation
+____________
+With VM_BIND, all local objects need to be resident when the gpu is
+executing using the gpu_vm, and the objects need to have valid
+gpu_vmas set up pointing to them. Typically, each gpu command buffer
+submission is therefore preceded with a re-validation section:
+
+.. code-block:: C
+
+ dma_resv_lock(gpu_vm->resv);
+
+ // Validation section starts here.
+ for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) {
+ validate_gem_bo(&gpu_vm_bo->gem_bo);
+
+ // The following list iteration needs the Gem object's
+ // dma_resv to be held (it protects the gpu_vm_bo's list of
+ // gpu_vmas, but since local gem objects share the gpu_vm's
+ // dma_resv, it is already held at this point).
+ for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma)
+ move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list);
+ }
+
+ for_each_gpu_vma_on_rebind_list(&gpu_vm->rebind_list, &gpu_vma) {
+ rebind_gpu_vma(&gpu_vma);
+ remove_gpu_vma_from_rebind_list(&gpu_vma);
+ }
+ // Validation section ends here, and job submission starts.
+
+ add_dependencies(&gpu_job, &gpu_vm->resv);
+ job_dma_fence = gpu_submit(&gpu_job);
+
+ add_dma_fence(job_dma_fence, &gpu_vm->resv);
+ dma_resv_unlock(gpu_vm->resv);
+
+The reason for having a separate gpu_vm rebind list is that there
+might be userptr gpu_vmas that are not mapping a buffer object that
+also need rebinding.
+
+Eviction
+________
+
+Eviction of one of these local objects will then look similar to the
+following:
+
+.. code-block:: C
+
+ obj = get_object_from_lru();
+
+ dma_resv_lock(obj->resv);
+ for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo)
+ add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
+
+ add_dependencies(&eviction_job, &obj->resv);
+ job_dma_fence = gpu_submit(&eviction_job);
+ add_dma_fence(job_dma_fence, &obj->resv);
+
+ dma_resv_unlock(&obj->resv);
+ put_object(obj);
+
+Note that since the object is local to the gpu_vm, it will share the gpu_vm's
+dma_resv lock such that ``obj->resv == gpu_vm->resv``.
+The gpu_vm_bos marked for eviction are put on the gpu_vm's evict list,
+which is protected by ``gpu_vm->resv``. During eviction all local
+objects have their dma_resv locked and, due to the above equality, also
+the gpu_vm's dma_resv protecting the gpu_vm's evict list is locked.
+
+With VM_BIND, gpu_vmas don't need to be unbound before eviction,
+since the driver must ensure that the eviction blit or copy will wait
+for GPU idle or depend on all previous GPU activity. Furthermore, any
+subsequent attempt by the GPU to access freed memory through the
+gpu_vma will be preceded by a new exec function, with a revalidation
+section which will make sure all gpu_vmas are rebound. Since both the
+eviction code and the exec function's revalidation hold the object's
+dma_resv, a new exec function cannot race with the eviction.
+
+A driver can be implemented in such a way that, on each exec function,
+only a subset of vmas are selected for rebind. In this case, all vmas that are
+*not* selected for rebind must be unbound before the exec
+function workload is submitted.
+
+Locking with external buffer objects
+====================================
+
+Since external buffer objects may be shared by multiple gpu_vms, they
+can't share their reservation object with a single gpu_vm. Instead
+they need to have a reservation object of their own. The external
+objects bound to a gpu_vm using one or many gpu_vmas are therefore put on a
+per-gpu_vm list which is protected by the gpu_vm's dma_resv lock or
+one of the :ref:`gpu_vm list spinlocks <Spinlock iteration>`. Once
+the gpu_vm's reservation object is locked, it is safe to traverse the
+external object list and lock the dma_resvs of all external
+objects. However, if instead a list spinlock is used, a more elaborate
+iteration scheme needs to be used.
+
+At eviction time, the gpu_vm_bos of *all* the gpu_vms an external
+object is bound to need to be put on their gpu_vm's evict list.
+However, when evicting an external object, the dma_resvs of the
+gpu_vms the object is bound to are typically not held. Only
+the object's private dma_resv can be guaranteed to be held. If there
+is a ww_acquire context at hand at eviction time we could grab those
+dma_resvs but that could cause expensive ww_mutex rollbacks. A simple
+option is to just mark the gpu_vm_bos of the evicted gem object with
+an ``evicted`` bool that is inspected before the next time the
+corresponding gpu_vm evicted list needs to be traversed, for example when
+traversing the list of external objects and locking them. At that time,
+both the gpu_vm's dma_resv and the object's dma_resv are held, and a
+gpu_vm_bo marked evicted can then be added to the gpu_vm's list of
+evicted gpu_vm_bos. The ``evicted`` bool is formally protected by the
+object's dma_resv.
+
+The exec function becomes:
+
+.. code-block:: C
+
+ dma_resv_lock(gpu_vm->resv);
+
+ // External object list is protected by the gpu_vm->resv lock.
+ for_each_gpu_vm_bo_on_extobj_list(gpu_vm, &gpu_vm_bo) {
+ dma_resv_lock(gpu_vm_bo.gem_obj->resv);
+ if (gpu_vm_bo_marked_evicted(&gpu_vm_bo))
+ add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
+ }
+
+ for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) {
+ validate_gem_bo(&gpu_vm_bo->gem_bo);
+
+ for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma)
+ move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list);
+ }
+
+ for_each_gpu_vma_on_rebind_list(&gpu_vm->rebind_list, &gpu_vma) {
+ rebind_gpu_vma(&gpu_vma);
+ remove_gpu_vma_from_rebind_list(&gpu_vma);
+ }
+
+ add_dependencies(&gpu_job, &gpu_vm->resv);
+ job_dma_fence = gpu_submit(&gpu_job);
+
+ add_dma_fence(job_dma_fence, &gpu_vm->resv);
+ for_each_external_obj(gpu_vm, &obj)
+ add_dma_fence(job_dma_fence, &obj->resv);
+ dma_resv_unlock_all_resv_locks();
+
+And the corresponding shared-object aware eviction would look like:
+
+.. code-block:: C
+
+ obj = get_object_from_lru();
+
+ dma_resv_lock(obj->resv);
+ for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo)
+ if (object_is_vm_local(obj))
+ add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
+ else
+ mark_gpu_vm_bo_evicted(&gpu_vm_bo);
+
+ add_dependencies(&eviction_job, &obj->resv);
+ job_dma_fence = gpu_submit(&eviction_job);
+ add_dma_fence(job_dma_fence, &obj->resv);
+
+ dma_resv_unlock(&obj->resv);
+ put_object(obj);
+
+.. _Spinlock iteration:
+
+Accessing the gpu_vm's lists without the dma_resv lock held
+===========================================================
+
+Some drivers will hold the gpu_vm's dma_resv lock when accessing the
+gpu_vm's evict list and external objects lists. However, there are
+drivers that need to access these lists without the dma_resv lock
+held, for example due to asynchronous state updates from within the
+dma_fence signalling critical path. In such cases, a spinlock can be
+used to protect manipulation of the lists. However, since higher level
+sleeping locks need to be taken for each list item while iterating
+over the lists, the items already iterated over need to be
+temporarily moved to a private list and the spinlock released
+while processing each item:
+
+.. code-block:: C
+
+ struct list_head still_in_list;
+
+ INIT_LIST_HEAD(&still_in_list);
+
+ spin_lock(&gpu_vm->list_lock);
+ do {
+ struct list_head *entry = list_first_entry_or_null(&gpu_vm->list, head);
+
+ if (!entry)
+ break;
+
+ list_move_tail(&entry->head, &still_in_list);
+ list_entry_get_unless_zero(entry);
+ spin_unlock(&gpu_vm->list_lock);
+
+ process(entry);
+
+ spin_lock(&gpu_vm->list_lock);
+ list_entry_put(entry);
+ } while (true);
+
+ list_splice_tail(&still_in_list, &gpu_vm->list);
+ spin_unlock(&gpu_vm->list_lock);
+
+Due to the additional locking and atomic operations, drivers that *can*
+avoid accessing the gpu_vm's list outside of the dma_resv lock
+might want to avoid this iteration scheme as well, particularly if the
+driver anticipates a large number of list items. For lists where the
+anticipated number of list items is small, where list iteration doesn't
+happen very often or where there is a significant additional cost
+associated with each iteration, the atomic operation overhead
+associated with this type of iteration is most likely negligible. Note that
+if this scheme is used, it is necessary to make sure this list
+iteration is protected by an outer level lock or semaphore, since list
+items are temporarily pulled off the list while iterating. It is
+also worth mentioning that the local list ``still_in_list`` should
+also be considered protected by the ``gpu_vm->list_lock``, and it is
+thus possible that items are removed also from the local list
+concurrently with list iteration.
+
+Please refer to the :ref:`DRM GPUVM locking section
+<drm_gpuvm_locking>` and its internal
+:c:func:`get_next_vm_bo_from_list` function.
+
+
+userptr gpu_vmas
+================
+
+A userptr gpu_vma is a gpu_vma that, instead of mapping a buffer object to a
+GPU virtual address range, directly maps a CPU mm range of anonymous-
+or file page-cache pages.
+A very simple approach would be to just pin the pages using
+pin_user_pages() at bind time and unpin them at unbind time, but this
+creates a Denial-Of-Service vector since a single user-space process
+would be able to pin down all of system memory, which is not
+desirable. (For special use-cases and assuming proper accounting pinning might
+still be a desirable feature, though). What we need to do in the
+general case is to obtain a reference to the desired pages, make sure
+we are notified using a MMU notifier just before the CPU mm unmaps the
+pages, dirty them if they are not mapped read-only to the GPU, and
+then drop the reference.
+When we are notified by the MMU notifier that the CPU mm is about to drop the
+pages, we need to stop GPU access to the pages by waiting for VM idle
+in the MMU notifier and make sure that before the next time the GPU
+tries to access whatever is now present in the CPU mm range, we unmap
+the old pages from the GPU page tables and repeat the process of
+obtaining new page references. (See the :ref:`notifier example
+<Invalidation example>` below). Note that when the core mm decides to
+launder pages, we get such an unmap MMU notification and can mark the
+pages dirty again before the next GPU access. We also get similar MMU
+notifications for NUMA accounting which the GPU driver doesn't really
+need to care about, but so far it has proven difficult to exclude
+certain notifications.
+
+Using a MMU notifier for device DMA (and other methods) is described in
+:ref:`the pin_user_pages() documentation <mmu-notifier-registration-case>`.
+
+Now, the method of obtaining struct page references using
+get_user_pages() unfortunately can't be used under a dma_resv lock
+since that would violate the locking order of the dma_resv lock vs the
+mmap_lock that is grabbed when resolving a CPU pagefault. This means
+the gpu_vm's list of userptr gpu_vmas needs to be protected by an
+outer lock, which in our example below is the ``gpu_vm->lock``.
+
+The MMU interval seqlock for a userptr gpu_vma is used in the following
+way:
+
+.. code-block:: C
+
+ // Exclusive locking mode here is strictly needed only if there are
+ // invalidated userptr gpu_vmas present, to avoid concurrent userptr
+ // revalidations of the same userptr gpu_vma.
+ down_write(&gpu_vm->lock);
+ retry:
+
+ // Note: mmu_interval_read_begin() blocks until there is no
+ // invalidation notifier running anymore.
+ seq = mmu_interval_read_begin(&gpu_vma->userptr_interval);
+ if (seq != gpu_vma->saved_seq) {
+ obtain_new_page_pointers(&gpu_vma);
+ dma_resv_lock(&gpu_vm->resv);
+ add_gpu_vma_to_revalidate_list(&gpu_vma, &gpu_vm);
+ dma_resv_unlock(&gpu_vm->resv);
+ gpu_vma->saved_seq = seq;
+ }
+
+ // The usual revalidation goes here.
+
+ // Final userptr sequence validation may not happen before the
+ // submission dma_fence is added to the gpu_vm's resv, from the point
+ // of view of the MMU invalidation notifier. Hence the
+ // userptr_notifier_lock that will make them appear atomic.
+
+ add_dependencies(&gpu_job, &gpu_vm->resv);
+ down_read(&gpu_vm->userptr_notifier_lock);
+ if (mmu_interval_read_retry(&gpu_vma->userptr_interval, gpu_vma->saved_seq)) {
+ up_read(&gpu_vm->userptr_notifier_lock);
+ goto retry;
+ }
+
+ job_dma_fence = gpu_submit(&gpu_job);
+
+ add_dma_fence(job_dma_fence, &gpu_vm->resv);
+
+ for_each_external_obj(gpu_vm, &obj)
+ add_dma_fence(job_dma_fence, &obj->resv);
+
+ dma_resv_unlock_all_resv_locks();
+ up_read(&gpu_vm->userptr_notifier_lock);
+ up_write(&gpu_vm->lock);
+
+The code between ``mmu_interval_read_begin()`` and the
+``mmu_interval_read_retry()`` marks the read side critical section of
+what we call the ``userptr_seqlock``. In reality, the gpu_vm's userptr
+gpu_vma list is looped through, and the check is done for *all* of its
+userptr gpu_vmas, although we only show a single one here.
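+
+In pseudo-code, and with an illustrative list and iterator name, such a loop
+could look like:
+
+.. code-block:: C
+
+ // Final validation of *all* userptr gpu_vmas, done under the
+ // userptr_notifier_lock just like the single-vma check above.
+ down_read(&gpu_vm->userptr_notifier_lock);
+ for_each_userptr_gpu_vma(&gpu_vm->userptr_list, &gpu_vma) {
+         if (mmu_interval_read_retry(&gpu_vma->userptr_interval,
+                                     gpu_vma->saved_seq)) {
+                 up_read(&gpu_vm->userptr_notifier_lock);
+                 goto retry;
+         }
+ }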
+
+The userptr gpu_vma MMU invalidation notifier might be called from
+reclaim context and, again, to avoid locking order violations, we can't
+take any dma_resv lock nor the gpu_vm->lock from within it.
+
+.. _Invalidation example:
+.. code-block:: C
+
+ bool gpu_vma_userptr_invalidate(userptr_interval, cur_seq)
+ {
+ // Make sure the exec function either sees the new sequence
+ // and backs off or we wait for the dma-fence:
+
+ down_write(&gpu_vm->userptr_notifier_lock);
+ mmu_interval_set_seq(userptr_interval, cur_seq);
+ up_write(&gpu_vm->userptr_notifier_lock);
+
+ // At this point, the exec function can't succeed in
+ // submitting a new job, because cur_seq is an invalid
+ // sequence number and will always cause a retry. When all
+ // invalidation callbacks have completed, the mmu notifier core will
+ // flip the sequence number to a valid one. However, we need to
+ // stop gpu access to the old pages here.
+
+ dma_resv_wait_timeout(&gpu_vm->resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ return true;
+ }
+
+When this invalidation notifier returns, the GPU can no longer be
+accessing the old pages of the userptr gpu_vma and needs to redo the
+page-binding before a new GPU submission can succeed.
+
+Efficient userptr gpu_vma exec_function iteration
+_________________________________________________
+
+If the gpu_vm's list of userptr gpu_vmas becomes large, it's
+inefficient to iterate through the complete lists of userptrs on each
+exec function to check whether each userptr gpu_vma's saved
+sequence number is stale. A solution to this is to put all
+*invalidated* userptr gpu_vmas on a separate gpu_vm list and
+only check the gpu_vmas present on this list on each exec
+function. This list will then lend itself very well to the spinlock
+locking scheme that is
+:ref:`described in the spinlock iteration section <Spinlock iteration>`, since
+in the mmu notifier, where we add the invalidated gpu_vmas to the
+list, it's not possible to take any outer locks like the
+``gpu_vm->lock`` or the ``gpu_vm->resv`` lock. Note that the
+``gpu_vm->lock`` still needs to be taken while iterating to ensure the list is
+complete, as also mentioned in that section.
+
+If using an invalidated userptr list like this, the retry check in the
+exec function trivially becomes a check for invalidated list empty.
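+
+Assuming such an invalidated userptr list, protected as described in the
+:ref:`spinlock iteration section <Spinlock iteration>` (the list and lock
+names below are illustrative), the retry check could be reduced to
+something like:
+
+.. code-block:: C
+
+ down_read(&gpu_vm->userptr_notifier_lock);
+
+ spin_lock(&gpu_vm->list_lock);
+ invalidated = !list_empty(&gpu_vm->invalidated_userptr_list);
+ spin_unlock(&gpu_vm->list_lock);
+
+ if (invalidated) {
+         up_read(&gpu_vm->userptr_notifier_lock);
+         goto retry;
+ }
+
+ // From here on, any new invalidation notifier will, once the job's
+ // dma_fence has been added to the gpu_vm's resv, wait for that fence.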
+
+Locking at bind and unbind time
+===============================
+
+At bind time, assuming a GEM object backed gpu_vma, each
+gpu_vma needs to be associated with a gpu_vm_bo and that
+gpu_vm_bo in turn needs to be added to the GEM object's
+gpu_vm_bo list, and possibly to the gpu_vm's external object
+list. This is referred to as *linking* the gpu_vma, and typically
+requires that the ``gpu_vm->lock`` and the ``gem_object->gpuva_lock``
+are held. When unlinking a gpu_vma the same locks should be held, which
+ensures that when iterating over ``gpu_vmas``, either under
+the ``gpu_vm->resv`` or the GEM object's dma_resv, the gpu_vmas
+stay alive as long as the lock under which we iterate is not released. For
+userptr gpu_vmas it's similarly required that during vma destroy, the
+outer ``gpu_vm->lock`` is held, since otherwise when iterating over
+the invalidated userptr list as described in the previous section,
+there is nothing keeping those userptr gpu_vmas alive.
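+
+A sketch of the bind-time linking, again with illustrative helper names,
+could look like:
+
+.. code-block:: C
+
+ down_write(&gpu_vm->lock);
+
+ // The gpuva_lock is the GEM object's dma_resv unless the driver has
+ // chosen a separate lock, see the gpu_vma lifetime section above.
+ gpuva_lock(obj);
+ gpu_vm_bo = get_or_create_gpu_vm_bo(gpu_vm, obj);
+ link_gpu_vma(&gpu_vma, gpu_vm_bo);
+ // Adding an external object to the gpu_vm's external object list
+ // additionally requires the gpu_vm's dma_resv or a list spinlock,
+ // elided here.
+ gpuva_unlock(obj);
+
+ up_write(&gpu_vm->lock);
+
+Unlinking at unbind time takes the same locks.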
+
+Locking for recoverable page-fault page-table updates
+=====================================================
+
+There are two important things we need to ensure with locking for
+recoverable page-faults:
+
+* At the time we return pages back to the system / allocator for
+ reuse, there should be no remaining GPU mappings and any GPU TLB
+ must have been flushed.
+* The unmapping and mapping of a gpu_vma must not race.
+
+Since the unmapping (or zapping) of GPU ptes typically takes place
+where it is hard or even impossible to take any outer level locks, we
+must either introduce a new lock that is held at both mapping and
+unmapping time, or look at the locks we do hold at unmapping time and
+make sure that they are held also at mapping time. For userptr
+gpu_vmas, the ``userptr_seqlock`` is held in write mode in the mmu
+invalidation notifier where zapping happens. Hence, if the
+``userptr_seqlock`` as well as the ``gpu_vm->userptr_notifier_lock``
+is held in read mode during mapping, it will not race with the
+zapping. For GEM object backed gpu_vmas, zapping will take place under
+the GEM object's dma_resv, and ensuring that the dma_resv is also held
+when populating the page-tables for any gpu_vma pointing to the GEM
+object will similarly ensure we are race-free.
+
+If any part of the mapping is performed asynchronously
+under a dma-fence with these locks released, the zapping will need to
+wait for that dma-fence to signal under the relevant lock before
+starting to modify the page-table.
+
+Since modifying the
+page-table structure in a way that frees up page-table memory
+might also require outer level locks, the zapping of GPU ptes
+typically focuses only on zeroing page-table or page-directory entries
+and flushing TLB, whereas freeing of page-table memory is deferred to
+unbind or rebind time.
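+
+As an illustration only, and assuming a simple page-table walker of the kind
+most drivers already have (the helper names are not an existing API), such a
+zap could be sketched as:
+
+.. code-block:: C
+
+ // Called with the userptr_seqlock held in write mode (userptr
+ // gpu_vmas) or with the GEM object's dma_resv held (GEM object backed
+ // gpu_vmas). Note that no page-table memory is freed here.
+ void zap_gpu_vma(struct gpu_vma *gpu_vma)
+ {
+         for_each_pte_of_gpu_vma(gpu_vma, &pte)
+                 write_pte(pte, 0);
+
+         gpu_tlb_flush_range(gpu_vma->gpu_vm, gpu_vma->start, gpu_vma->end);
+ }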
diff --git a/Documentation/gpu/imagination/index.rst b/Documentation/gpu/imagination/index.rst
new file mode 100644
index 000000000000..0c1e247cea41
--- /dev/null
+++ b/Documentation/gpu/imagination/index.rst
@@ -0,0 +1,13 @@
+=======================================
+drm/imagination PowerVR Graphics Driver
+=======================================
+
+.. kernel-doc:: drivers/gpu/drm/imagination/pvr_drv.c
+ :doc: PowerVR (Series 6 and later) and IMG Graphics Driver
+
+Contents
+========
+.. toctree::
+ :maxdepth: 2
+
+ uapi
diff --git a/Documentation/gpu/imagination/uapi.rst b/Documentation/gpu/imagination/uapi.rst
new file mode 100644
index 000000000000..7502413d0a93
--- /dev/null
+++ b/Documentation/gpu/imagination/uapi.rst
@@ -0,0 +1,171 @@
+====
+UAPI
+====
+The sources associated with this section can be found in ``pvr_drm.h``.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR UAPI
+
+OBJECT ARRAYS
+=============
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_obj_array
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: DRM_PVR_OBJ_ARRAY
+
+IOCTLS
+======
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR IOCTL interface
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: PVR_IOCTL
+
+DEV_QUERY
+---------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR IOCTL DEV_QUERY interface
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_dev_query
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_dev_query_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_dev_query_gpu_info
+ drm_pvr_dev_query_runtime_info
+ drm_pvr_dev_query_hwrt_info
+ drm_pvr_dev_query_quirks
+ drm_pvr_dev_query_enhancements
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_heap_id
+ drm_pvr_heap
+ drm_pvr_dev_query_heap_info
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_static_data_area_usage
+ drm_pvr_static_data_area
+ drm_pvr_dev_query_static_data_areas
+
+CREATE_BO
+---------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR IOCTL CREATE_BO interface
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_create_bo_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: Flags for CREATE_BO
+
+GET_BO_MMAP_OFFSET
+------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR IOCTL GET_BO_MMAP_OFFSET interface
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_get_bo_mmap_offset_args
+
+CREATE_VM_CONTEXT and DESTROY_VM_CONTEXT
+----------------------------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR IOCTL CREATE_VM_CONTEXT and DESTROY_VM_CONTEXT interfaces
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_create_vm_context_args
+ drm_pvr_ioctl_destroy_vm_context_args
+
+VM_MAP and VM_UNMAP
+-------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR IOCTL VM_MAP and VM_UNMAP interfaces
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_vm_map_args
+ drm_pvr_ioctl_vm_unmap_args
+
+CREATE_CONTEXT and DESTROY_CONTEXT
+----------------------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR IOCTL CREATE_CONTEXT and DESTROY_CONTEXT interfaces
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_create_context_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ctx_priority
+ drm_pvr_ctx_type
+ drm_pvr_static_render_context_state
+ drm_pvr_static_render_context_state_format
+ drm_pvr_reset_framework
+ drm_pvr_reset_framework_format
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_destroy_context_args
+
+CREATE_FREE_LIST and DESTROY_FREE_LIST
+--------------------------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR IOCTL CREATE_FREE_LIST and DESTROY_FREE_LIST interfaces
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_create_free_list_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_destroy_free_list_args
+
+CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET
+--------------------------------------------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR IOCTL CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET interfaces
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_create_hwrt_dataset_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_create_hwrt_geom_data_args
+ drm_pvr_create_hwrt_rt_data_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_destroy_hwrt_dataset_args
+
+SUBMIT_JOBS
+-----------
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: PowerVR IOCTL SUBMIT_JOBS interface
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: Flags for the drm_pvr_sync_op object.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_ioctl_submit_jobs_args
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: Flags for SUBMIT_JOB ioctl geometry command.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: Flags for SUBMIT_JOB ioctl fragment command.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: Flags for SUBMIT_JOB ioctl compute command.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :doc: Flags for SUBMIT_JOB ioctl transfer command.
+
+.. kernel-doc:: include/uapi/drm/pvr_drm.h
+ :identifiers: drm_pvr_sync_op
+ drm_pvr_job_type
+ drm_pvr_hwrt_data_ref
+ drm_pvr_job
+
+Internal notes
+==============
+.. kernel-doc:: drivers/gpu/drm/imagination/pvr_device.h
+ :doc: IOCTL validation helpers
+
+.. kernel-doc:: drivers/gpu/drm/imagination/pvr_device.h
+ :identifiers: PVR_STATIC_ASSERT_64BIT_ALIGNED PVR_IOCTL_UNION_PADDING_CHECK
+ pvr_ioctl_union_padding_check
diff --git a/Documentation/gpu/implementation_guidelines.rst b/Documentation/gpu/implementation_guidelines.rst
index 138e637dcc6b..dbccfa72f1c9 100644
--- a/Documentation/gpu/implementation_guidelines.rst
+++ b/Documentation/gpu/implementation_guidelines.rst
@@ -7,3 +7,4 @@ Misc DRM driver uAPI- and feature implementation guidelines
.. toctree::
drm-vm-bind-async
+ drm-vm-bind-locking
diff --git a/Documentation/gpu/rfc/xe.rst b/Documentation/gpu/rfc/xe.rst
index c29113a0ac30..97cf87578f97 100644
--- a/Documentation/gpu/rfc/xe.rst
+++ b/Documentation/gpu/rfc/xe.rst
@@ -70,35 +70,42 @@ When the time comes for Xe, the protection will be lifted on Xe and kept in i915
Xe – Pre-Merge Goals - Work-in-Progress
=======================================
-Drm_scheduler
--------------
-Xe primarily uses Firmware based scheduling (GuC FW). However, it will use
-drm_scheduler as the scheduler ‘frontend’ for userspace submission in order to
-resolve syncobj and dma-buf implicit sync dependencies. However, drm_scheduler is
-not yet prepared to handle the 1-to-1 relationship between drm_gpu_scheduler and
-drm_sched_entity.
+Display integration with i915
+-----------------------------
+In order to share the display code with the i915 driver so that there is maximum
+reuse, the i915/display/ code is built twice, once for i915.ko and then for
+xe.ko. Currently, the i915/display code in Xe tree is polluted with many 'ifdefs'
+depending on the build target. The goal is to refactor both Xe and i915/display
+code simultaneously in order to get a clean result before they land upstream, so
+that display can already be part of the initial pull request towards drm-next.
-Deeper changes to drm_scheduler should *not* be required to get Xe accepted, but
-some consensus needs to be reached between Xe and other community drivers that
-could also benefit from this work, for coupling FW based/assisted submission such
-as the ARM’s new Mali GPU driver, and others.
+However, display code should not gate the acceptance of Xe in upstream. Xe
+patches will be refactored in a way that display code can be removed, if needed,
+from the first pull request of Xe towards drm-next. The expectation is that when
+both drivers are part of the drm-tip, the introduction of cleaner patches will be
+easier and speed up.
-As a key measurable result, the patch series introducing Xe itself shall not
-depend on any other patch touching drm_scheduler itself that was not yet merged
-through drm-misc. This, by itself, already includes the reach of an agreement for
-uniform 1 to 1 relationship implementation / usage across drivers.
+Xe – uAPI high level overview
+=============================
-ASYNC VM_BIND
--------------
-Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
-Xe merged, it is mandatory to have a consensus with other drivers and Mesa.
-It needs to be clear how to handle async VM_BIND and interactions with userspace
-memory fences. Ideally with helper support so people don't get it wrong in all
-possible ways.
+...Warning: To be done in follow-up patches after/when/where the main consensus on the various items is individually reached.
-As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of
-various flavors, error handling and sample API suggestions are documented in
-:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`.
+Xe – Pre-Merge Goals - Completed
+================================
+
+Drm_exec
+--------
+Helper to make dma_resv locking for a big number of buffers is getting removed in
+the drm_exec series proposed in https://patchwork.freedesktop.org/patch/524376/
+If that happens, Xe needs to change and incorporate the changes in the driver.
+The goal is to engage with the Community to understand if the best approach is to
+move that to the drivers that are using it or if we should keep the helpers in
+place waiting for Xe to get merged.
+
+This item ties into the GPUVA, VM_BIND, and even long-running compute support.
+
+As a key measurable result, we need to have a community consensus documented in
+this document and the Xe driver prepared for the changes, if necessary.
Userptr integration and vm_bind
-------------------------------
@@ -123,10 +130,45 @@ Documentation should include:
* O(1) complexity under VM_BIND.
+The document is now included in the drm documentation :doc:`here </gpu/drm-vm-bind-async>`.
+
Some parts of userptr like mmu_notifiers should become GPUVA or DRM helpers when
the second driver supporting VM_BIND+userptr appears. Details to be defined when
the time comes.
+The DRM GPUVM helpers do not yet include the userptr parts, but discussions
+about implementing them are ongoing.
+
+ASYNC VM_BIND
+-------------
+Although having a common DRM level IOCTL for VM_BIND is not a requirement to get
+Xe merged, it is mandatory to have a consensus with other drivers and Mesa.
+It needs to be clear how to handle async VM_BIND and interactions with userspace
+memory fences. Ideally with helper support so people don't get it wrong in all
+possible ways.
+
+As a key measurable result, the benefits of ASYNC VM_BIND and a discussion of
+various flavors, error handling and sample API suggestions are documented in
+:doc:`The ASYNC VM_BIND document </gpu/drm-vm-bind-async>`.
+
+Drm_scheduler
+-------------
+Xe primarily uses Firmware based scheduling (GuC FW). However, it will use
+drm_scheduler as the scheduler ‘frontend’ for userspace submission in order to
+resolve syncobj and dma-buf implicit sync dependencies. However, drm_scheduler is
+not yet prepared to handle the 1-to-1 relationship between drm_gpu_scheduler and
+drm_sched_entity.
+
+Deeper changes to drm_scheduler should *not* be required to get Xe accepted, but
+some consensus needs to be reached between Xe and other community drivers that
+could also benefit from this work, for coupling FW based/assisted submission such
+as the ARM’s new Mali GPU driver, and others.
+
+As a key measurable result, the patch series introducing Xe itself shall not
+depend on any other patch touching drm_scheduler itself that was not yet merged
+through drm-misc. This, by itself, already includes the reach of an agreement for
+uniform 1 to 1 relationship implementation / usage across drivers.
+
Long running compute: minimal data structure/scaffolding
--------------------------------------------------------
The generic scheduler code needs to include the handling of endless compute
@@ -139,46 +181,6 @@ this minimal drm/scheduler work, if needed, merged to drm-misc in a way that any
drm driver, including Xe, could re-use and add their own individual needs on top
in a next stage. However, this should not block the initial merge.
-This is a non-blocker item since the driver without the support for the long
-running compute enabled is not a showstopper.
-
-Display integration with i915
------------------------------
-In order to share the display code with the i915 driver so that there is maximum
-reuse, the i915/display/ code is built twice, once for i915.ko and then for
-xe.ko. Currently, the i915/display code in Xe tree is polluted with many 'ifdefs'
-depending on the build target. The goal is to refactor both Xe and i915/display
-code simultaneously in order to get a clean result before they land upstream, so
-that display can already be part of the initial pull request towards drm-next.
-
-However, display code should not gate the acceptance of Xe in upstream. Xe
-patches will be refactored in a way that display code can be removed, if needed,
-from the first pull request of Xe towards drm-next. The expectation is that when
-both drivers are part of the drm-tip, the introduction of cleaner patches will be
-easier and speed up.
-
-Drm_exec
---------
-Helper to make dma_resv locking for a big number of buffers is getting removed in
-the drm_exec series proposed in https://patchwork.freedesktop.org/patch/524376/
-If that happens, Xe needs to change and incorporate the changes in the driver.
-The goal is to engage with the Community to understand if the best approach is to
-move that to the drivers that are using it or if we should keep the helpers in
-place waiting for Xe to get merged.
-
-This item ties into the GPUVA, VM_BIND, and even long-running compute support.
-
-As a key measurable result, we need to have a community consensus documented in
-this document and the Xe driver prepared for the changes, if necessary.
-
-Xe – uAPI high level overview
-=============================
-
-...Warning: To be done in follow up patches after/when/where the main consensus in various items are individually reached.
-
-Xe – Pre-Merge Goals - Completed
-================================
-
Dev_coredump
------------
diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 03fe5d1247be..41a264bf84ce 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -337,8 +337,8 @@ connector register/unregister fixes
Level: Intermediate
-Remove load/unload callbacks from all non-DRIVER_LEGACY drivers
----------------------------------------------------------------
+Remove load/unload callbacks
+----------------------------
The load/unload callbacks in struct &drm_driver are very much midlayers, plus
for historical reasons they get the ordering wrong (and we can't fix that)
@@ -347,8 +347,7 @@ between setting up the &drm_driver structure and calling drm_dev_register().
- Rework drivers to no longer use the load/unload callbacks, directly coding the
load/unload sequence into the driver's probe function.
-- Once all non-DRIVER_LEGACY drivers are converted, disallow the load/unload
- callbacks for all modern drivers.
+- Once all drivers are converted, remove the load/unload callbacks.
Contact: Daniel Vetter
@@ -621,6 +620,23 @@ Contact: Javier Martinez Canillas <javierm@redhat.com>
Level: Intermediate
+Clean up and document former selftests suites
+---------------------------------------------
+
+Some KUnit test suites (drm_buddy, drm_cmdline_parser, drm_damage_helper,
+drm_format, drm_framebuffer, drm_dp_mst_helper, drm_mm, drm_plane_helper and
+drm_rect) are former selftests suites that were converted over when KUnit
+was first introduced.
+
+These suites were fairly undocumented and written with goals different from
+those of unit tests. Identifying what each test in these suites actually
+tests for, whether that makes sense for a unit test, and either removing it
+if it doesn't or documenting it if it does would be of great help.
+
+Contact: Maxime Ripard <mripard@kernel.org>
+
+Level: Intermediate
+
Enable trinity for DRM
----------------------
@@ -765,6 +781,29 @@ Contact: Hans de Goede
Level: Advanced
+Buffer age or other damage accumulation algorithm for buffer damage
+===================================================================
+
+Drivers that do per-buffer uploads need buffer damage handling (rather than
+frame damage like drivers that do per-plane or per-CRTC uploads), but there is
+no support for getting the buffer age or any other damage accumulation algorithm.
+
+For this reason, the damage helpers just fall back to a full plane update if
+the framebuffer attached to a plane has changed since the last page-flip.
+Drivers set &drm_plane_state.ignore_damage_clips to true as an indication to
+the drm_atomic_helper_damage_iter_init() and drm_atomic_helper_damage_iter_next()
+helpers that the damage clips should be ignored.
+
+This should be improved to get damage tracking properly working on drivers that
+do per-buffer uploads.
+
+More information about damage tracking and references to learning materials can
+be found in :ref:`damage_tracking_properties`.
+
+Contact: Javier Martinez Canillas <javierm@redhat.com>
+
+Level: Advanced
+
Outside DRM
===========
diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst
new file mode 100644
index 000000000000..c224ecaee81e
--- /dev/null
+++ b/Documentation/gpu/xe/index.rst
@@ -0,0 +1,25 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=======================
+drm/xe Intel GFX Driver
+=======================
+
+The drm/xe driver supports some future GFX cards with rendering, display,
+compute and media. Support for currently available platforms like TGL, ADL,
+DG2, etc. is provided to prototype the driver.
+
+.. toctree::
+ :titlesonly:
+
+ xe_mm
+ xe_map
+ xe_migrate
+ xe_cs
+ xe_pm
+ xe_pcode
+ xe_gt_mcr
+ xe_wa
+ xe_rtp
+ xe_firmware
+ xe_tile
+ xe_debugging
diff --git a/Documentation/gpu/xe/xe_cs.rst b/Documentation/gpu/xe/xe_cs.rst
new file mode 100644
index 000000000000..e379aed4f5a8
--- /dev/null
+++ b/Documentation/gpu/xe/xe_cs.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==================
+Command submission
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_exec.c
+ :doc: Execbuf (User GPU command submission)
diff --git a/Documentation/gpu/xe/xe_debugging.rst b/Documentation/gpu/xe/xe_debugging.rst
new file mode 100644
index 000000000000..d65e56ff3500
--- /dev/null
+++ b/Documentation/gpu/xe/xe_debugging.rst
@@ -0,0 +1,7 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========
+Debugging
+=========
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_assert.h
diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst
new file mode 100644
index 000000000000..afcb561cd37d
--- /dev/null
+++ b/Documentation/gpu/xe/xe_firmware.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+========
+Firmware
+========
+
+Firmware Layout
+===============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
+ :doc: CSS-based Firmware Layout
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_uc_fw_abi.h
+ :doc: GSC-based Firmware Layout
+
+Write Once Protected Content Memory (WOPCM) Layout
+==================================================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wopcm.c
+ :doc: Write Once Protected Content Memory (WOPCM) Layout
+
+GuC CTB Blob
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_ct.c
+ :doc: GuC CTB Blob
+
+GuC Power Conservation (PC)
+===========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c
+ :doc: GuC Power Conservation (PC)
+
+Internal API
+============
+
+TODO
diff --git a/Documentation/gpu/xe/xe_gt_mcr.rst b/Documentation/gpu/xe/xe_gt_mcr.rst
new file mode 100644
index 000000000000..848c07bc36d0
--- /dev/null
+++ b/Documentation/gpu/xe/xe_gt_mcr.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==============================================
+GT Multicast/Replicated (MCR) Register Support
+==============================================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_mcr.c
+ :doc: GT Multicast/Replicated (MCR) Register Support
+
+Internal API
+============
+
+TODO
diff --git a/Documentation/gpu/xe/xe_map.rst b/Documentation/gpu/xe/xe_map.rst
new file mode 100644
index 000000000000..a098cfd2df04
--- /dev/null
+++ b/Documentation/gpu/xe/xe_map.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========
+Map Layer
+=========
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_map.h
+ :doc: Map layer
diff --git a/Documentation/gpu/xe/xe_migrate.rst b/Documentation/gpu/xe/xe_migrate.rst
new file mode 100644
index 000000000000..f92faec0ac94
--- /dev/null
+++ b/Documentation/gpu/xe/xe_migrate.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=============
+Migrate Layer
+=============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_migrate_doc.h
+ :doc: Migrate Layer
diff --git a/Documentation/gpu/xe/xe_mm.rst b/Documentation/gpu/xe/xe_mm.rst
new file mode 100644
index 000000000000..6c8fd8b4a466
--- /dev/null
+++ b/Documentation/gpu/xe/xe_mm.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=================
+Memory Management
+=================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h
+ :doc: Buffer Objects (BO)
+
+Pagetable building
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pt.c
+ :doc: Pagetable building
diff --git a/Documentation/gpu/xe/xe_pcode.rst b/Documentation/gpu/xe/xe_pcode.rst
new file mode 100644
index 000000000000..d2e22cc45061
--- /dev/null
+++ b/Documentation/gpu/xe/xe_pcode.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=====
+Pcode
+=====
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
+ :doc: PCODE
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
+ :internal:
diff --git a/Documentation/gpu/xe/xe_pm.rst b/Documentation/gpu/xe/xe_pm.rst
new file mode 100644
index 000000000000..6781cdfb24f6
--- /dev/null
+++ b/Documentation/gpu/xe/xe_pm.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+========================
+Runtime Power Management
+========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
+ :doc: Xe Power Management
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_pm.c
+ :internal:
diff --git a/Documentation/gpu/xe/xe_rtp.rst b/Documentation/gpu/xe/xe_rtp.rst
new file mode 100644
index 000000000000..7fdf4b6c1a04
--- /dev/null
+++ b/Documentation/gpu/xe/xe_rtp.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+=========================
+Register Table Processing
+=========================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
+ :doc: Register Table Processing
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp_types.h
+ :internal:
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.h
+ :internal:
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_rtp.c
+ :internal:
diff --git a/Documentation/gpu/xe/xe_tile.rst b/Documentation/gpu/xe/xe_tile.rst
new file mode 100644
index 000000000000..c33f68dd95b6
--- /dev/null
+++ b/Documentation/gpu/xe/xe_tile.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+==================
+Multi-tile Devices
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c
+ :doc: Multi-tile Design
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_tile.c
+ :internal:
diff --git a/Documentation/gpu/xe/xe_wa.rst b/Documentation/gpu/xe/xe_wa.rst
new file mode 100644
index 000000000000..f8811cc6adcc
--- /dev/null
+++ b/Documentation/gpu/xe/xe_wa.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+
+====================
+Hardware workarounds
+====================
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
+ :doc: Hardware workarounds
+
+Internal API
+============
+
+.. kernel-doc:: drivers/gpu/drm/xe/xe_wa.c
+ :internal: