diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1720-drm-amdgpu-add-timeline-support-in-amdgpu-CS-v3.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.19.8/1720-drm-amdgpu-add-timeline-support-in-amdgpu-CS-v3.patch | 432 |
1 files changed, 432 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1720-drm-amdgpu-add-timeline-support-in-amdgpu-CS-v3.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1720-drm-amdgpu-add-timeline-support-in-amdgpu-CS-v3.patch new file mode 100644 index 00000000..510c933b --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1720-drm-amdgpu-add-timeline-support-in-amdgpu-CS-v3.patch @@ -0,0 +1,432 @@ +From 4a09827743cbca0afa96df25660f494cbcd8e61a Mon Sep 17 00:00:00 2001 +From: Chunming Zhou <david1.zhou@amd.com> +Date: Mon, 1 Apr 2019 17:51:00 +0800 +Subject: [PATCH 1720/2940] drm/amdgpu: add timeline support in amdgpu CS v3 + +syncobj wait/signal operation is appending in command submission. +v2: separate to two kinds in/out_deps functions +v3: fix checking for timeline syncobj + +Signed-off-by: Chunming Zhou <david1.zhou@amd.com> +Cc: Tobias Hector <Tobias.Hector@amd.com> +Cc: Jason Ekstrand <jason@jlekstrand.net> +Cc: Dave Airlie <airlied@redhat.com> +Cc: Chris Wilson <chris@chris-wilson.co.uk> +Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> +Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> +Signed-off-by: Chaudhary Amit Kumar <Chaudharyamit.Kumar@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 146 +++++++++++++++++++++---- + drivers/gpu/drm/drm_syncobj.c | 57 ++++++++-- + include/drm/drm_syncobj.h | 10 +- + include/uapi/drm/amdgpu_drm.h | 8 ++ + 5 files changed, 197 insertions(+), 34 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 9bc360586eaa..dcc54022579f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -441,6 +441,12 @@ struct amdgpu_cs_chunk { + void *kdata; + }; + ++struct amdgpu_cs_post_dep { ++ struct drm_syncobj *syncobj; ++ struct dma_fence_chain *chain; ++ u64 point; ++}; ++ + struct amdgpu_cs_parser { + struct amdgpu_device *adev; + struct drm_file 
*filp; +@@ -470,8 +476,8 @@ struct amdgpu_cs_parser { + /* user fence */ + struct amdgpu_bo_list_entry uf_entry; + +- unsigned num_post_dep_syncobjs; +- struct drm_syncobj **post_dep_syncobjs; ++ unsigned num_post_deps; ++ struct amdgpu_cs_post_dep *post_deps; + }; + + static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +index adfebc79124b..e68dde501f44 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -215,6 +215,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: ++ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: ++ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + break; + + default: +@@ -759,9 +761,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, + ttm_eu_backoff_reservation(&parser->ticket, + &parser->validated); + +- for (i = 0; i < parser->num_post_dep_syncobjs; i++) +- drm_syncobj_put(parser->post_dep_syncobjs[i]); +- kfree(parser->post_dep_syncobjs); ++ for (i = 0; i < parser->num_post_deps; i++) { ++ drm_syncobj_put(parser->post_deps[i].syncobj); ++ kfree(parser->post_deps[i].chain); ++ } ++ kfree(parser->post_deps); + + dma_fence_put(parser->fence); + +@@ -1074,12 +1078,16 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, + static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, + uint32_t handle) + { +- int r; + struct dma_fence *fence; ++ int r; ++ + r = drm_syncobj_find_fence(p->filp, handle, &fence); +- if (r) +- return r; ++ if (r) { ++ DRM_ERROR("syncobj %u failed to find fence @ (%d)!\n", ++ handle, r); + ++ return r; ++ } + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); + dma_fence_put(fence); + +@@ -1089,46 +1097,115 @@ static int 
amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, + static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) + { ++ struct drm_amdgpu_cs_chunk_sem *deps; + unsigned num_deps; + int i, r; +- struct drm_amdgpu_cs_chunk_sem *deps; + + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); +- + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle); + if (r) + return r; + } ++ ++ return 0; ++} ++ ++ ++static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, ++ struct amdgpu_cs_chunk *chunk) ++{ ++ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; ++ unsigned num_deps; ++ int i, r; ++ ++ syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; ++ num_deps = chunk->length_dw * 4 / ++ sizeof(struct drm_amdgpu_cs_chunk_syncobj); ++ for (i = 0; i < num_deps; ++i) { ++ r = amdgpu_syncobj_lookup_and_add_to_sync(p, ++ syncobj_deps[i].handle); ++ if (r) ++ return r; ++ } ++ + return 0; + } + + static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) + { ++ struct drm_amdgpu_cs_chunk_sem *deps; + unsigned num_deps; + int i; +- struct drm_amdgpu_cs_chunk_sem *deps; ++ + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + +- p->post_dep_syncobjs = kmalloc_array(num_deps, +- sizeof(struct drm_syncobj *), +- GFP_KERNEL); +- p->num_post_dep_syncobjs = 0; ++ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), ++ GFP_KERNEL); ++ p->num_post_deps = 0; ++ ++ if (!p->post_deps) ++ return -ENOMEM; ++ ++ ++ for (i = 0; i < num_deps; ++i) { ++ p->post_deps[i].syncobj = ++ drm_syncobj_find(p->filp, deps[i].handle); ++ if (!p->post_deps[i].syncobj) ++ return -EINVAL; ++ p->post_deps[i].chain = NULL; ++ p->post_deps[i].point = 0; ++ 
p->num_post_deps++; ++ } ++ ++ return 0; ++} + +- if (!p->post_dep_syncobjs) ++ ++static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, ++ struct amdgpu_cs_chunk ++ *chunk) ++{ ++ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; ++ unsigned num_deps; ++ int i; ++ ++ syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; ++ num_deps = chunk->length_dw * 4 / ++ sizeof(struct drm_amdgpu_cs_chunk_syncobj); ++ ++ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), ++ GFP_KERNEL); ++ p->num_post_deps = 0; ++ ++ if (!p->post_deps) + return -ENOMEM; + + for (i = 0; i < num_deps; ++i) { +- p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); +- if (!p->post_dep_syncobjs[i]) ++ struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; ++ ++ dep->chain = NULL; ++ if (syncobj_deps[i].point) { ++ dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL); ++ if (!dep->chain) ++ return -ENOMEM; ++ } ++ ++ dep->syncobj = drm_syncobj_find(p->filp, ++ syncobj_deps[i].handle); ++ if (!dep->syncobj) { ++ kfree(dep->chain); + return -EINVAL; +- p->num_post_dep_syncobjs++; ++ } ++ dep->point = syncobj_deps[i].point; ++ p->num_post_deps++; + } ++ + return 0; + } + +@@ -1142,19 +1219,33 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + + chunk = &p->chunks[i]; + +- if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES || +- chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { ++ switch (chunk->chunk_id) { ++ case AMDGPU_CHUNK_ID_DEPENDENCIES: ++ case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: + r = amdgpu_cs_process_fence_dep(p, chunk); + if (r) + return r; +- } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { ++ break; ++ case AMDGPU_CHUNK_ID_SYNCOBJ_IN: + r = amdgpu_cs_process_syncobj_in_dep(p, chunk); + if (r) + return r; +- } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) { ++ break; ++ case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + r = amdgpu_cs_process_syncobj_out_dep(p, chunk); + if (r) + return r; 
++ break; ++ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: ++ r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); ++ if (r) ++ return r; ++ break; ++ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: ++ r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); ++ if (r) ++ return r; ++ break; + } + } + +@@ -1165,8 +1256,17 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) + { + int i; + +- for (i = 0; i < p->num_post_dep_syncobjs; ++i) +- drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence); ++ for (i = 0; i < p->num_post_deps; ++i) { ++ if (p->post_deps[i].chain && p->post_deps[i].point) { ++ drm_syncobj_add_point(p->post_deps[i].syncobj, ++ p->post_deps[i].chain, ++ p->fence, p->post_deps[i].point); ++ p->post_deps[i].chain = NULL; ++ } else { ++ drm_syncobj_replace_fence(p->post_deps[i].syncobj, ++ p->fence); ++ } ++ } + } + + static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, +diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c +index 759278fef35a..74482832c759 100644 +--- a/drivers/gpu/drm/drm_syncobj.c ++++ b/drivers/gpu/drm/drm_syncobj.c +@@ -56,6 +56,17 @@ + #include "drm_internal.h" + #include <drm/drm_syncobj.h> + ++struct syncobj_wait_entry { ++ struct list_head node; ++ struct task_struct *task; ++ struct dma_fence *fence; ++ struct dma_fence_cb fence_cb; ++ struct drm_syncobj_cb syncobj_cb; ++}; ++ ++static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, ++ struct drm_syncobj_cb *cb); ++ + /** + * drm_syncobj_find - lookup and reference a sync object. 
+ * @file_private: drm file private pointer +@@ -82,6 +93,45 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, + } + EXPORT_SYMBOL(drm_syncobj_find); + ++ ++/** ++ * drm_syncobj_add_point - add new timeline point to the syncobj ++ * @syncobj: sync object to add timeline point to ++ * @chain: chain node to use to add the point ++ * @fence: fence to encapsulate in the chain node ++ * @point: sequence number to use for the point ++ * ++ * Add the chain node as new timeline point to the syncobj. ++ */ ++void drm_syncobj_add_point(struct drm_syncobj *syncobj, ++ struct dma_fence_chain *chain, ++ struct dma_fence *fence, ++ uint64_t point) ++{ ++ struct syncobj_wait_entry *cur, *tmp; ++ struct dma_fence *prev; ++ ++ dma_fence_get(fence); ++ ++ spin_lock(&syncobj->lock); ++ ++ prev = drm_syncobj_fence_get(syncobj); ++ /* Adding an out-of-order point to the timeline could cause the payload returned from query_ioctl to be 0! */ ++ if (prev && prev->seqno >= point) ++ DRM_ERROR("You are adding an unorder point to timeline!\n"); ++ dma_fence_chain_init(chain, prev, fence, point); ++ rcu_assign_pointer(syncobj->fence, &chain->base); ++ ++ list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) ++ syncobj_wait_syncobj_func(syncobj, &cur->syncobj_cb); ++ spin_unlock(&syncobj->lock); ++ ++ /* Walk the chain once to trigger garbage collection */ ++ dma_fence_chain_for_each(fence, prev); ++ dma_fence_put(prev); ++} ++EXPORT_SYMBOL(drm_syncobj_add_point); ++ + static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func) +@@ -657,13 +707,6 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, + &args->handle); + } + +-struct syncobj_wait_entry { +- struct task_struct *task; +- struct dma_fence *fence; +- struct dma_fence_cb fence_cb; +- struct drm_syncobj_cb syncobj_cb; +-}; +- + static void syncobj_wait_fence_func(struct dma_fence *fence, + struct dma_fence_cb *cb) + { +diff 
--git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h +index 3980602472c0..2ad80447ad5b 100644 +--- a/include/drm/drm_syncobj.h ++++ b/include/drm/drm_syncobj.h +@@ -27,6 +27,7 @@ + #define __DRM_SYNCOBJ_H__ + + #include "linux/dma-fence.h" ++#include <linux/dma-fence-chain.h> + + struct drm_syncobj_cb; + +@@ -131,6 +132,11 @@ drm_syncobj_fence_get(struct drm_syncobj *syncobj) + + struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, + u32 handle); ++void drm_syncobj_add_point(struct drm_syncobj *syncobj, ++ struct dma_fence_chain *chain, ++ struct dma_fence *fence, ++ uint64_t point); ++ + void drm_syncobj_add_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func); +@@ -139,8 +145,8 @@ void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, + void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, + struct dma_fence *fence); + int drm_syncobj_find_fence(struct drm_file *file_private, +- u32 handle, +- struct dma_fence **fence); ++ u32 handle, ++ struct dma_fence **fence); + void drm_syncobj_free(struct kref *kref); + int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, + struct dma_fence *fence); +diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h +index b28fe74b7725..ac96bed48f38 100644 +--- a/include/uapi/drm/amdgpu_drm.h ++++ b/include/uapi/drm/amdgpu_drm.h +@@ -587,6 +587,8 @@ struct drm_amdgpu_gem_va { + #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 + #define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 + #define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 ++#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08 ++#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09 + + struct drm_amdgpu_cs_chunk { + __u32 chunk_id; +@@ -667,6 +669,12 @@ struct drm_amdgpu_cs_chunk_sem { + __u32 handle; + }; + ++struct drm_amdgpu_cs_chunk_syncobj { ++ __u32 handle; ++ __u32 flags; ++ __u64 point; ++}; ++ + #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0 + #define 
AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1 + #define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2 +-- +2.17.1 + |