diff options
Diffstat (limited to 'common/recipes-kernel/linux')
12 files changed, 1753 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/files/1138-add-new-semaphore-object-in-kernel-side.patch b/common/recipes-kernel/linux/files/1138-add-new-semaphore-object-in-kernel-side.patch new file mode 100644 index 00000000..f27f1afc --- /dev/null +++ b/common/recipes-kernel/linux/files/1138-add-new-semaphore-object-in-kernel-side.patch @@ -0,0 +1,504 @@ +From d29a89414316f4c54a1a619527398714b091d3db Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Mon, 14 Nov 2016 12:26:18 +0530 +Subject: [PATCH] add new semaphore object in kernel side + +So that a semaphore can be shared across processes and across devices. + +Change-Id: Ie82cace6af81e2ddf45f4bbf9f3c0dafd6bcc499 +Signed-off-by: Chunming Zhou <David1.Zhou@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 + + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 6 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 267 ++++++++++++++++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h | 44 ++++ + drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h | 29 +++ + 8 files changed, 361 insertions(+), 4 deletions(-) + create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c + create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h + +diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile +index 28e8e4c..2acc7c1 100644 +--- a/drivers/gpu/drm/amd/amdgpu/Makefile ++++ b/drivers/gpu/drm/amd/amdgpu/Makefile +@@ -31,7 +31,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ + amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ + atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ + amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ +- amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o ++ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ ++ amdgpu_sem.o + + # add asic 
specific block + amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index d3de21d..3f5d2ad 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -1000,6 +1000,8 @@ struct amdgpu_ctx_ring { + uint64_t sequence; + struct fence **fences; + struct amd_sched_entity entity; ++ struct list_head sem_list; ++ struct mutex sem_lock; + /* client id */ + u64 client_id; + }; +@@ -1699,6 +1701,8 @@ struct amdgpu_vce { + struct amdgpu_irq_src irq; + unsigned harvest_config; + struct amd_sched_entity entity; ++ struct list_head sem_list; ++ struct mutex sem_lock; + }; + + /* +@@ -1872,6 +1876,13 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, + int amdgpu_freesync_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + ++int amdgpu_sem_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *filp); ++ ++int amdgpu_sem_add_cs(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, ++ struct amdgpu_sync *sync); ++ ++ + /* VRAM scratch page for HDP bug, default vram page */ + struct amdgpu_vram_scratch { + struct amdgpu_bo *robj; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +index 0d1346c..bb6057a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -882,7 +882,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + } + } + +- return 0; ++ return amdgpu_sem_add_cs(p->ctx, p->job->ring, &p->job->sync); + } + + static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +index 17e1362..a020e22 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +@@ -42,6 +42,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) + for (i = 0; i 
< AMDGPU_MAX_RINGS; ++i) { + ctx->rings[i].sequence = 1; + ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; ++ INIT_LIST_HEAD(&ctx->rings[i].sem_list); ++ mutex_init(&ctx->rings[i].sem_lock); + } + /* create context entity for each ring */ + for (i = 0; i < adev->num_rings; i++) { +@@ -74,8 +76,10 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) + return; + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) +- for (j = 0; j < amdgpu_sched_jobs; ++j) ++ for (j = 0; j < amdgpu_sched_jobs; ++j) { + fence_put(ctx->rings[i].fences[j]); ++ mutex_destroy(&ctx->rings[i].sem_lock); ++ } + kfree(ctx->fences); + + for (i = 0; i < adev->num_rings; i++) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index f6ae587..a48783e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -786,6 +786,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +- DRM_IOCTL_DEF_DRV(AMDGPU_FREESYNC, amdgpu_freesync_ioctl, DRM_MASTER) ++ DRM_IOCTL_DEF_DRV(AMDGPU_FREESYNC, amdgpu_freesync_ioctl, DRM_MASTER), ++ DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + }; + const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c +new file mode 100644 +index 0000000..db16baa +--- /dev/null ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c +@@ -0,0 +1,267 @@ ++/* ++ * Copyright 2016 Advanced Micro Devices, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: ++ * Chunming Zhou <david1.zhou@amd.com> ++ */ ++#include <linux/file.h> ++#include <linux/fs.h> ++#include <linux/kernel.h> ++#include <linux/poll.h> ++#include <linux/seq_file.h> ++#include <linux/export.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/uaccess.h> ++#include <linux/anon_inodes.h> ++#include "amdgpu_sem.h" ++#include "amdgpu.h" ++#include <drm/drmP.h> ++ ++static int amdgpu_sem_cring_add(struct amdgpu_fpriv *fpriv, ++ struct drm_amdgpu_sem_in *in, ++ struct amdgpu_sem *sem); ++ ++static const struct file_operations amdgpu_sem_fops; ++ ++static struct amdgpu_sem *amdgpu_sem_alloc(struct fence *fence) ++{ ++ struct amdgpu_sem *sem; ++ ++ sem = kzalloc(sizeof(struct amdgpu_sem), GFP_KERNEL); ++ if (!sem) ++ return NULL; ++ ++ sem->file = anon_inode_getfile("sem_file", ++ &amdgpu_sem_fops, ++ sem, 0); ++ if (IS_ERR(sem->file)) ++ goto err; ++ ++ kref_init(&sem->kref); ++ INIT_LIST_HEAD(&sem->list); ++ /* fence should be get before passing here */ ++ sem->fence = fence; ++ ++ return sem; ++err: ++ kfree(sem); ++ return NULL; ++} ++ ++static void amdgpu_sem_free(struct kref *kref) ++{ ++ struct amdgpu_sem *sem = container_of( ++ kref, struct amdgpu_sem, kref); ++ ++ fence_put(sem->fence); ++ kfree(sem); ++} ++ ++static int amdgpu_sem_release(struct inode *inode, struct file *file) ++{ ++ struct amdgpu_sem *sem = file->private_data; ++ ++ kref_put(&sem->kref, amdgpu_sem_free); ++ return 0; ++} ++ ++static unsigned int amdgpu_sem_poll(struct file *file, poll_table *wait) ++{ ++ return 0; ++} ++ ++static long amdgpu_sem_file_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return 0; ++} ++ ++static const struct file_operations amdgpu_sem_fops = { ++ .release = amdgpu_sem_release, ++ .poll = amdgpu_sem_poll, ++ .unlocked_ioctl = amdgpu_sem_file_ioctl, ++ .compat_ioctl = amdgpu_sem_file_ioctl, ++}; ++ ++static int amdgpu_sem_create(void) ++{ ++ return get_unused_fd_flags(O_CLOEXEC); ++} ++ 
++static int amdgpu_sem_signal(int fd, struct fence *fence) ++{ ++ struct amdgpu_sem *sem; ++ ++ sem = amdgpu_sem_alloc(fence); ++ if (!sem) ++ return -ENOMEM; ++ fd_install(fd, sem->file); ++ ++ return 0; ++} ++ ++static int amdgpu_sem_wait(int fd, struct amdgpu_fpriv *fpriv, ++ struct drm_amdgpu_sem_in *in) ++{ ++ struct file *file = fget(fd); ++ struct amdgpu_sem *sem; ++ int r; ++ ++ if (!file) ++ return -EINVAL; ++ ++ sem = file->private_data; ++ if (!sem) { ++ r = -EINVAL; ++ goto err; ++ } ++ r = amdgpu_sem_cring_add(fpriv, in, sem); ++err: ++ fput(file); ++ return r; ++} ++ ++static void amdgpu_sem_destroy(void) ++{ ++ /* userspace should close fd when they try to destroy sem, ++ * closing fd will free semaphore object. ++ */ ++} ++ ++static struct fence *amdgpu_sem_get_fence(struct amdgpu_fpriv *fpriv, ++ struct drm_amdgpu_sem_in *in) ++{ ++ struct amdgpu_ring *out_ring; ++ struct amdgpu_ctx *ctx; ++ struct fence *fence; ++ uint32_t ctx_id, ip_type, ip_instance, ring; ++ int r; ++ ++ ctx_id = in->ctx_id; ++ ip_type = in->ip_type; ++ ip_instance = in->ip_instance; ++ ring = in->ring; ++ ctx = amdgpu_ctx_get(fpriv, ctx_id); ++ if (!ctx) ++ return NULL; ++ r = amdgpu_cs_get_ring(ctx->adev, ip_type, ip_instance, ring, ++ &out_ring); ++ if (r) { ++ amdgpu_ctx_put(ctx); ++ return NULL; ++ } ++ /* get the last fence of this entity */ ++ fence = amdgpu_ctx_get_fence(ctx, out_ring, ++ in->seq ? 
in->seq : ++ ctx->rings[out_ring->idx].sequence - 1); ++ amdgpu_ctx_put(ctx); ++ ++ return fence; ++} ++ ++static int amdgpu_sem_cring_add(struct amdgpu_fpriv *fpriv, ++ struct drm_amdgpu_sem_in *in, ++ struct amdgpu_sem *sem) ++{ ++ struct amdgpu_ring *out_ring; ++ struct amdgpu_ctx *ctx; ++ uint32_t ctx_id, ip_type, ip_instance, ring; ++ int r; ++ ++ ctx_id = in->ctx_id; ++ ip_type = in->ip_type; ++ ip_instance = in->ip_instance; ++ ring = in->ring; ++ ctx = amdgpu_ctx_get(fpriv, ctx_id); ++ if (!ctx) ++ return -EINVAL; ++ r = amdgpu_cs_get_ring(ctx->adev, ip_type, ip_instance, ring, ++ &out_ring); ++ if (r) ++ goto err; ++ mutex_lock(&ctx->rings[out_ring->idx].sem_lock); ++ list_add(&sem->list, &ctx->rings[out_ring->idx].sem_list); ++ mutex_unlock(&ctx->rings[out_ring->idx].sem_lock); ++ ++err: ++ amdgpu_ctx_put(ctx); ++ return r; ++} ++ ++int amdgpu_sem_add_cs(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, ++ struct amdgpu_sync *sync) ++{ ++ struct amdgpu_sem *sem, *tmp; ++ int r = 0; ++ ++ if (list_empty(&ctx->rings[ring->idx].sem_list)) ++ return 0; ++ ++ mutex_lock(&ctx->rings[ring->idx].sem_lock); ++ list_for_each_entry_safe(sem, tmp, &ctx->rings[ring->idx].sem_list, ++ list) { ++ r = amdgpu_sync_fence(ctx->adev, sync, sem->fence); ++ fence_put(sem->fence); ++ if (r) ++ goto err; ++ list_del(&sem->list); ++ kfree(sem); ++ } ++err: ++ mutex_unlock(&ctx->rings[ring->idx].sem_lock); ++ return r; ++} ++ ++int amdgpu_sem_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *filp) ++{ ++ union drm_amdgpu_sem *args = data; ++ struct amdgpu_fpriv *fpriv = filp->driver_priv; ++ struct fence *fence; ++ int r = 0; ++ int fd = args->in.fd; ++ ++ switch (args->in.op) { ++ case AMDGPU_SEM_OP_CREATE_SEM: ++ args->out.fd = amdgpu_sem_create(); ++ break; ++ case AMDGPU_SEM_OP_WAIT_SEM: ++ r = amdgpu_sem_wait(fd, fpriv, &args->in); ++ break; ++ case AMDGPU_SEM_OP_SIGNAL_SEM: ++ fence = amdgpu_sem_get_fence(fpriv, &args->in); ++ if (IS_ERR(fence)) { ++ r = 
PTR_ERR(fence); ++ return r; ++ } ++ r = amdgpu_sem_signal(fd, fence); ++ fence_put(fence); ++ break; ++ case AMDGPU_SEM_OP_DESTROY_SEM: ++ amdgpu_sem_destroy(); ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return r; ++} +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h +new file mode 100644 +index 0000000..56d59d3 +--- /dev/null ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h +@@ -0,0 +1,44 @@ ++/* ++ * Copyright 2016 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: Chunming Zhou <david1.zhou@amd.com> ++ * ++ */ ++ ++ ++#ifndef _LINUX_AMDGPU_SEM_H ++#define _LINUX_AMDGPU_SEM_H ++ ++#include <linux/types.h> ++#include <linux/kref.h> ++#include <linux/ktime.h> ++#include <linux/list.h> ++#include <linux/spinlock.h> ++#include <linux/fence.h> ++ ++struct amdgpu_sem { ++ struct file *file; ++ struct kref kref; ++ struct fence *fence; ++ struct list_head list; ++}; ++ ++#endif /* _LINUX_AMDGPU_SEM_H */ +diff --git a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +index b06e3dc..65153bf 100644 +--- a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h ++++ b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +@@ -46,6 +46,7 @@ + #define DRM_AMDGPU_WAIT_CS 0x09 + #define DRM_AMDGPU_GEM_OP 0x10 + #define DRM_AMDGPU_GEM_USERPTR 0x11 ++#define DRM_AMDGPU_SEM 0x5b + #define DRM_AMDGPU_FREESYNC 0x14 + + #define DRM_AMDGPU_WAIT_FENCES 0x5e +@@ -64,6 +65,7 @@ + #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) + #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) + #define DRM_IOCTL_AMDGPU_FREESYNC DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FREESYNC, struct drm_amdgpu_freesync) ++#define DRM_IOCTL_AMDGPU_SEM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem) + + #define AMDGPU_GEM_DOMAIN_CPU 0x1 + #define AMDGPU_GEM_DOMAIN_GTT 0x2 +@@ -185,6 +187,33 @@ union drm_amdgpu_ctx { + union drm_amdgpu_ctx_out out; + }; + ++/* sem related */ ++#define AMDGPU_SEM_OP_CREATE_SEM 1 ++#define AMDGPU_SEM_OP_WAIT_SEM 2 ++#define AMDGPU_SEM_OP_SIGNAL_SEM 3 ++#define AMDGPU_SEM_OP_DESTROY_SEM 4 ++ ++struct drm_amdgpu_sem_in { ++ /** AMDGPU_SEM_OP_* */ ++ uint32_t op; ++ int32_t fd; ++ uint32_t ctx_id; ++ uint32_t ip_type; ++ uint32_t ip_instance; ++ uint32_t ring; ++ uint64_t seq; ++}; ++ ++union drm_amdgpu_sem_out { ++ int32_t fd; ++ uint32_t 
_pad; ++}; ++ ++union drm_amdgpu_sem { ++ struct drm_amdgpu_sem_in in; ++ union drm_amdgpu_sem_out out; ++}; ++ + /* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have fallback path that do not use userptr to +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1139-unify-memory-query-info-interface.patch b/common/recipes-kernel/linux/files/1139-unify-memory-query-info-interface.patch new file mode 100644 index 00000000..b7c965a3 --- /dev/null +++ b/common/recipes-kernel/linux/files/1139-unify-memory-query-info-interface.patch @@ -0,0 +1,113 @@ +From 314642915b4a2bda146fb9d900ca99eabeab36c0 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Mon, 14 Nov 2016 12:13:41 +0530 +Subject: [PATCH 01/10] unify memory query info interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Change-Id: I74d2b7379bc4febe714a91daf4e1786895de90f2 +Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com> +Reviewed-by: Marek Olšák <marek.olsak@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 32 +++++++++++++++++++++++ + drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h | 32 +++++++++++++++++++++++ + 2 files changed, 64 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index 31c20ba..a48783e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -390,6 +390,38 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + return copy_to_user(out, &vram_gtt, + min((size_t)size, sizeof(vram_gtt))) ? 
-EFAULT : 0; + } ++ ++ case AMDGPU_INFO_MEMORY: { ++ struct drm_amdgpu_memory_info mem; ++ ++ memset(&mem, 0, sizeof(mem)); ++ mem.vram.total_heap_size = adev->mc.real_vram_size; ++ mem.vram.usable_heap_size = ++ adev->mc.real_vram_size - adev->vram_pin_size; ++ mem.vram.heap_usage = atomic64_read(&adev->vram_usage); ++ mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; ++ ++ mem.cpu_accessible_vram.total_heap_size = ++ adev->mc.visible_vram_size; ++ mem.cpu_accessible_vram.usable_heap_size = ++ adev->mc.visible_vram_size - ++ (adev->vram_pin_size - adev->invisible_pin_size); ++ mem.cpu_accessible_vram.heap_usage = ++ atomic64_read(&adev->vram_vis_usage); ++ mem.cpu_accessible_vram.max_allocation = ++ mem.cpu_accessible_vram.usable_heap_size * 3 / 4; ++ ++ mem.gtt.total_heap_size = adev->mc.gtt_size; ++ mem.gtt.usable_heap_size = ++ adev->mc.gtt_size - adev->gart_pin_size; ++ mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage); ++ mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; ++ ++ return copy_to_user(out, &mem, ++ min((size_t)size, sizeof(mem))) ++ ? -EFAULT : 0; ++ } ++ + case AMDGPU_INFO_READ_MMR_REG: { + unsigned n, alloc_size; + uint32_t *regs; +diff --git a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +index 4d7d982..3f13a87 100644 +--- a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h ++++ b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +@@ -540,6 +540,10 @@ struct drm_amdgpu_cs_chunk_data { + #define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 + /* virtual range */ + #define AMDGPU_INFO_VIRTUAL_RANGE 0x18 ++ ++/* Query memory about VRAM and GTT domains */ ++#define AMDGPU_INFO_MEMORY 0x19 ++ + /* gpu capability */ + #define AMDGPU_INFO_CAPABILITY 0x50 + /* query pin memory capability */ +@@ -705,6 +709,34 @@ struct drm_amdgpu_info_hw_ip { + __u32 _pad; + }; + ++struct drm_amdgpu_heap_info { ++ /** max. physical memory */ ++ __u64 total_heap_size; ++ ++ /** Theoretical max. 
available memory in the given heap */ ++ __u64 usable_heap_size; ++ ++ /** ++ * Number of bytes allocated in the heap. This includes all processes ++ * and private allocations in the kernel. It changes when new buffers ++ * are allocated, freed, and moved. It cannot be larger than ++ * heap_size. ++ */ ++ __u64 heap_usage; ++ ++ /** ++ * Theoretical possible max. size of buffer which ++ * could be allocated in the given heap ++ */ ++ __u64 max_allocation; ++}; ++ ++struct drm_amdgpu_memory_info { ++ struct drm_amdgpu_heap_info vram; ++ struct drm_amdgpu_heap_info cpu_accessible_vram; ++ struct drm_amdgpu_heap_info gtt; ++}; ++ + /* + * Supported GPU families + */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1140-dma-buf-return-index-of-the-first-signaled-fence.patch b/common/recipes-kernel/linux/files/1140-dma-buf-return-index-of-the-first-signaled-fence.patch new file mode 100644 index 00000000..76815764 --- /dev/null +++ b/common/recipes-kernel/linux/files/1140-dma-buf-return-index-of-the-first-signaled-fence.patch @@ -0,0 +1,188 @@ +From 7e06443930ab2fabda1977c20ff82ff6bc42e3be Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Tue, 15 Nov 2016 14:30:58 +0530 +Subject: [PATCH 02/10] dma-buf: return index of the first signaled fence + +Return the index of the first signaled fence. This information +is useful in some APIs like Vulkan. 
+ +Signed-off-by: monk.liu <monk.liu@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Cc: Sumit Semwal <sumit.semwal@linaro.org> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/dma-buf/fence.c | 19 ++++++++++++++----- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++- + drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 2 +- + drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h | 21 +++++++++++---------- + include/linux/fence.h | 2 +- + 5 files changed, 30 insertions(+), 18 deletions(-) + +diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c +index 7b05dbe..192f99b 100644 +--- a/drivers/dma-buf/fence.c ++++ b/drivers/dma-buf/fence.c +@@ -398,14 +398,17 @@ out: + EXPORT_SYMBOL(fence_default_wait); + + static bool +-fence_test_signaled_any(struct fence **fences, uint32_t count) ++fence_test_signaled_any(struct fence **fences, uint32_t count, uint32_t *idx) + { + int i; + + for (i = 0; i < count; ++i) { + struct fence *fence = fences[i]; +- if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) ++ if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { ++ if (idx) ++ *idx = i; + return true; ++ } + } + return false; + } +@@ -417,6 +420,7 @@ fence_test_signaled_any(struct fence **fences, uint32_t count) + * @count: [in] number of fences to wait on + * @intr: [in] if true, do an interruptible wait + * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT ++ * @idx: [out] index of the first signaled fence, meaningful only when the return value is positive + * + * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if + * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies +@@ -428,7 +432,7 @@ fence_test_signaled_any(struct fence **fences, uint32_t count) + */ + signed long + fence_wait_any_timeout(struct fence **fences, uint32_t count, +- bool intr, signed long timeout) ++ bool intr, signed long timeout, uint32_t *idx) + { + struct default_wait_cb *cb; + signed long ret = timeout; +@@ -439,8 +443,11 @@ 
fence_wait_any_timeout(struct fence **fences, uint32_t count, + + if (timeout == 0) { + for (i = 0; i < count; ++i) +- if (fence_is_signaled(fences[i])) ++ if (fence_is_signaled(fences[i])) { ++ if (idx) ++ *idx = i; + return 1; ++ } + + return 0; + } +@@ -463,6 +470,8 @@ fence_wait_any_timeout(struct fence **fences, uint32_t count, + if (fence_add_callback(fence, &cb[i].base, + fence_default_wait_cb)) { + /* This fence is already signaled */ ++ if (idx) ++ *idx = i; + goto fence_rm_cb; + } + } +@@ -473,7 +482,7 @@ fence_wait_any_timeout(struct fence **fences, uint32_t count, + else + set_current_state(TASK_UNINTERRUPTIBLE); + +- if (fence_test_signaled_any(fences, count)) ++ if (fence_test_signaled_any(fences, count, idx)) + break; + + ret = schedule_timeout(ret); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +index bb6057a..181e2b7 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -1107,6 +1107,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev, + { + unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns); + uint32_t fence_count = wait->in.fence_count; ++ uint32_t first = ~0; + struct fence **array; + unsigned i; + long r; +@@ -1132,13 +1133,14 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev, + } + } + +- r = fence_wait_any_timeout(array, fence_count, true, timeout); ++ r = fence_wait_any_timeout(array, fence_count, true, timeout, &first); + if (r < 0) + goto err_free_fence_array; + + out: + memset(wait, 0, sizeof(*wait)); + wait->out.status = (r > 0); ++ wait->out.first_signaled = first; + /* set return value 0 to indicate success */ + r = 0; + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +index 8bf84ef..9f4311c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +@@ -360,7 +360,7 @@ int amdgpu_sa_bo_new(struct 
amdgpu_sa_manager *sa_manager, + if (count) { + spin_unlock(&sa_manager->wq.lock); + t = fence_wait_any_timeout(fences, count, false, +- MAX_SCHEDULE_TIMEOUT); ++ MAX_SCHEDULE_TIMEOUT, NULL); + for (i = 0; i < count; ++i) + fence_put(fences[i]); + +diff --git a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +index 3f13a87..c2f06eb 100644 +--- a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h ++++ b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +@@ -334,23 +334,24 @@ union drm_amdgpu_wait_cs { + }; + + struct drm_amdgpu_fence { +- uint32_t ctx_id; +- uint32_t ip_type; +- uint32_t ip_instance; +- uint32_t ring; +- uint64_t seq_no; ++ __u32 ctx_id; ++ __u32 ip_type; ++ __u32 ip_instance; ++ __u32 ring; ++ __u64 seq_no; + }; + + struct drm_amdgpu_wait_fences_in { + /** This points to uint64_t * which points to fences */ +- uint64_t fences; +- uint32_t fence_count; +- uint32_t wait_all; +- uint64_t timeout_ns; ++ __u64 fences; ++ __u32 fence_count; ++ __u32 wait_all; ++ __u64 timeout_ns; + }; + + struct drm_amdgpu_wait_fences_out { +- uint64_t status; ++ __u32 status; ++ __u32 first_signaled; + }; + + union drm_amdgpu_wait_fences { +diff --git a/include/linux/fence.h b/include/linux/fence.h +index bb52201..b8da489 100644 +--- a/include/linux/fence.h ++++ b/include/linux/fence.h +@@ -322,7 +322,7 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) + + signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); + signed long fence_wait_any_timeout(struct fence **fences, uint32_t count, +- bool intr, signed long timeout); ++ bool intr, signed long timeout, uint32_t *idx); + + /** + * fence_wait - sleep until the fence gets signaled +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1141-Fix-a-deadlock-affecting-ww_mutexes.patch b/common/recipes-kernel/linux/files/1141-Fix-a-deadlock-affecting-ww_mutexes.patch new file mode 100644 index 00000000..25312ef1 --- 
/dev/null +++ b/common/recipes-kernel/linux/files/1141-Fix-a-deadlock-affecting-ww_mutexes.patch @@ -0,0 +1,121 @@ +From 5f3c992c00f95a483cf01d55b8ff0fa1fe6df216 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Wed, 23 Nov 2016 14:54:46 +0530 +Subject: [PATCH 03/10] Fix a deadlock affecting ww_mutexes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This patch fixes a race condition involving 4 threads and 2 ww_mutexes +as indicated in the following example. Acquire context stamps are ordered +like the thread numbers, i.e. thread #1 should back off when it encounters +a mutex locked by thread #0 etc. + +Thread #0 Thread #1 Thread #2 Thread #3 +--------- --------- --------- --------- + lock(ww) + lock(ww') + lock(ww) + lock(ww) + unlock(ww) part 1 +lock(ww) + unlock(ww) part 2 + back off +lock(ww') + +Here, unlock(ww) part 1 is the part that sets lock->base.count to 1 +(without being protected by lock->base.wait_lock), meaning that thread #0 +can acquire ww in the fast path. Since lock->base.count == 0, thread #0 +won't wake up any of the waiters. + +Then, unlock(ww) part 2 wakes up _only_the_first_ waiter of ww. This is +thread #2, since waiters are added at the tail. Thread #2 wakes up and +backs off since it sees ww owned by a context with a lower stamp. + +Meanwhile, thread #1 is never woken up, and so it won't back off its lock +on ww'. So thread #0 gets stuck waiting for ww' to be released. + +This patch fixes the deadlock by waking up all waiters in the slow path +of ww_mutex_unlock. + +We have an internal test case for amdgpu which continuously submits +command streams from tens of threads, where all command streams reference +hundreds of GPU buffer objects with a lot of overlap in the buffer lists +between command streams. This test reliably caused a deadlock, and while I +haven't completely confirmed that it is exactly the scenario outlined +above, this patch does fix the test case. 
+ +Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + kernel/locking/mutex.c | 26 ++++++++++++++++++++++---- + 1 file changed, 22 insertions(+), 4 deletions(-) + +diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c +index 0551c21..39fa58a 100644 +--- a/kernel/locking/mutex.c ++++ b/kernel/locking/mutex.c +@@ -409,6 +409,10 @@ static bool mutex_optimistic_spin(struct mutex *lock, + __visible __used noinline + void __sched __mutex_unlock_slowpath(atomic_t *lock_count); + ++static __used noinline ++void __sched __mutex_unlock_slowpath_wakeall(atomic_t *lock_count); ++ ++ + /** + * mutex_unlock - release the mutex + * @lock: the mutex to be released +@@ -473,7 +477,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock) + */ + mutex_clear_owner(&lock->base); + #endif +- __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath); ++ __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath_wakeall); + } + EXPORT_SYMBOL(ww_mutex_unlock); + +@@ -713,7 +717,7 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); + * Release the lock, slowpath: + */ + static inline void +-__mutex_unlock_common_slowpath(struct mutex *lock, int nested) ++__mutex_unlock_common_slowpath(struct mutex *lock, int nested, int wake_all) + { + unsigned long flags; + +@@ -736,7 +740,13 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested) + mutex_release(&lock->dep_map, nested, _RET_IP_); + debug_mutex_unlock(lock); + +- if (!list_empty(&lock->wait_list)) { ++ if (wake_all) { ++ struct mutex_waiter *waiter; ++ list_for_each_entry(waiter, &lock->wait_list, list) { ++ debug_mutex_wake_waiter(lock, waiter); ++ wake_up_process(waiter->task); ++ } ++ } else if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ + struct mutex_waiter *waiter = + list_entry(lock->wait_list.next, +@@ -758,7 +768,15 @@ __mutex_unlock_slowpath(atomic_t *lock_count) + { + struct mutex *lock = 
container_of(lock_count, struct mutex, count); + +- __mutex_unlock_common_slowpath(lock, 1); ++ __mutex_unlock_common_slowpath(lock, 1, 0); ++} ++ ++static void ++__mutex_unlock_slowpath_wakeall(atomic_t *lock_count) ++{ ++ struct mutex *lock = container_of(lock_count, struct mutex, count); ++ ++ __mutex_unlock_common_slowpath(lock, 1, 1); + } + + #ifndef CONFIG_DEBUG_LOCK_ALLOC +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1142-add-additional-cached-gca-config-variables.patch b/common/recipes-kernel/linux/files/1142-add-additional-cached-gca-config-variables.patch new file mode 100644 index 00000000..c9c426f9 --- /dev/null +++ b/common/recipes-kernel/linux/files/1142-add-additional-cached-gca-config-variables.patch @@ -0,0 +1,51 @@ +From caa6b72d0c01491114f017fe3bca7adc05194611 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Tue, 6 Dec 2016 17:07:10 +0530 +Subject: [PATCH 04/10] add additional cached gca config variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We need to cache some additional values to handle SR-IOV +and PG. 
+ +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 3f5d2ad..40497c2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -1135,6 +1135,16 @@ struct amdgpu_scratch { + /* + * GFX configurations + */ ++#define AMDGPU_GFX_MAX_SE 4 ++#define AMDGPU_GFX_MAX_SH_PER_SE 2 ++ ++struct amdgpu_rb_config { ++ uint32_t rb_backend_disable; ++ uint32_t user_rb_backend_disable; ++ uint32_t raster_config; ++ uint32_t raster_config_1; ++}; ++ + struct amdgpu_gca_config { + unsigned max_shader_engines; + unsigned max_tile_pipes; +@@ -1163,6 +1173,8 @@ struct amdgpu_gca_config { + + uint32_t tile_mode_array[32]; + uint32_t macrotile_mode_array[16]; ++ ++ struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; + }; + + struct amdgpu_gfx { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1143-implement-raster-configuration-for-gfx-v8.patch b/common/recipes-kernel/linux/files/1143-implement-raster-configuration-for-gfx-v8.patch new file mode 100644 index 00000000..c1a271f2 --- /dev/null +++ b/common/recipes-kernel/linux/files/1143-implement-raster-configuration-for-gfx-v8.patch @@ -0,0 +1,262 @@ +From 705f105de150240594945703df70f82d5ab861ce Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Tue, 6 Dec 2016 19:33:01 +0530 +Subject: [PATCH 05/10] implement raster configuration for gfx v8 + +This patch is to implement the raster configuration and harvested +configuration of gfx v8. 
+ +Signed-off-by: Huang Rui <ray.huang@amd.com> +Reviewed-by: Alex Deucher <alexander.deucher@amd.com> +Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 168 +++++++++++++++++++++++++++++++++- + drivers/gpu/drm/amd/amdgpu/vid.h | 37 ++++++++ + 2 files changed, 204 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index c5a3d04..20ac07f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -3484,13 +3484,163 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) + return (~data) & mask; + } + ++static void ++gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) ++{ ++ switch (adev->asic_type) { ++ case CHIP_FIJI: ++ *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | ++ RB_XSEL2(1) | PKR_MAP(2) | ++ PKR_XSEL(1) | PKR_YSEL(1) | ++ SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); ++ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | ++ SE_PAIR_YSEL(2); ++ break; ++ case CHIP_TONGA: ++ case CHIP_POLARIS10: ++ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | ++ SE_XSEL(1) | SE_YSEL(1); ++ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | ++ SE_PAIR_YSEL(2); ++ break; ++ case CHIP_TOPAZ: ++ case CHIP_CARRIZO: ++ *rconf |= RB_MAP_PKR0(2); ++ *rconf1 |= 0x0; ++ break; ++ case CHIP_POLARIS11: ++ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | ++ SE_XSEL(1) | SE_YSEL(1); ++ *rconf1 |= 0x0; ++ break; ++ case CHIP_STONEY: ++ *rconf |= 0x0; ++ *rconf1 |= 0x0; ++ break; ++ default: ++ DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); ++ break; ++ } ++} ++ ++static void ++gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, ++ u32 raster_config, u32 raster_config_1, ++ unsigned rb_mask, unsigned num_rb) ++{ ++ unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); ++ unsigned num_se = max_t(unsigned, 
adev->gfx.config.max_shader_engines, 1); ++ unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); ++ unsigned rb_per_se = num_rb / num_se; ++ unsigned se_mask[4]; ++ unsigned se; ++ ++ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; ++ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; ++ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; ++ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; ++ ++ WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); ++ WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); ++ WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); ++ ++ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || ++ (!se_mask[2] && !se_mask[3]))) { ++ raster_config_1 &= ~SE_PAIR_MAP_MASK; ++ ++ if (!se_mask[0] && !se_mask[1]) { ++ raster_config_1 |= ++ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); ++ } else { ++ raster_config_1 |= ++ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); ++ } ++ } ++ ++ for (se = 0; se < num_se; se++) { ++ unsigned raster_config_se = raster_config; ++ unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); ++ unsigned pkr1_mask = pkr0_mask << rb_per_pkr; ++ int idx = (se / 2) * 2; ++ ++ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { ++ raster_config_se &= ~SE_MAP_MASK; ++ ++ if (!se_mask[idx]) { ++ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); ++ } else { ++ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); ++ } ++ } ++ ++ pkr0_mask &= rb_mask; ++ pkr1_mask &= rb_mask; ++ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { ++ raster_config_se &= ~PKR_MAP_MASK; ++ ++ if (!pkr0_mask) { ++ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); ++ } else { ++ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); ++ } ++ } ++ ++ if (rb_per_se >= 2) { ++ unsigned rb0_mask = 1 << (se * rb_per_se); ++ unsigned rb1_mask = rb0_mask << 1; ++ ++ rb0_mask &= rb_mask; ++ rb1_mask &= rb_mask; ++ if (!rb0_mask || !rb1_mask) { ++ raster_config_se &= ~RB_MAP_PKR0_MASK; ++ ++ if (!rb0_mask) { ++ raster_config_se |= ++ 
RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); ++ } else { ++ raster_config_se |= ++ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); ++ } ++ } ++ ++ if (rb_per_se > 2) { ++ rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); ++ rb1_mask = rb0_mask << 1; ++ rb0_mask &= rb_mask; ++ rb1_mask &= rb_mask; ++ if (!rb0_mask || !rb1_mask) { ++ raster_config_se &= ~RB_MAP_PKR1_MASK; ++ ++ if (!rb0_mask) { ++ raster_config_se |= ++ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); ++ } else { ++ raster_config_se |= ++ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); ++ } ++ } ++ } ++ } ++ ++ /* GRBM_GFX_INDEX has a different offset on VI */ ++ gfx_v8_0_select_se_sh(adev, se, 0xffffffff); ++ WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); ++ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); ++ } ++ ++ /* GRBM_GFX_INDEX has a different offset on VI */ ++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); ++} ++ + static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) + { + int i, j; + u32 data; ++ u32 raster_config = 0, raster_config_1 = 0; + u32 active_rbs = 0; + u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; ++ unsigned num_rb_pipes; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { +@@ -3502,10 +3652,26 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) + } + } + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); +- mutex_unlock(&adev->grbm_idx_mutex); + + adev->gfx.config.backend_enable_mask = active_rbs; + adev->gfx.config.num_rbs = hweight32(active_rbs); ++ ++ num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * ++ adev->gfx.config.max_shader_engines, 16); ++ ++ gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); ++ ++ if (!adev->gfx.config.backend_enable_mask || ++ adev->gfx.config.num_rbs >= num_rb_pipes) { ++ WREG32(mmPA_SC_RASTER_CONFIG, raster_config); ++ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); ++ } else { ++ gfx_v8_0_write_harvested_raster_configs(adev, 
raster_config, raster_config_1, ++ adev->gfx.config.backend_enable_mask, ++ num_rb_pipes); ++ } ++ ++ mutex_unlock(&adev->grbm_idx_mutex); + } + + /** +diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h +index 3bf7172..4bd2bfd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vid.h ++++ b/drivers/gpu/drm/amd/amdgpu/vid.h +@@ -368,4 +368,41 @@ + #define VCE_CMD_IB_AUTO 0x00000005 + #define VCE_CMD_SEMAPHORE 0x00000006 + ++ ++/* mmPA_SC_RASTER_CONFIG mask */ ++#define RB_MAP_PKR0(x) ((x) << 0) ++#define RB_MAP_PKR0_MASK (0x3 << 0) ++#define RB_MAP_PKR1(x) ((x) << 2) ++#define RB_MAP_PKR1_MASK (0x3 << 2) ++#define RB_XSEL2(x) ((x) << 4) ++#define RB_XSEL2_MASK (0x3 << 4) ++#define RB_XSEL (1 << 6) ++#define RB_YSEL (1 << 7) ++#define PKR_MAP(x) ((x) << 8) ++#define PKR_MAP_MASK (0x3 << 8) ++#define PKR_XSEL(x) ((x) << 10) ++#define PKR_XSEL_MASK (0x3 << 10) ++#define PKR_YSEL(x) ((x) << 12) ++#define PKR_YSEL_MASK (0x3 << 12) ++#define SC_MAP(x) ((x) << 16) ++#define SC_MAP_MASK (0x3 << 16) ++#define SC_XSEL(x) ((x) << 18) ++#define SC_XSEL_MASK (0x3 << 18) ++#define SC_YSEL(x) ((x) << 20) ++#define SC_YSEL_MASK (0x3 << 20) ++#define SE_MAP(x) ((x) << 24) ++#define SE_MAP_MASK (0x3 << 24) ++#define SE_XSEL(x) ((x) << 26) ++#define SE_XSEL_MASK (0x3 << 26) ++#define SE_YSEL(x) ((x) << 28) ++#define SE_YSEL_MASK (0x3 << 28) ++ ++/* mmPA_SC_RASTER_CONFIG_1 mask */ ++#define SE_PAIR_MAP(x) ((x) << 0) ++#define SE_PAIR_MAP_MASK (0x3 << 0) ++#define SE_PAIR_XSEL(x) ((x) << 2) ++#define SE_PAIR_XSEL_MASK (0x3 << 2) ++#define SE_PAIR_YSEL(x) ((x) << 4) ++#define SE_PAIR_YSEL_MASK (0x3 << 4) ++ + #endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1144-cache-rb-config-values.patch b/common/recipes-kernel/linux/files/1144-cache-rb-config-values.patch new file mode 100644 index 00000000..0c8fe273 --- /dev/null +++ b/common/recipes-kernel/linux/files/1144-cache-rb-config-values.patch @@ -0,0 +1,46 @@ +From 
0aaf3d10e376981da3d92f037c6e36a5c4e8d348 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Tue, 6 Dec 2016 19:40:46 +0530 +Subject: [PATCH 06/10] cache rb config values +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Needed for SR-IOV and when PG is enabled. + +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index 20ac07f..479047e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -3671,6 +3671,21 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) + num_rb_pipes); + } + ++ /* cache the values for userspace */ ++ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { ++ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { ++ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); ++ adev->gfx.config.rb_config[i][j].rb_backend_disable = ++ RREG32(mmCC_RB_BACKEND_DISABLE); ++ adev->gfx.config.rb_config[i][j].user_rb_backend_disable = ++ RREG32(mmGC_USER_RB_BACKEND_DISABLE); ++ adev->gfx.config.rb_config[i][j].raster_config = ++ RREG32(mmPA_SC_RASTER_CONFIG); ++ adev->gfx.config.rb_config[i][j].raster_config_1 = ++ RREG32(mmPA_SC_RASTER_CONFIG_1); ++ } ++ } ++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1145-use-cached-raster-config-values-in-csb.patch b/common/recipes-kernel/linux/files/1145-use-cached-raster-config-values-in-csb.patch new file mode 100644 index 00000000..b3b19e7b --- /dev/null +++ b/common/recipes-kernel/linux/files/1145-use-cached-raster-config-values-in-csb.patch @@ -0,0 +1,61 @@ +From 
9fcd43d6a79011dd9ab3837d38ba27454be747ad Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Tue, 6 Dec 2016 20:14:23 +0530 +Subject: [PATCH 07/10] use cached raster config values in csb +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Simplify the code and properly set the csb for harvest values. + +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 ++---------------------------- + 1 file changed, 2 insertions(+), 28 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index 479047e..dcc59f3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -1110,34 +1110,8 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - + PACKET3_SET_CONTEXT_REG_START); +- switch (adev->asic_type) { +- case CHIP_TONGA: +- case CHIP_POLARIS10: +- buffer[count++] = cpu_to_le32(0x16000012); +- buffer[count++] = cpu_to_le32(0x0000002A); +- break; +- case CHIP_POLARIS11: +- buffer[count++] = cpu_to_le32(0x16000012); +- buffer[count++] = cpu_to_le32(0x00000000); +- break; +- case CHIP_FIJI: +- buffer[count++] = cpu_to_le32(0x3a00161a); +- buffer[count++] = cpu_to_le32(0x0000002e); +- break; +- case CHIP_TOPAZ: +- case CHIP_CARRIZO: +- buffer[count++] = cpu_to_le32(0x00000002); +- buffer[count++] = cpu_to_le32(0x00000000); +- break; +- case CHIP_STONEY: +- buffer[count++] = cpu_to_le32(0x00000000); +- buffer[count++] = cpu_to_le32(0x00000000); +- break; +- default: +- buffer[count++] = cpu_to_le32(0x00000000); +- buffer[count++] = cpu_to_le32(0x00000000); +- break; +- } ++ buffer[count++] = 
cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); ++ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1146-used-cached-gca-values-for-vi_read_register.patch b/common/recipes-kernel/linux/files/1146-used-cached-gca-values-for-vi_read_register.patch new file mode 100644 index 00000000..ed7262c9 --- /dev/null +++ b/common/recipes-kernel/linux/files/1146-used-cached-gca-values-for-vi_read_register.patch @@ -0,0 +1,166 @@ +From 2ce0f44274368b2a6640c3062eb119a0de8c1056 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Wed, 7 Dec 2016 15:07:53 +0530 +Subject: [PATCH 08/10] used cached gca values for vi_read_register +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Using the cached values has less latency for bare metal +and SR-IOV, and prevents reading back bogus values if the +engine is powergated. 
+ +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/vi.c | 115 +++++++++++++++++++++++++++++++++------- + 1 file changed, 96 insertions(+), 19 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c +index 02ba429..3a42e83 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vi.c ++++ b/drivers/gpu/drm/amd/amdgpu/vi.c +@@ -513,21 +513,100 @@ static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = + {mmPA_SC_RASTER_CONFIG_1, false, true}, + }; + +-static uint32_t vi_read_indexed_register(struct amdgpu_device *adev, u32 se_num, +- u32 sh_num, u32 reg_offset) ++static uint32_t vi_get_register_value(struct amdgpu_device *adev, ++ bool indexed, u32 se_num, ++ u32 sh_num, u32 reg_offset) + { +- uint32_t val; ++ if (indexed) { ++ uint32_t val; ++ unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num; ++ unsigned sh_idx = (sh_num == 0xffffffff) ? 
0 : sh_num; ++ ++ switch (reg_offset) { ++ case mmCC_RB_BACKEND_DISABLE: ++ return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable; ++ case mmGC_USER_RB_BACKEND_DISABLE: ++ return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable; ++ case mmPA_SC_RASTER_CONFIG: ++ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config; ++ case mmPA_SC_RASTER_CONFIG_1: ++ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config_1; ++ } + +- mutex_lock(&adev->grbm_idx_mutex); +- if (se_num != 0xffffffff || sh_num != 0xffffffff) +- gfx_v8_0_select_se_sh(adev, se_num, sh_num); ++ mutex_lock(&adev->grbm_idx_mutex); ++ if (se_num != 0xffffffff || sh_num != 0xffffffff) ++ gfx_v8_0_select_se_sh(adev, se_num, sh_num); + +- val = RREG32(reg_offset); ++ val = RREG32(reg_offset); + +- if (se_num != 0xffffffff || sh_num != 0xffffffff) +- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); +- mutex_unlock(&adev->grbm_idx_mutex); +- return val; ++ if (se_num != 0xffffffff || sh_num != 0xffffffff) ++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); ++ mutex_unlock(&adev->grbm_idx_mutex); ++ return val; ++ } else { ++ unsigned idx; ++ ++ switch (reg_offset) { ++ case mmGB_ADDR_CONFIG: ++ return adev->gfx.config.gb_addr_config; ++ case mmMC_ARB_RAMCFG: ++ return adev->gfx.config.mc_arb_ramcfg; ++ case mmGB_TILE_MODE0: ++ case mmGB_TILE_MODE1: ++ case mmGB_TILE_MODE2: ++ case mmGB_TILE_MODE3: ++ case mmGB_TILE_MODE4: ++ case mmGB_TILE_MODE5: ++ case mmGB_TILE_MODE6: ++ case mmGB_TILE_MODE7: ++ case mmGB_TILE_MODE8: ++ case mmGB_TILE_MODE9: ++ case mmGB_TILE_MODE10: ++ case mmGB_TILE_MODE11: ++ case mmGB_TILE_MODE12: ++ case mmGB_TILE_MODE13: ++ case mmGB_TILE_MODE14: ++ case mmGB_TILE_MODE15: ++ case mmGB_TILE_MODE16: ++ case mmGB_TILE_MODE17: ++ case mmGB_TILE_MODE18: ++ case mmGB_TILE_MODE19: ++ case mmGB_TILE_MODE20: ++ case mmGB_TILE_MODE21: ++ case mmGB_TILE_MODE22: ++ case mmGB_TILE_MODE23: ++ case mmGB_TILE_MODE24: ++ case mmGB_TILE_MODE25: 
++ case mmGB_TILE_MODE26: ++ case mmGB_TILE_MODE27: ++ case mmGB_TILE_MODE28: ++ case mmGB_TILE_MODE29: ++ case mmGB_TILE_MODE30: ++ case mmGB_TILE_MODE31: ++ idx = (reg_offset - mmGB_TILE_MODE0); ++ return adev->gfx.config.tile_mode_array[idx]; ++ case mmGB_MACROTILE_MODE0: ++ case mmGB_MACROTILE_MODE1: ++ case mmGB_MACROTILE_MODE2: ++ case mmGB_MACROTILE_MODE3: ++ case mmGB_MACROTILE_MODE4: ++ case mmGB_MACROTILE_MODE5: ++ case mmGB_MACROTILE_MODE6: ++ case mmGB_MACROTILE_MODE7: ++ case mmGB_MACROTILE_MODE8: ++ case mmGB_MACROTILE_MODE9: ++ case mmGB_MACROTILE_MODE10: ++ case mmGB_MACROTILE_MODE11: ++ case mmGB_MACROTILE_MODE12: ++ case mmGB_MACROTILE_MODE13: ++ case mmGB_MACROTILE_MODE14: ++ case mmGB_MACROTILE_MODE15: ++ idx = (reg_offset - mmGB_MACROTILE_MODE0); ++ return adev->gfx.config.macrotile_mode_array[idx]; ++ default: ++ return RREG32(reg_offset); ++ } ++ } + } + + static int vi_read_register(struct amdgpu_device *adev, u32 se_num, +@@ -562,10 +641,9 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num, + if (reg_offset != asic_register_entry->reg_offset) + continue; + if (!asic_register_entry->untouched) +- *value = asic_register_entry->grbm_indexed ? +- vi_read_indexed_register(adev, se_num, +- sh_num, reg_offset) : +- RREG32(reg_offset); ++ *value = vi_get_register_value(adev, ++ asic_register_entry->grbm_indexed, ++ se_num, sh_num, reg_offset); + return 0; + } + } +@@ -575,10 +653,9 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num, + continue; + + if (!vi_allowed_read_registers[i].untouched) +- *value = vi_allowed_read_registers[i].grbm_indexed ? 
+- vi_read_indexed_register(adev, se_num, +- sh_num, reg_offset) : +- RREG32(reg_offset); ++ *value = vi_get_register_value(adev, ++ vi_allowed_read_registers[i].grbm_indexed, ++ se_num, sh_num, reg_offset); + return 0; + } + return -EINVAL; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1147-Removed-extra-parameter.patch b/common/recipes-kernel/linux/files/1147-Removed-extra-parameter.patch new file mode 100644 index 00000000..3ee7aa52 --- /dev/null +++ b/common/recipes-kernel/linux/files/1147-Removed-extra-parameter.patch @@ -0,0 +1,26 @@ +From 7dc74a872ca0a5502f2c8e56fdfd9af97b8da1b6 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Wed, 7 Dec 2016 21:00:00 +0530 +Subject: [PATCH 09/10] Removed extra parameter + +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index dcc59f3..d1cb4db 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -3648,7 +3648,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) + /* cache the values for userspace */ + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { +- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); ++ gfx_v8_0_select_se_sh(adev, i, j); + adev->gfx.config.rb_config[i][j].rb_backend_disable = + RREG32(mmCC_RB_BACKEND_DISABLE); + adev->gfx.config.rb_config[i][j].user_rb_backend_disable = +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1148-refine-pg-code-for-gfx_v8.patch b/common/recipes-kernel/linux/files/1148-refine-pg-code-for-gfx_v8.patch new file mode 100644 index 00000000..4416497c --- /dev/null +++ b/common/recipes-kernel/linux/files/1148-refine-pg-code-for-gfx_v8.patch @@ -0,0 +1,204 @@ +From c5ef870413c64c25cfe2a646c395b0c0d293a4f5 Mon Sep 17 
00:00:00 2001 +From: Ravi Patlegar <ravi.patlegar@amd.com> +Date: Tue, 13 Dec 2016 16:28:54 +0530 +Subject: [PATCH 10/10] refine pg code for gfx_v8. + +1. bit CP_PG_DISABLE was reversed. +2. load RLC_SRM_INDEX_CNTL_ADDR/DATA_x pairs + with valid addr/data. +3. always init gfx pg. +4. delete repeated check for pg mask. + +Signed-off-by: Rex Zhu <Rex.Zhu@amd.com> +Signed-off-by: Ravi Patlegar <ravi.patlegar@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 118 ++++++++++++---------------------- + 2 files changed, 44 insertions(+), 76 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 40497c2..af04d3b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -2211,6 +2211,8 @@ bool amdgpu_device_has_dal_support(struct amdgpu_device *adev); + #define REG_GET_FIELD(value, reg, field) \ + (((value) & REG_FIELD_MASK(reg, field)) >> REG_FIELD_SHIFT(reg, field)) + ++#define WREG32_FIELD(reg, field, val) \ ++ WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + /* + * BIOS helpers. 
+ */ +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index d1cb4db..b4c41f9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -3934,8 +3934,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) + temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; + data = mmRLC_SRM_INDEX_CNTL_DATA_0; + for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { +- amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); +- amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); ++ if (unique_indices[i] != 0) { ++ amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); ++ amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); ++ } + } + kfree(register_list_format); + +@@ -3955,32 +3957,17 @@ static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev) + { + uint32_t data; + +- if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG | +- AMDGPU_PG_SUPPORT_GFX_SMG | +- AMDGPU_PG_SUPPORT_GFX_DMG)) { +- data = RREG32(mmCP_RB_WPTR_POLL_CNTL); +- data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; +- data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); +- WREG32(mmCP_RB_WPTR_POLL_CNTL, data); +- +- data = 0; +- data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); +- data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); +- data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); +- data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); +- WREG32(mmRLC_PG_DELAY, data); +- +- data = RREG32(mmRLC_PG_DELAY_2); +- data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; +- data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); +- WREG32(mmRLC_PG_DELAY_2, data); +- +- data = RREG32(mmRLC_AUTO_PG_CTRL); +- data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; +- data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); +- WREG32(mmRLC_AUTO_PG_CTRL, data); +- } +- } ++ WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, 
IDLE_POLL_COUNT, 0x60); ++ ++ data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10); ++ data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10); ++ data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10); ++ data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10); ++ WREG32(mmRLC_PG_DELAY, data); ++ ++ WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3); ++ WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0); ++} + + static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, + bool enable) +@@ -4016,18 +4003,8 @@ static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, + + static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) + { +- u32 data, orig; +- +- orig = data = RREG32(mmRLC_PG_CNTL); +- +- if (enable) +- data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK; +- else +- data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK; +- +- if (orig != data) +- WREG32(mmRLC_PG_CNTL, data); +- } ++ WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 
0 : 1); ++} + + static void polaris11_init_power_gating(struct amdgpu_device *adev) + { +@@ -4062,39 +4039,30 @@ static void polaris11_init_power_gating(struct amdgpu_device *adev) + + static void gfx_v8_0_init_pg(struct amdgpu_device *adev) + { +- if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | +- AMD_PG_SUPPORT_GFX_SMG | +- AMD_PG_SUPPORT_GFX_DMG | +- AMD_PG_SUPPORT_CP | +- AMD_PG_SUPPORT_GDS | +- AMD_PG_SUPPORT_RLC_SMU_HS)) { +- gfx_v8_0_init_csb(adev); +- gfx_v8_0_init_save_restore_list(adev); +- gfx_v8_0_enable_save_restore_machine(adev); ++ gfx_v8_0_init_csb(adev); ++ gfx_v8_0_init_save_restore_list(adev); ++ gfx_v8_0_enable_save_restore_machine(adev); + +- if ((adev->asic_type == CHIP_CARRIZO) || +- (adev->asic_type == CHIP_STONEY)) { +- struct amdgpu_cu_info cu_info; +- +- gfx_v8_0_get_cu_info(adev, &cu_info); +- +- WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); +- gfx_v8_0_init_power_gating(adev); +- WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, cu_info.ao_cu_mask); +- if (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS) { +- cz_enable_sck_slow_down_on_power_up(adev, true); +- cz_enable_sck_slow_down_on_power_down(adev, true); +- } else { +- cz_enable_sck_slow_down_on_power_up(adev, false); +- cz_enable_sck_slow_down_on_power_down(adev, false); +- } +- if (adev->pg_flags & AMDGPU_PG_SUPPORT_CP) +- cz_enable_cp_power_gating(adev, true); +- else +- cz_enable_cp_power_gating(adev, false); +- } else if (adev->asic_type == CHIP_POLARIS11) { +- polaris11_init_power_gating(adev); ++ if ((adev->asic_type == CHIP_CARRIZO) || ++ (adev->asic_type == CHIP_STONEY)) { ++ struct amdgpu_cu_info cu_info; ++ gfx_v8_0_get_cu_info(adev, &cu_info); ++ WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); ++ gfx_v8_0_init_power_gating(adev); ++ WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, cu_info.ao_cu_mask); ++ if (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS) { ++ cz_enable_sck_slow_down_on_power_up(adev, true); ++ 
cz_enable_sck_slow_down_on_power_down(adev, true); ++ } else { ++ cz_enable_sck_slow_down_on_power_up(adev, false); ++ cz_enable_sck_slow_down_on_power_down(adev, false); + } ++ if (adev->pg_flags & AMDGPU_PG_SUPPORT_CP) ++ cz_enable_cp_power_gating(adev, true); ++ else ++ cz_enable_cp_power_gating(adev, false); ++ } else if (adev->asic_type == CHIP_POLARIS11) { ++ polaris11_init_power_gating(adev); + } + } + +@@ -5513,7 +5481,7 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, + if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PIPELINE) + cz_enable_gfx_pipeline_power_gating(adev, true); + } else { +- cz_enable_gfx_cg_power_gating(adev, false); ++ cz_enable_gfx_cg_power_gating(adev, true); + cz_enable_gfx_pipeline_power_gating(adev, false); + } + } +@@ -5524,14 +5492,12 @@ static int gfx_v8_0_set_powergating_state(void *handle, + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + +- if (!(adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG)) +- return 0; + + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: +- if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) +- cz_update_gfx_cg_power_gating(adev, enable); ++ ++ cz_update_gfx_cg_power_gating(adev, enable); + + if ((adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_SMG) && enable) + gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc b/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc index cc005c0c..f4fdebb6 100644 --- a/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc +++ b/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc @@ -1139,3 +1139,14 @@ patch 1137-drm-amdgpu-acp-fix-resume-on-CZ-systems-with-AZ-audi.patch patch 0001-amdgpu-fix-various-compilation-issues.patch patch CVE-2016-5195.patch patch 0001-random-replace-non-blocking-pool-with-a-Chacha20-bas.patch +patch 
1138-add-new-semaphore-object-in-kernel-side.patch +patch 1139-unify-memory-query-info-interface.patch +patch 1140-dma-buf-return-index-of-the-first-signaled-fence.patch +patch 1141-Fix-a-deadlock-affecting-ww_mutexes.patch +patch 1142-add-additional-cached-gca-config-variables.patch +patch 1143-implement-raster-configuration-for-gfx-v8.patch +patch 1144-cache-rb-config-values.patch +patch 1145-use-cached-raster-config-values-in-csb.patch +patch 1146-used-cached-gca-values-for-vi_read_register.patch +patch 1147-Removed-extra-parameter.patch +patch 1148-refine-pg-code-for-gfx_v8.patch |