aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux
diff options
context:
space:
mode:
authorSanjay R Mehta <sanju.mehta@amd.com>2017-01-03 12:40:56 +0530
committerSanjay R Mehta <sanju.mehta@amd.com>2017-01-03 12:49:01 +0530
commit1333036e1b3d84925a063a1ab283a3987402d2b2 (patch)
treef6a6ea6b78c3d04545ad35afddc61c5ab05866c1 /common/recipes-kernel/linux
parentebf7f30a66eee6d3ea2f24c08cdb19654c8249c3 (diff)
downloadmeta-amd-1333036e1b3d84925a063a1ab283a3987402d2b2.tar.gz
meta-amd-1333036e1b3d84925a063a1ab283a3987402d2b2.tar.bz2
meta-amd-1333036e1b3d84925a063a1ab283a3987402d2b2.zip
kernel: Add support for vulkan semaphore and cache raster config values
Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
Diffstat (limited to 'common/recipes-kernel/linux')
-rw-r--r--common/recipes-kernel/linux/files/1138-add-new-semaphore-object-in-kernel-side.patch504
-rw-r--r--common/recipes-kernel/linux/files/1139-unify-memory-query-info-interface.patch113
-rw-r--r--common/recipes-kernel/linux/files/1140-dma-buf-return-index-of-the-first-signaled-fence.patch188
-rw-r--r--common/recipes-kernel/linux/files/1141-Fix-a-deadlock-affecting-ww_mutexes.patch121
-rw-r--r--common/recipes-kernel/linux/files/1142-add-additional-cached-gca-config-variables.patch51
-rw-r--r--common/recipes-kernel/linux/files/1143-implement-raster-configuration-for-gfx-v8.patch262
-rw-r--r--common/recipes-kernel/linux/files/1144-cache-rb-config-values.patch46
-rw-r--r--common/recipes-kernel/linux/files/1145-use-cached-raster-config-values-in-csb.patch61
-rw-r--r--common/recipes-kernel/linux/files/1146-used-cached-gca-values-for-vi_read_register.patch166
-rw-r--r--common/recipes-kernel/linux/files/1147-Removed-extra-parameter.patch26
-rw-r--r--common/recipes-kernel/linux/files/1148-refine-pg-code-for-gfx_v8.patch204
-rw-r--r--common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc11
12 files changed, 1753 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/files/1138-add-new-semaphore-object-in-kernel-side.patch b/common/recipes-kernel/linux/files/1138-add-new-semaphore-object-in-kernel-side.patch
new file mode 100644
index 00000000..f27f1afc
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1138-add-new-semaphore-object-in-kernel-side.patch
@@ -0,0 +1,504 @@
+From d29a89414316f4c54a1a619527398714b091d3db Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Mon, 14 Nov 2016 12:26:18 +0530
+Subject: [PATCH] add new semaphore object in kernel side
+
+So that semaphore can be shared across porcess across devices.
+
+Change-Id: Ie82cace6af81e2ddf45f4bbf9f3c0dafd6bcc499
+Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/Makefile | 3 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 6 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 267 ++++++++++++++++++++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h | 44 ++++
+ drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h | 29 +++
+ 8 files changed, 361 insertions(+), 4 deletions(-)
+ create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
+ create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
+index 28e8e4c..2acc7c1 100644
+--- a/drivers/gpu/drm/amd/amdgpu/Makefile
++++ b/drivers/gpu/drm/amd/amdgpu/Makefile
+@@ -31,7 +31,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
+ amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
+ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
+ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
+- amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
++ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
++ amdgpu_sem.o
+
+ # add asic specific block
+ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index d3de21d..3f5d2ad 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -1000,6 +1000,8 @@ struct amdgpu_ctx_ring {
+ uint64_t sequence;
+ struct fence **fences;
+ struct amd_sched_entity entity;
++ struct list_head sem_list;
++ struct mutex sem_lock;
+ /* client id */
+ u64 client_id;
+ };
+@@ -1699,6 +1701,8 @@ struct amdgpu_vce {
+ struct amdgpu_irq_src irq;
+ unsigned harvest_config;
+ struct amd_sched_entity entity;
++ struct list_head sem_list;
++ struct mutex sem_lock;
+ };
+
+ /*
+@@ -1872,6 +1876,13 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
+ int amdgpu_freesync_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
++int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
++ struct drm_file *filp);
++
++int amdgpu_sem_add_cs(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
++ struct amdgpu_sync *sync);
++
++
+ /* VRAM scratch page for HDP bug, default vram page */
+ struct amdgpu_vram_scratch {
+ struct amdgpu_bo *robj;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+index 0d1346c..bb6057a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -882,7 +882,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+ }
+ }
+
+- return 0;
++ return amdgpu_sem_add_cs(p->ctx, p->job->ring, &p->job->sync);
+ }
+
+ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+index 17e1362..a020e22 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+@@ -42,6 +42,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ ctx->rings[i].sequence = 1;
+ ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
++ INIT_LIST_HEAD(&ctx->rings[i].sem_list);
++ mutex_init(&ctx->rings[i].sem_lock);
+ }
+ /* create context entity for each ring */
+ for (i = 0; i < adev->num_rings; i++) {
+@@ -74,8 +76,10 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
+ return;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+- for (j = 0; j < amdgpu_sched_jobs; ++j)
++ for (j = 0; j < amdgpu_sched_jobs; ++j) {
+ fence_put(ctx->rings[i].fences[j]);
++ mutex_destroy(&ctx->rings[i].sem_lock);
++ }
+ kfree(ctx->fences);
+
+ for (i = 0; i < adev->num_rings; i++)
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index f6ae587..a48783e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -786,6 +786,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+- DRM_IOCTL_DEF_DRV(AMDGPU_FREESYNC, amdgpu_freesync_ioctl, DRM_MASTER)
++ DRM_IOCTL_DEF_DRV(AMDGPU_FREESYNC, amdgpu_freesync_ioctl, DRM_MASTER),
++ DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+ };
+ const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
+new file mode 100644
+index 0000000..db16baa
+--- /dev/null
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
+@@ -0,0 +1,267 @@
++/*
++ * Copyright 2016 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors:
++ * Chunming Zhou <david1.zhou@amd.com>
++ */
++#include <linux/file.h>
++#include <linux/fs.h>
++#include <linux/kernel.h>
++#include <linux/poll.h>
++#include <linux/seq_file.h>
++#include <linux/export.h>
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <linux/uaccess.h>
++#include <linux/anon_inodes.h>
++#include "amdgpu_sem.h"
++#include "amdgpu.h"
++#include <drm/drmP.h>
++
++static int amdgpu_sem_cring_add(struct amdgpu_fpriv *fpriv,
++ struct drm_amdgpu_sem_in *in,
++ struct amdgpu_sem *sem);
++
++static const struct file_operations amdgpu_sem_fops;
++
++static struct amdgpu_sem *amdgpu_sem_alloc(struct fence *fence)
++{
++ struct amdgpu_sem *sem;
++
++ sem = kzalloc(sizeof(struct amdgpu_sem), GFP_KERNEL);
++ if (!sem)
++ return NULL;
++
++ sem->file = anon_inode_getfile("sem_file",
++ &amdgpu_sem_fops,
++ sem, 0);
++ if (IS_ERR(sem->file))
++ goto err;
++
++ kref_init(&sem->kref);
++ INIT_LIST_HEAD(&sem->list);
++ /* fence should be get before passing here */
++ sem->fence = fence;
++
++ return sem;
++err:
++ kfree(sem);
++ return NULL;
++}
++
++static void amdgpu_sem_free(struct kref *kref)
++{
++ struct amdgpu_sem *sem = container_of(
++ kref, struct amdgpu_sem, kref);
++
++ fence_put(sem->fence);
++ kfree(sem);
++}
++
++static int amdgpu_sem_release(struct inode *inode, struct file *file)
++{
++ struct amdgpu_sem *sem = file->private_data;
++
++ kref_put(&sem->kref, amdgpu_sem_free);
++ return 0;
++}
++
++static unsigned int amdgpu_sem_poll(struct file *file, poll_table *wait)
++{
++ return 0;
++}
++
++static long amdgpu_sem_file_ioctl(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ return 0;
++}
++
++static const struct file_operations amdgpu_sem_fops = {
++ .release = amdgpu_sem_release,
++ .poll = amdgpu_sem_poll,
++ .unlocked_ioctl = amdgpu_sem_file_ioctl,
++ .compat_ioctl = amdgpu_sem_file_ioctl,
++};
++
++static int amdgpu_sem_create(void)
++{
++ return get_unused_fd_flags(O_CLOEXEC);
++}
++
++static int amdgpu_sem_signal(int fd, struct fence *fence)
++{
++ struct amdgpu_sem *sem;
++
++ sem = amdgpu_sem_alloc(fence);
++ if (!sem)
++ return -ENOMEM;
++ fd_install(fd, sem->file);
++
++ return 0;
++}
++
++static int amdgpu_sem_wait(int fd, struct amdgpu_fpriv *fpriv,
++ struct drm_amdgpu_sem_in *in)
++{
++ struct file *file = fget(fd);
++ struct amdgpu_sem *sem;
++ int r;
++
++ if (!file)
++ return -EINVAL;
++
++ sem = file->private_data;
++ if (!sem) {
++ r = -EINVAL;
++ goto err;
++ }
++ r = amdgpu_sem_cring_add(fpriv, in, sem);
++err:
++ fput(file);
++ return r;
++}
++
++static void amdgpu_sem_destroy(void)
++{
++ /* userspace should close fd when they try to destroy sem,
++ * closing fd will free semaphore object.
++ */
++}
++
++static struct fence *amdgpu_sem_get_fence(struct amdgpu_fpriv *fpriv,
++ struct drm_amdgpu_sem_in *in)
++{
++ struct amdgpu_ring *out_ring;
++ struct amdgpu_ctx *ctx;
++ struct fence *fence;
++ uint32_t ctx_id, ip_type, ip_instance, ring;
++ int r;
++
++ ctx_id = in->ctx_id;
++ ip_type = in->ip_type;
++ ip_instance = in->ip_instance;
++ ring = in->ring;
++ ctx = amdgpu_ctx_get(fpriv, ctx_id);
++ if (!ctx)
++ return NULL;
++ r = amdgpu_cs_get_ring(ctx->adev, ip_type, ip_instance, ring,
++ &out_ring);
++ if (r) {
++ amdgpu_ctx_put(ctx);
++ return NULL;
++ }
++ /* get the last fence of this entity */
++ fence = amdgpu_ctx_get_fence(ctx, out_ring,
++ in->seq ? in->seq :
++ ctx->rings[out_ring->idx].sequence - 1);
++ amdgpu_ctx_put(ctx);
++
++ return fence;
++}
++
++static int amdgpu_sem_cring_add(struct amdgpu_fpriv *fpriv,
++ struct drm_amdgpu_sem_in *in,
++ struct amdgpu_sem *sem)
++{
++ struct amdgpu_ring *out_ring;
++ struct amdgpu_ctx *ctx;
++ uint32_t ctx_id, ip_type, ip_instance, ring;
++ int r;
++
++ ctx_id = in->ctx_id;
++ ip_type = in->ip_type;
++ ip_instance = in->ip_instance;
++ ring = in->ring;
++ ctx = amdgpu_ctx_get(fpriv, ctx_id);
++ if (!ctx)
++ return -EINVAL;
++ r = amdgpu_cs_get_ring(ctx->adev, ip_type, ip_instance, ring,
++ &out_ring);
++ if (r)
++ goto err;
++ mutex_lock(&ctx->rings[out_ring->idx].sem_lock);
++ list_add(&sem->list, &ctx->rings[out_ring->idx].sem_list);
++ mutex_unlock(&ctx->rings[out_ring->idx].sem_lock);
++
++err:
++ amdgpu_ctx_put(ctx);
++ return r;
++}
++
++int amdgpu_sem_add_cs(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
++ struct amdgpu_sync *sync)
++{
++ struct amdgpu_sem *sem, *tmp;
++ int r = 0;
++
++ if (list_empty(&ctx->rings[ring->idx].sem_list))
++ return 0;
++
++ mutex_lock(&ctx->rings[ring->idx].sem_lock);
++ list_for_each_entry_safe(sem, tmp, &ctx->rings[ring->idx].sem_list,
++ list) {
++ r = amdgpu_sync_fence(ctx->adev, sync, sem->fence);
++ fence_put(sem->fence);
++ if (r)
++ goto err;
++ list_del(&sem->list);
++ kfree(sem);
++ }
++err:
++ mutex_unlock(&ctx->rings[ring->idx].sem_lock);
++ return r;
++}
++
++int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
++ struct drm_file *filp)
++{
++ union drm_amdgpu_sem *args = data;
++ struct amdgpu_fpriv *fpriv = filp->driver_priv;
++ struct fence *fence;
++ int r = 0;
++ int fd = args->in.fd;
++
++ switch (args->in.op) {
++ case AMDGPU_SEM_OP_CREATE_SEM:
++ args->out.fd = amdgpu_sem_create();
++ break;
++ case AMDGPU_SEM_OP_WAIT_SEM:
++ r = amdgpu_sem_wait(fd, fpriv, &args->in);
++ break;
++ case AMDGPU_SEM_OP_SIGNAL_SEM:
++ fence = amdgpu_sem_get_fence(fpriv, &args->in);
++ if (IS_ERR(fence)) {
++ r = PTR_ERR(fence);
++ return r;
++ }
++ r = amdgpu_sem_signal(fd, fence);
++ fence_put(fence);
++ break;
++ case AMDGPU_SEM_OP_DESTROY_SEM:
++ amdgpu_sem_destroy();
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ return r;
++}
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h
+new file mode 100644
+index 0000000..56d59d3
+--- /dev/null
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h
+@@ -0,0 +1,44 @@
++/*
++ * Copyright 2016 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: Chunming Zhou <david1.zhou@amd.com>
++ *
++ */
++
++
++#ifndef _LINUX_AMDGPU_SEM_H
++#define _LINUX_AMDGPU_SEM_H
++
++#include <linux/types.h>
++#include <linux/kref.h>
++#include <linux/ktime.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/fence.h>
++
++struct amdgpu_sem {
++ struct file *file;
++ struct kref kref;
++ struct fence *fence;
++ struct list_head list;
++};
++
++#endif /* _LINUX_AMDGPU_SEM_H */
+diff --git a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h
+index b06e3dc..65153bf 100644
+--- a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h
++++ b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h
+@@ -46,6 +46,7 @@
+ #define DRM_AMDGPU_WAIT_CS 0x09
+ #define DRM_AMDGPU_GEM_OP 0x10
+ #define DRM_AMDGPU_GEM_USERPTR 0x11
++#define DRM_AMDGPU_SEM 0x5b
+ #define DRM_AMDGPU_FREESYNC 0x14
+
+ #define DRM_AMDGPU_WAIT_FENCES 0x5e
+@@ -64,6 +65,7 @@
+ #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
+ #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+ #define DRM_IOCTL_AMDGPU_FREESYNC DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FREESYNC, struct drm_amdgpu_freesync)
++#define DRM_IOCTL_AMDGPU_SEM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
+
+ #define AMDGPU_GEM_DOMAIN_CPU 0x1
+ #define AMDGPU_GEM_DOMAIN_GTT 0x2
+@@ -185,6 +187,33 @@ union drm_amdgpu_ctx {
+ union drm_amdgpu_ctx_out out;
+ };
+
++/* sem related */
++#define AMDGPU_SEM_OP_CREATE_SEM 1
++#define AMDGPU_SEM_OP_WAIT_SEM 2
++#define AMDGPU_SEM_OP_SIGNAL_SEM 3
++#define AMDGPU_SEM_OP_DESTROY_SEM 4
++
++struct drm_amdgpu_sem_in {
++ /** AMDGPU_SEM_OP_* */
++ uint32_t op;
++ int32_t fd;
++ uint32_t ctx_id;
++ uint32_t ip_type;
++ uint32_t ip_instance;
++ uint32_t ring;
++ uint64_t seq;
++};
++
++union drm_amdgpu_sem_out {
++ int32_t fd;
++ uint32_t _pad;
++};
++
++union drm_amdgpu_sem {
++ struct drm_amdgpu_sem_in in;
++ union drm_amdgpu_sem_out out;
++};
++
+ /*
+ * This is not a reliable API and you should expect it to fail for any
+ * number of reasons and have fallback path that do not use userptr to
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/1139-unify-memory-query-info-interface.patch b/common/recipes-kernel/linux/files/1139-unify-memory-query-info-interface.patch
new file mode 100644
index 00000000..b7c965a3
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1139-unify-memory-query-info-interface.patch
@@ -0,0 +1,113 @@
+From 314642915b4a2bda146fb9d900ca99eabeab36c0 Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Mon, 14 Nov 2016 12:13:41 +0530
+Subject: [PATCH 01/10] unify memory query info interface
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Change-Id: I74d2b7379bc4febe714a91daf4e1786895de90f2
+Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
+Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 32 +++++++++++++++++++++++
+ drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h | 32 +++++++++++++++++++++++
+ 2 files changed, 64 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index 31c20ba..a48783e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -390,6 +390,38 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ return copy_to_user(out, &vram_gtt,
+ min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
+ }
++
++ case AMDGPU_INFO_MEMORY: {
++ struct drm_amdgpu_memory_info mem;
++
++ memset(&mem, 0, sizeof(mem));
++ mem.vram.total_heap_size = adev->mc.real_vram_size;
++ mem.vram.usable_heap_size =
++ adev->mc.real_vram_size - adev->vram_pin_size;
++ mem.vram.heap_usage = atomic64_read(&adev->vram_usage);
++ mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
++
++ mem.cpu_accessible_vram.total_heap_size =
++ adev->mc.visible_vram_size;
++ mem.cpu_accessible_vram.usable_heap_size =
++ adev->mc.visible_vram_size -
++ (adev->vram_pin_size - adev->invisible_pin_size);
++ mem.cpu_accessible_vram.heap_usage =
++ atomic64_read(&adev->vram_vis_usage);
++ mem.cpu_accessible_vram.max_allocation =
++ mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
++
++ mem.gtt.total_heap_size = adev->mc.gtt_size;
++ mem.gtt.usable_heap_size =
++ adev->mc.gtt_size - adev->gart_pin_size;
++ mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage);
++ mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
++
++ return copy_to_user(out, &mem,
++ min((size_t)size, sizeof(mem)))
++ ? -EFAULT : 0;
++ }
++
+ case AMDGPU_INFO_READ_MMR_REG: {
+ unsigned n, alloc_size;
+ uint32_t *regs;
+diff --git a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h
+index 4d7d982..3f13a87 100644
+--- a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h
++++ b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h
+@@ -540,6 +540,10 @@ struct drm_amdgpu_cs_chunk_data {
+ #define AMDGPU_INFO_VIS_VRAM_USAGE 0x17
+ /* virtual range */
+ #define AMDGPU_INFO_VIRTUAL_RANGE 0x18
++
++/* Query memory about VRAM and GTT domains */
++#define AMDGPU_INFO_MEMORY 0x19
++
+ /* gpu capability */
+ #define AMDGPU_INFO_CAPABILITY 0x50
+ /* query pin memory capability */
+@@ -705,6 +709,34 @@ struct drm_amdgpu_info_hw_ip {
+ __u32 _pad;
+ };
+
++struct drm_amdgpu_heap_info {
++ /** max. physical memory */
++ __u64 total_heap_size;
++
++ /** Theoretical max. available memory in the given heap */
++ __u64 usable_heap_size;
++
++ /**
++ * Number of bytes allocated in the heap. This includes all processes
++ * and private allocations in the kernel. It changes when new buffers
++ * are allocated, freed, and moved. It cannot be larger than
++ * heap_size.
++ */
++ __u64 heap_usage;
++
++ /**
++ * Theoretical possible max. size of buffer which
++ * could be allocated in the given heap
++ */
++ __u64 max_allocation;
++};
++
++struct drm_amdgpu_memory_info {
++ struct drm_amdgpu_heap_info vram;
++ struct drm_amdgpu_heap_info cpu_accessible_vram;
++ struct drm_amdgpu_heap_info gtt;
++};
++
+ /*
+ * Supported GPU families
+ */
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/1140-dma-buf-return-index-of-the-first-signaled-fence.patch b/common/recipes-kernel/linux/files/1140-dma-buf-return-index-of-the-first-signaled-fence.patch
new file mode 100644
index 00000000..76815764
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1140-dma-buf-return-index-of-the-first-signaled-fence.patch
@@ -0,0 +1,188 @@
+From 7e06443930ab2fabda1977c20ff82ff6bc42e3be Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Tue, 15 Nov 2016 14:30:58 +0530
+Subject: [PATCH 02/10] dma-buf: return index of the first signaled fence
+
+Return the index of the first signaled fence. This information
+is useful in some APIs like Vulkan.
+
+Signed-off-by: monk.liu <monk.liu@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: Sumit Semwal <sumit.semwal@linaro.org>
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ drivers/dma-buf/fence.c | 19 ++++++++++++++-----
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 2 +-
+ drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h | 21 +++++++++++----------
+ include/linux/fence.h | 2 +-
+ 5 files changed, 30 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
+index 7b05dbe..192f99b 100644
+--- a/drivers/dma-buf/fence.c
++++ b/drivers/dma-buf/fence.c
+@@ -398,14 +398,17 @@ out:
+ EXPORT_SYMBOL(fence_default_wait);
+
+ static bool
+-fence_test_signaled_any(struct fence **fences, uint32_t count)
++fence_test_signaled_any(struct fence **fences, uint32_t count, uint32_t *idx)
+ {
+ int i;
+
+ for (i = 0; i < count; ++i) {
+ struct fence *fence = fences[i];
+- if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
++ if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
++ if (idx)
++ *idx = i;
+ return true;
++ }
+ }
+ return false;
+ }
+@@ -417,6 +420,7 @@ fence_test_signaled_any(struct fence **fences, uint32_t count)
+ * @count: [in] number of fences to wait on
+ * @intr: [in] if true, do an interruptible wait
+ * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
++ * @idx: [out] the first signaled fence index, meaninful only on Returns positive
+ *
+ * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if
+ * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies
+@@ -428,7 +432,7 @@ fence_test_signaled_any(struct fence **fences, uint32_t count)
+ */
+ signed long
+ fence_wait_any_timeout(struct fence **fences, uint32_t count,
+- bool intr, signed long timeout)
++ bool intr, signed long timeout, uint32_t *idx)
+ {
+ struct default_wait_cb *cb;
+ signed long ret = timeout;
+@@ -439,8 +443,11 @@ fence_wait_any_timeout(struct fence **fences, uint32_t count,
+
+ if (timeout == 0) {
+ for (i = 0; i < count; ++i)
+- if (fence_is_signaled(fences[i]))
++ if (fence_is_signaled(fences[i])) {
++ if (idx)
++ *idx = i;
+ return 1;
++ }
+
+ return 0;
+ }
+@@ -463,6 +470,8 @@ fence_wait_any_timeout(struct fence **fences, uint32_t count,
+ if (fence_add_callback(fence, &cb[i].base,
+ fence_default_wait_cb)) {
+ /* This fence is already signaled */
++ if (idx)
++ *idx = i;
+ goto fence_rm_cb;
+ }
+ }
+@@ -473,7 +482,7 @@ fence_wait_any_timeout(struct fence **fences, uint32_t count,
+ else
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+- if (fence_test_signaled_any(fences, count))
++ if (fence_test_signaled_any(fences, count, idx))
+ break;
+
+ ret = schedule_timeout(ret);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+index bb6057a..181e2b7 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -1107,6 +1107,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
+ {
+ unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
+ uint32_t fence_count = wait->in.fence_count;
++ uint32_t first = ~0;
+ struct fence **array;
+ unsigned i;
+ long r;
+@@ -1132,13 +1133,14 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
+ }
+ }
+
+- r = fence_wait_any_timeout(array, fence_count, true, timeout);
++ r = fence_wait_any_timeout(array, fence_count, true, timeout, &first);
+ if (r < 0)
+ goto err_free_fence_array;
+
+ out:
+ memset(wait, 0, sizeof(*wait));
+ wait->out.status = (r > 0);
++ wait->out.first_signaled = first;
+ /* set return value 0 to indicate success */
+ r = 0;
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+index 8bf84ef..9f4311c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+@@ -360,7 +360,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ if (count) {
+ spin_unlock(&sa_manager->wq.lock);
+ t = fence_wait_any_timeout(fences, count, false,
+- MAX_SCHEDULE_TIMEOUT);
++ MAX_SCHEDULE_TIMEOUT, NULL);
+ for (i = 0; i < count; ++i)
+ fence_put(fences[i]);
+
+diff --git a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h
+index 3f13a87..c2f06eb 100644
+--- a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h
++++ b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h
+@@ -334,23 +334,24 @@ union drm_amdgpu_wait_cs {
+ };
+
+ struct drm_amdgpu_fence {
+- uint32_t ctx_id;
+- uint32_t ip_type;
+- uint32_t ip_instance;
+- uint32_t ring;
+- uint64_t seq_no;
++ __u32 ctx_id;
++ __u32 ip_type;
++ __u32 ip_instance;
++ __u32 ring;
++ __u64 seq_no;
+ };
+
+ struct drm_amdgpu_wait_fences_in {
+ /** This points to uint64_t * which points to fences */
+- uint64_t fences;
+- uint32_t fence_count;
+- uint32_t wait_all;
+- uint64_t timeout_ns;
++ __u64 fences;
++ __u32 fence_count;
++ __u32 wait_all;
++ __u64 timeout_ns;
+ };
+
+ struct drm_amdgpu_wait_fences_out {
+- uint64_t status;
++ __u32 status;
++ __u32 first_signaled;
+ };
+
+ union drm_amdgpu_wait_fences {
+diff --git a/include/linux/fence.h b/include/linux/fence.h
+index bb52201..b8da489 100644
+--- a/include/linux/fence.h
++++ b/include/linux/fence.h
+@@ -322,7 +322,7 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2)
+
+ signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout);
+ signed long fence_wait_any_timeout(struct fence **fences, uint32_t count,
+- bool intr, signed long timeout);
++ bool intr, signed long timeout, uint32_t *idx);
+
+ /**
+ * fence_wait - sleep until the fence gets signaled
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/1141-Fix-a-deadlock-affecting-ww_mutexes.patch b/common/recipes-kernel/linux/files/1141-Fix-a-deadlock-affecting-ww_mutexes.patch
new file mode 100644
index 00000000..25312ef1
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1141-Fix-a-deadlock-affecting-ww_mutexes.patch
@@ -0,0 +1,121 @@
+From 5f3c992c00f95a483cf01d55b8ff0fa1fe6df216 Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Wed, 23 Nov 2016 14:54:46 +0530
+Subject: [PATCH 03/10] Fix a deadlock affecting ww_mutexes
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+his patch fixes a race condition involving 4 threads and 2 ww_mutexes
+as indicated in the following example. Acquire context stamps are ordered
+like the thread numbers, i.e. thread #1 should back off when it encounters
+a mutex locked by thread #0 etc.
+
+Thread #0 Thread #1 Thread #2 Thread #3
+--------- --------- --------- ---------
+ lock(ww)
+ lock(ww')
+ lock(ww)
+ lock(ww)
+ unlock(ww) part 1
+lock(ww)
+ unlock(ww) part 2
+ back off
+lock(ww')
+
+Here, unlock(ww) part 1 is the part that sets lock->base.count to 1
+(without being protected by lock->base.wait_lock), meaning that thread #0
+can acquire ww in the fast path. Since lock->base.count == 0, thread #0
+won't wake up any of the waiters.
+
+Then, unlock(ww) part 2 wakes up _only_the_first_ waiter of ww. This is
+thread #2, since waiters are added at the tail. Thread #2 wakes up and
+backs off since it sees ww owned by a context with a lower stamp.
+
+Meanwhile, thread #1 is never woken up, and so it won't back off its lock
+on ww'. So thread #0 gets stuck waiting for ww' to be released.
+
+This patch fixes the deadlock by waking up all waiters in the slow path
+of ww_mutex_unlock.
+
+We have an internal test case for amdgpu which continuously submits
+command streams from tens of threads, where all command stream reference
+hundreds of GPU buffer objects with a lot of overlap in the buffer lists
+between command streams. This test reliably caused a deadlock, and while I
+haven't completely confirmed that it is exactly the scenario outlined
+above, this patch does fix the test case.
+
+Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ kernel/locking/mutex.c | 26 ++++++++++++++++++++++----
+ 1 file changed, 22 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
+index 0551c21..39fa58a 100644
+--- a/kernel/locking/mutex.c
++++ b/kernel/locking/mutex.c
+@@ -409,6 +409,10 @@ static bool mutex_optimistic_spin(struct mutex *lock,
+ __visible __used noinline
+ void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
+
++static __used noinline
++void __sched __mutex_unlock_slowpath_wakeall(atomic_t *lock_count);
++
++
+ /**
+ * mutex_unlock - release the mutex
+ * @lock: the mutex to be released
+@@ -473,7 +477,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock)
+ */
+ mutex_clear_owner(&lock->base);
+ #endif
+- __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath);
++ __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath_wakeall);
+ }
+ EXPORT_SYMBOL(ww_mutex_unlock);
+
+@@ -713,7 +717,7 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
+ * Release the lock, slowpath:
+ */
+ static inline void
+-__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
++__mutex_unlock_common_slowpath(struct mutex *lock, int nested, int wake_all)
+ {
+ unsigned long flags;
+
+@@ -736,7 +740,13 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
+ mutex_release(&lock->dep_map, nested, _RET_IP_);
+ debug_mutex_unlock(lock);
+
+- if (!list_empty(&lock->wait_list)) {
++ if (wake_all) {
++ struct mutex_waiter *waiter;
++ list_for_each_entry(waiter, &lock->wait_list, list) {
++ debug_mutex_wake_waiter(lock, waiter);
++ wake_up_process(waiter->task);
++ }
++ } else if (!list_empty(&lock->wait_list)) {
+ /* get the first entry from the wait-list: */
+ struct mutex_waiter *waiter =
+ list_entry(lock->wait_list.next,
+@@ -758,7 +768,15 @@ __mutex_unlock_slowpath(atomic_t *lock_count)
+ {
+ struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+- __mutex_unlock_common_slowpath(lock, 1);
++ __mutex_unlock_common_slowpath(lock, 1, 0);
++}
++
++static void
++__mutex_unlock_slowpath_wakeall(atomic_t *lock_count)
++{
++ struct mutex *lock = container_of(lock_count, struct mutex, count);
++
++ __mutex_unlock_common_slowpath(lock, 1, 1);
+ }
+
+ #ifndef CONFIG_DEBUG_LOCK_ALLOC
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/1142-add-additional-cached-gca-config-variables.patch b/common/recipes-kernel/linux/files/1142-add-additional-cached-gca-config-variables.patch
new file mode 100644
index 00000000..c9c426f9
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1142-add-additional-cached-gca-config-variables.patch
@@ -0,0 +1,51 @@
+From caa6b72d0c01491114f017fe3bca7adc05194611 Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Tue, 6 Dec 2016 17:07:10 +0530
+Subject: [PATCH 04/10] add additional cached gca config variables
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+We need to cache some additional values to handle SR-IOV
+and PG.
+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 3f5d2ad..40497c2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -1135,6 +1135,16 @@ struct amdgpu_scratch {
+ /*
+ * GFX configurations
+ */
++#define AMDGPU_GFX_MAX_SE 4
++#define AMDGPU_GFX_MAX_SH_PER_SE 2
++
++struct amdgpu_rb_config {
++ uint32_t rb_backend_disable;
++ uint32_t user_rb_backend_disable;
++ uint32_t raster_config;
++ uint32_t raster_config_1;
++};
++
+ struct amdgpu_gca_config {
+ unsigned max_shader_engines;
+ unsigned max_tile_pipes;
+@@ -1163,6 +1173,8 @@ struct amdgpu_gca_config {
+
+ uint32_t tile_mode_array[32];
+ uint32_t macrotile_mode_array[16];
++
++ struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
+ };
+
+ struct amdgpu_gfx {
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/1143-implement-raster-configuration-for-gfx-v8.patch b/common/recipes-kernel/linux/files/1143-implement-raster-configuration-for-gfx-v8.patch
new file mode 100644
index 00000000..c1a271f2
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1143-implement-raster-configuration-for-gfx-v8.patch
@@ -0,0 +1,262 @@
+From 705f105de150240594945703df70f82d5ab861ce Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Tue, 6 Dec 2016 19:33:01 +0530
+Subject: [PATCH 05/10] implement raster configuration for gfx v8
+
+This patch implements the raster configuration and the harvested
+configuration of gfx v8.
+
+Signed-off-by: Huang Rui <ray.huang@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 168 +++++++++++++++++++++++++++++++++-
+ drivers/gpu/drm/amd/amdgpu/vid.h | 37 ++++++++
+ 2 files changed, 204 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+index c5a3d04..20ac07f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -3484,13 +3484,163 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+ return (~data) & mask;
+ }
+
++static void
++gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
++{
++ switch (adev->asic_type) {
++ case CHIP_FIJI:
++ *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
++ RB_XSEL2(1) | PKR_MAP(2) |
++ PKR_XSEL(1) | PKR_YSEL(1) |
++ SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
++ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
++ SE_PAIR_YSEL(2);
++ break;
++ case CHIP_TONGA:
++ case CHIP_POLARIS10:
++ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
++ SE_XSEL(1) | SE_YSEL(1);
++ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
++ SE_PAIR_YSEL(2);
++ break;
++ case CHIP_TOPAZ:
++ case CHIP_CARRIZO:
++ *rconf |= RB_MAP_PKR0(2);
++ *rconf1 |= 0x0;
++ break;
++ case CHIP_POLARIS11:
++ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
++ SE_XSEL(1) | SE_YSEL(1);
++ *rconf1 |= 0x0;
++ break;
++ case CHIP_STONEY:
++ *rconf |= 0x0;
++ *rconf1 |= 0x0;
++ break;
++ default:
++ DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
++ break;
++ }
++}
++
++static void
++gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
++ u32 raster_config, u32 raster_config_1,
++ unsigned rb_mask, unsigned num_rb)
++{
++ unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
++ unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
++ unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
++ unsigned rb_per_se = num_rb / num_se;
++ unsigned se_mask[4];
++ unsigned se;
++
++ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
++ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
++ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
++ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
++
++ WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
++ WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
++ WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
++
++ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
++ (!se_mask[2] && !se_mask[3]))) {
++ raster_config_1 &= ~SE_PAIR_MAP_MASK;
++
++ if (!se_mask[0] && !se_mask[1]) {
++ raster_config_1 |=
++ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
++ } else {
++ raster_config_1 |=
++ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
++ }
++ }
++
++ for (se = 0; se < num_se; se++) {
++ unsigned raster_config_se = raster_config;
++ unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
++ unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
++ int idx = (se / 2) * 2;
++
++ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
++ raster_config_se &= ~SE_MAP_MASK;
++
++ if (!se_mask[idx]) {
++ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
++ } else {
++ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
++ }
++ }
++
++ pkr0_mask &= rb_mask;
++ pkr1_mask &= rb_mask;
++ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
++ raster_config_se &= ~PKR_MAP_MASK;
++
++ if (!pkr0_mask) {
++ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
++ } else {
++ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
++ }
++ }
++
++ if (rb_per_se >= 2) {
++ unsigned rb0_mask = 1 << (se * rb_per_se);
++ unsigned rb1_mask = rb0_mask << 1;
++
++ rb0_mask &= rb_mask;
++ rb1_mask &= rb_mask;
++ if (!rb0_mask || !rb1_mask) {
++ raster_config_se &= ~RB_MAP_PKR0_MASK;
++
++ if (!rb0_mask) {
++ raster_config_se |=
++ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
++ } else {
++ raster_config_se |=
++ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
++ }
++ }
++
++ if (rb_per_se > 2) {
++ rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
++ rb1_mask = rb0_mask << 1;
++ rb0_mask &= rb_mask;
++ rb1_mask &= rb_mask;
++ if (!rb0_mask || !rb1_mask) {
++ raster_config_se &= ~RB_MAP_PKR1_MASK;
++
++ if (!rb0_mask) {
++ raster_config_se |=
++ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
++ } else {
++ raster_config_se |=
++ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
++ }
++ }
++ }
++ }
++
++ /* GRBM_GFX_INDEX has a different offset on VI */
++ gfx_v8_0_select_se_sh(adev, se, 0xffffffff);
++ WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
++ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
++ }
++
++ /* GRBM_GFX_INDEX has a different offset on VI */
++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
++}
++
+ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
+ {
+ int i, j;
+ u32 data;
++ u32 raster_config = 0, raster_config_1 = 0;
+ u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
++ unsigned num_rb_pipes;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+@@ -3502,10 +3652,26 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
+ }
+ }
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+- mutex_unlock(&adev->grbm_idx_mutex);
+
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
++
++ num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
++ adev->gfx.config.max_shader_engines, 16);
++
++ gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
++
++ if (!adev->gfx.config.backend_enable_mask ||
++ adev->gfx.config.num_rbs >= num_rb_pipes) {
++ WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
++ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
++ } else {
++ gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
++ adev->gfx.config.backend_enable_mask,
++ num_rb_pipes);
++ }
++
++ mutex_unlock(&adev->grbm_idx_mutex);
+ }
+
+ /**
+diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
+index 3bf7172..4bd2bfd 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vid.h
++++ b/drivers/gpu/drm/amd/amdgpu/vid.h
+@@ -368,4 +368,41 @@
+ #define VCE_CMD_IB_AUTO 0x00000005
+ #define VCE_CMD_SEMAPHORE 0x00000006
+
++
++/* mmPA_SC_RASTER_CONFIG mask */
++#define RB_MAP_PKR0(x) ((x) << 0)
++#define RB_MAP_PKR0_MASK (0x3 << 0)
++#define RB_MAP_PKR1(x) ((x) << 2)
++#define RB_MAP_PKR1_MASK (0x3 << 2)
++#define RB_XSEL2(x) ((x) << 4)
++#define RB_XSEL2_MASK (0x3 << 4)
++#define RB_XSEL (1 << 6)
++#define RB_YSEL (1 << 7)
++#define PKR_MAP(x) ((x) << 8)
++#define PKR_MAP_MASK (0x3 << 8)
++#define PKR_XSEL(x) ((x) << 10)
++#define PKR_XSEL_MASK (0x3 << 10)
++#define PKR_YSEL(x) ((x) << 12)
++#define PKR_YSEL_MASK (0x3 << 12)
++#define SC_MAP(x) ((x) << 16)
++#define SC_MAP_MASK (0x3 << 16)
++#define SC_XSEL(x) ((x) << 18)
++#define SC_XSEL_MASK (0x3 << 18)
++#define SC_YSEL(x) ((x) << 20)
++#define SC_YSEL_MASK (0x3 << 20)
++#define SE_MAP(x) ((x) << 24)
++#define SE_MAP_MASK (0x3 << 24)
++#define SE_XSEL(x) ((x) << 26)
++#define SE_XSEL_MASK (0x3 << 26)
++#define SE_YSEL(x) ((x) << 28)
++#define SE_YSEL_MASK (0x3 << 28)
++
++/* mmPA_SC_RASTER_CONFIG_1 mask */
++#define SE_PAIR_MAP(x) ((x) << 0)
++#define SE_PAIR_MAP_MASK (0x3 << 0)
++#define SE_PAIR_XSEL(x) ((x) << 2)
++#define SE_PAIR_XSEL_MASK (0x3 << 2)
++#define SE_PAIR_YSEL(x) ((x) << 4)
++#define SE_PAIR_YSEL_MASK (0x3 << 4)
++
+ #endif
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/1144-cache-rb-config-values.patch b/common/recipes-kernel/linux/files/1144-cache-rb-config-values.patch
new file mode 100644
index 00000000..0c8fe273
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1144-cache-rb-config-values.patch
@@ -0,0 +1,46 @@
+From 0aaf3d10e376981da3d92f037c6e36a5c4e8d348 Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Tue, 6 Dec 2016 19:40:46 +0530
+Subject: [PATCH 06/10] cache rb config values
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Needed for SR-IOV and when PG is enabled.
+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+index 20ac07f..479047e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -3671,6 +3671,21 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
+ num_rb_pipes);
+ }
+
++ /* cache the values for userspace */
++ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
++ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
++ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
++ adev->gfx.config.rb_config[i][j].rb_backend_disable =
++ RREG32(mmCC_RB_BACKEND_DISABLE);
++ adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
++ RREG32(mmGC_USER_RB_BACKEND_DISABLE);
++ adev->gfx.config.rb_config[i][j].raster_config =
++ RREG32(mmPA_SC_RASTER_CONFIG);
++ adev->gfx.config.rb_config[i][j].raster_config_1 =
++ RREG32(mmPA_SC_RASTER_CONFIG_1);
++ }
++ }
++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ }
+
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/1145-use-cached-raster-config-values-in-csb.patch b/common/recipes-kernel/linux/files/1145-use-cached-raster-config-values-in-csb.patch
new file mode 100644
index 00000000..b3b19e7b
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1145-use-cached-raster-config-values-in-csb.patch
@@ -0,0 +1,61 @@
+From 9fcd43d6a79011dd9ab3837d38ba27454be747ad Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Tue, 6 Dec 2016 20:14:23 +0530
+Subject: [PATCH 07/10] use cached raster config values in csb
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Simplify the code and properly set the csb for harvest values.
+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 ++----------------------------
+ 1 file changed, 2 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+index 479047e..dcc59f3 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -1110,34 +1110,8 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+ buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
+ PACKET3_SET_CONTEXT_REG_START);
+- switch (adev->asic_type) {
+- case CHIP_TONGA:
+- case CHIP_POLARIS10:
+- buffer[count++] = cpu_to_le32(0x16000012);
+- buffer[count++] = cpu_to_le32(0x0000002A);
+- break;
+- case CHIP_POLARIS11:
+- buffer[count++] = cpu_to_le32(0x16000012);
+- buffer[count++] = cpu_to_le32(0x00000000);
+- break;
+- case CHIP_FIJI:
+- buffer[count++] = cpu_to_le32(0x3a00161a);
+- buffer[count++] = cpu_to_le32(0x0000002e);
+- break;
+- case CHIP_TOPAZ:
+- case CHIP_CARRIZO:
+- buffer[count++] = cpu_to_le32(0x00000002);
+- buffer[count++] = cpu_to_le32(0x00000000);
+- break;
+- case CHIP_STONEY:
+- buffer[count++] = cpu_to_le32(0x00000000);
+- buffer[count++] = cpu_to_le32(0x00000000);
+- break;
+- default:
+- buffer[count++] = cpu_to_le32(0x00000000);
+- buffer[count++] = cpu_to_le32(0x00000000);
+- break;
+- }
++ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
++ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/1146-used-cached-gca-values-for-vi_read_register.patch b/common/recipes-kernel/linux/files/1146-used-cached-gca-values-for-vi_read_register.patch
new file mode 100644
index 00000000..ed7262c9
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1146-used-cached-gca-values-for-vi_read_register.patch
@@ -0,0 +1,166 @@
+From 2ce0f44274368b2a6640c3062eb119a0de8c1056 Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Wed, 7 Dec 2016 15:07:53 +0530
+Subject: [PATCH 08/10] used cached gca values for vi_read_register
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Using the cached values has less latency for bare metal
+and SR-IOV, and prevents reading back bogus values if the
+engine is powergated.
+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/vi.c | 115 +++++++++++++++++++++++++++++++++-------
+ 1 file changed, 96 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
+index 02ba429..3a42e83 100644
+--- a/drivers/gpu/drm/amd/amdgpu/vi.c
++++ b/drivers/gpu/drm/amd/amdgpu/vi.c
+@@ -513,21 +513,100 @@ static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] =
+ {mmPA_SC_RASTER_CONFIG_1, false, true},
+ };
+
+-static uint32_t vi_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
+- u32 sh_num, u32 reg_offset)
++static uint32_t vi_get_register_value(struct amdgpu_device *adev,
++ bool indexed, u32 se_num,
++ u32 sh_num, u32 reg_offset)
+ {
+- uint32_t val;
++ if (indexed) {
++ uint32_t val;
++ unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num;
++ unsigned sh_idx = (sh_num == 0xffffffff) ? 0 : sh_num;
++
++ switch (reg_offset) {
++ case mmCC_RB_BACKEND_DISABLE:
++ return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable;
++ case mmGC_USER_RB_BACKEND_DISABLE:
++ return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable;
++ case mmPA_SC_RASTER_CONFIG:
++ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config;
++ case mmPA_SC_RASTER_CONFIG_1:
++ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config_1;
++ }
+
+- mutex_lock(&adev->grbm_idx_mutex);
+- if (se_num != 0xffffffff || sh_num != 0xffffffff)
+- gfx_v8_0_select_se_sh(adev, se_num, sh_num);
++ mutex_lock(&adev->grbm_idx_mutex);
++ if (se_num != 0xffffffff || sh_num != 0xffffffff)
++ gfx_v8_0_select_se_sh(adev, se_num, sh_num);
+
+- val = RREG32(reg_offset);
++ val = RREG32(reg_offset);
+
+- if (se_num != 0xffffffff || sh_num != 0xffffffff)
+- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+- mutex_unlock(&adev->grbm_idx_mutex);
+- return val;
++ if (se_num != 0xffffffff || sh_num != 0xffffffff)
++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
++ mutex_unlock(&adev->grbm_idx_mutex);
++ return val;
++ } else {
++ unsigned idx;
++
++ switch (reg_offset) {
++ case mmGB_ADDR_CONFIG:
++ return adev->gfx.config.gb_addr_config;
++ case mmMC_ARB_RAMCFG:
++ return adev->gfx.config.mc_arb_ramcfg;
++ case mmGB_TILE_MODE0:
++ case mmGB_TILE_MODE1:
++ case mmGB_TILE_MODE2:
++ case mmGB_TILE_MODE3:
++ case mmGB_TILE_MODE4:
++ case mmGB_TILE_MODE5:
++ case mmGB_TILE_MODE6:
++ case mmGB_TILE_MODE7:
++ case mmGB_TILE_MODE8:
++ case mmGB_TILE_MODE9:
++ case mmGB_TILE_MODE10:
++ case mmGB_TILE_MODE11:
++ case mmGB_TILE_MODE12:
++ case mmGB_TILE_MODE13:
++ case mmGB_TILE_MODE14:
++ case mmGB_TILE_MODE15:
++ case mmGB_TILE_MODE16:
++ case mmGB_TILE_MODE17:
++ case mmGB_TILE_MODE18:
++ case mmGB_TILE_MODE19:
++ case mmGB_TILE_MODE20:
++ case mmGB_TILE_MODE21:
++ case mmGB_TILE_MODE22:
++ case mmGB_TILE_MODE23:
++ case mmGB_TILE_MODE24:
++ case mmGB_TILE_MODE25:
++ case mmGB_TILE_MODE26:
++ case mmGB_TILE_MODE27:
++ case mmGB_TILE_MODE28:
++ case mmGB_TILE_MODE29:
++ case mmGB_TILE_MODE30:
++ case mmGB_TILE_MODE31:
++ idx = (reg_offset - mmGB_TILE_MODE0);
++ return adev->gfx.config.tile_mode_array[idx];
++ case mmGB_MACROTILE_MODE0:
++ case mmGB_MACROTILE_MODE1:
++ case mmGB_MACROTILE_MODE2:
++ case mmGB_MACROTILE_MODE3:
++ case mmGB_MACROTILE_MODE4:
++ case mmGB_MACROTILE_MODE5:
++ case mmGB_MACROTILE_MODE6:
++ case mmGB_MACROTILE_MODE7:
++ case mmGB_MACROTILE_MODE8:
++ case mmGB_MACROTILE_MODE9:
++ case mmGB_MACROTILE_MODE10:
++ case mmGB_MACROTILE_MODE11:
++ case mmGB_MACROTILE_MODE12:
++ case mmGB_MACROTILE_MODE13:
++ case mmGB_MACROTILE_MODE14:
++ case mmGB_MACROTILE_MODE15:
++ idx = (reg_offset - mmGB_MACROTILE_MODE0);
++ return adev->gfx.config.macrotile_mode_array[idx];
++ default:
++ return RREG32(reg_offset);
++ }
++ }
+ }
+
+ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
+@@ -562,10 +641,9 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
+ if (reg_offset != asic_register_entry->reg_offset)
+ continue;
+ if (!asic_register_entry->untouched)
+- *value = asic_register_entry->grbm_indexed ?
+- vi_read_indexed_register(adev, se_num,
+- sh_num, reg_offset) :
+- RREG32(reg_offset);
++ *value = vi_get_register_value(adev,
++ asic_register_entry->grbm_indexed,
++ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ }
+@@ -575,10 +653,9 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
+ continue;
+
+ if (!vi_allowed_read_registers[i].untouched)
+- *value = vi_allowed_read_registers[i].grbm_indexed ?
+- vi_read_indexed_register(adev, se_num,
+- sh_num, reg_offset) :
+- RREG32(reg_offset);
++ *value = vi_get_register_value(adev,
++ vi_allowed_read_registers[i].grbm_indexed,
++ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/1147-Removed-extra-parameter.patch b/common/recipes-kernel/linux/files/1147-Removed-extra-parameter.patch
new file mode 100644
index 00000000..3ee7aa52
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1147-Removed-extra-parameter.patch
@@ -0,0 +1,26 @@
+From 7dc74a872ca0a5502f2c8e56fdfd9af97b8da1b6 Mon Sep 17 00:00:00 2001
+From: Sanjay R Mehta <sanju.mehta@amd.com>
+Date: Wed, 7 Dec 2016 21:00:00 +0530
+Subject: [PATCH 09/10] Removed extra parameter
+
+Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+index dcc59f3..d1cb4db 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -3648,7 +3648,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
+ /* cache the values for userspace */
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
++ gfx_v8_0_select_se_sh(adev, i, j);
+ adev->gfx.config.rb_config[i][j].rb_backend_disable =
+ RREG32(mmCC_RB_BACKEND_DISABLE);
+ adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/1148-refine-pg-code-for-gfx_v8.patch b/common/recipes-kernel/linux/files/1148-refine-pg-code-for-gfx_v8.patch
new file mode 100644
index 00000000..4416497c
--- /dev/null
+++ b/common/recipes-kernel/linux/files/1148-refine-pg-code-for-gfx_v8.patch
@@ -0,0 +1,204 @@
+From c5ef870413c64c25cfe2a646c395b0c0d293a4f5 Mon Sep 17 00:00:00 2001
+From: Ravi Patlegar <ravi.patlegar@amd.com>
+Date: Tue, 13 Dec 2016 16:28:54 +0530
+Subject: [PATCH 10/10] refine pg code for gfx_v8.
+
+1. bit CP_PG_DISABLE was reversed.
+2. load RLC_SRM_INDEX_CNTL_ADDR/DATA_x pairs
+ with valid addr/data.
+3. always init gfx pg.
+4. delete repeated check for pg mask.
+
+Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
+Signed-off-by: Ravi Patlegar <ravi.patlegar@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 118 ++++++++++++----------------------
+ 2 files changed, 44 insertions(+), 76 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 40497c2..af04d3b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -2211,6 +2211,8 @@ bool amdgpu_device_has_dal_support(struct amdgpu_device *adev);
+ #define REG_GET_FIELD(value, reg, field) \
+ (((value) & REG_FIELD_MASK(reg, field)) >> REG_FIELD_SHIFT(reg, field))
+
++#define WREG32_FIELD(reg, field, val) \
++ WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+ /*
+ * BIOS helpers.
+ */
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+index d1cb4db..b4c41f9 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -3934,8 +3934,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
+ temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
+ data = mmRLC_SRM_INDEX_CNTL_DATA_0;
+ for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
+- amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
+- amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
++ if (unique_indices[i] != 0) {
++ amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
++ amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
++ }
+ }
+ kfree(register_list_format);
+
+@@ -3955,32 +3957,17 @@ static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
+ {
+ uint32_t data;
+
+- if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
+- AMDGPU_PG_SUPPORT_GFX_SMG |
+- AMDGPU_PG_SUPPORT_GFX_DMG)) {
+- data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
+- data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
+- data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+- WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
+-
+- data = 0;
+- data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
+- data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
+- data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
+- data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
+- WREG32(mmRLC_PG_DELAY, data);
+-
+- data = RREG32(mmRLC_PG_DELAY_2);
+- data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
+- data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
+- WREG32(mmRLC_PG_DELAY_2, data);
+-
+- data = RREG32(mmRLC_AUTO_PG_CTRL);
+- data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
+- data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
+- WREG32(mmRLC_AUTO_PG_CTRL, data);
+- }
+- }
++ WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
++
++ data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
++ data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
++ data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
++ data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
++ WREG32(mmRLC_PG_DELAY, data);
++
++ WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
++ WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
++}
+
+ static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
+ bool enable)
+@@ -4016,18 +4003,8 @@ static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
+
+ static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
+ {
+- u32 data, orig;
+-
+- orig = data = RREG32(mmRLC_PG_CNTL);
+-
+- if (enable)
+- data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+- else
+- data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+-
+- if (orig != data)
+- WREG32(mmRLC_PG_CNTL, data);
+- }
++ WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
++}
+
+ static void polaris11_init_power_gating(struct amdgpu_device *adev)
+ {
+@@ -4062,39 +4039,30 @@ static void polaris11_init_power_gating(struct amdgpu_device *adev)
+
+ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
+ {
+- if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+- AMD_PG_SUPPORT_GFX_SMG |
+- AMD_PG_SUPPORT_GFX_DMG |
+- AMD_PG_SUPPORT_CP |
+- AMD_PG_SUPPORT_GDS |
+- AMD_PG_SUPPORT_RLC_SMU_HS)) {
+- gfx_v8_0_init_csb(adev);
+- gfx_v8_0_init_save_restore_list(adev);
+- gfx_v8_0_enable_save_restore_machine(adev);
++ gfx_v8_0_init_csb(adev);
++ gfx_v8_0_init_save_restore_list(adev);
++ gfx_v8_0_enable_save_restore_machine(adev);
+
+- if ((adev->asic_type == CHIP_CARRIZO) ||
+- (adev->asic_type == CHIP_STONEY)) {
+- struct amdgpu_cu_info cu_info;
+-
+- gfx_v8_0_get_cu_info(adev, &cu_info);
+-
+- WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
+- gfx_v8_0_init_power_gating(adev);
+- WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, cu_info.ao_cu_mask);
+- if (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS) {
+- cz_enable_sck_slow_down_on_power_up(adev, true);
+- cz_enable_sck_slow_down_on_power_down(adev, true);
+- } else {
+- cz_enable_sck_slow_down_on_power_up(adev, false);
+- cz_enable_sck_slow_down_on_power_down(adev, false);
+- }
+- if (adev->pg_flags & AMDGPU_PG_SUPPORT_CP)
+- cz_enable_cp_power_gating(adev, true);
+- else
+- cz_enable_cp_power_gating(adev, false);
+- } else if (adev->asic_type == CHIP_POLARIS11) {
+- polaris11_init_power_gating(adev);
++ if ((adev->asic_type == CHIP_CARRIZO) ||
++ (adev->asic_type == CHIP_STONEY)) {
++ struct amdgpu_cu_info cu_info;
++ gfx_v8_0_get_cu_info(adev, &cu_info);
++ WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
++ gfx_v8_0_init_power_gating(adev);
++ WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, cu_info.ao_cu_mask);
++ if (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS) {
++ cz_enable_sck_slow_down_on_power_up(adev, true);
++ cz_enable_sck_slow_down_on_power_down(adev, true);
++ } else {
++ cz_enable_sck_slow_down_on_power_up(adev, false);
++ cz_enable_sck_slow_down_on_power_down(adev, false);
+ }
++ if (adev->pg_flags & AMDGPU_PG_SUPPORT_CP)
++ cz_enable_cp_power_gating(adev, true);
++ else
++ cz_enable_cp_power_gating(adev, false);
++ } else if (adev->asic_type == CHIP_POLARIS11) {
++ polaris11_init_power_gating(adev);
+ }
+ }
+
+@@ -5513,7 +5481,7 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
+ if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PIPELINE)
+ cz_enable_gfx_pipeline_power_gating(adev, true);
+ } else {
+- cz_enable_gfx_cg_power_gating(adev, false);
++ cz_enable_gfx_cg_power_gating(adev, true);
+ cz_enable_gfx_pipeline_power_gating(adev, false);
+ }
+ }
+@@ -5524,14 +5492,12 @@ static int gfx_v8_0_set_powergating_state(void *handle,
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
+
+- if (!(adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG))
+- return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+- if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG)
+- cz_update_gfx_cg_power_gating(adev, enable);
++
++ cz_update_gfx_cg_power_gating(adev, enable);
+
+ if ((adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_SMG) && enable)
+ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
+--
+2.7.4
+
diff --git a/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc b/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc
index cc005c0c..f4fdebb6 100644
--- a/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc
+++ b/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc
@@ -1139,3 +1139,14 @@ patch 1137-drm-amdgpu-acp-fix-resume-on-CZ-systems-with-AZ-audi.patch
patch 0001-amdgpu-fix-various-compilation-issues.patch
patch CVE-2016-5195.patch
patch 0001-random-replace-non-blocking-pool-with-a-Chacha20-bas.patch
+patch 1138-add-new-semaphore-object-in-kernel-side.patch
+patch 1139-unify-memory-query-info-interface.patch
+patch 1140-dma-buf-return-index-of-the-first-signaled-fence.patch
+patch 1141-Fix-a-deadlock-affecting-ww_mutexes.patch
+patch 1142-add-additional-cached-gca-config-variables.patch
+patch 1143-implement-raster-configuration-for-gfx-v8.patch
+patch 1144-cache-rb-config-values.patch
+patch 1145-use-cached-raster-config-values-in-csb.patch
+patch 1146-used-cached-gca-values-for-vi_read_register.patch
+patch 1147-Removed-extra-parameter.patch
+patch 1148-refine-pg-code-for-gfx_v8.patch