diff options
19 files changed, 1971 insertions, 37 deletions
diff --git a/common/conf/machine/include/amd-common-configurations.inc b/common/conf/machine/include/amd-common-configurations.inc index 55e96be8..8b5e4bb0 100644 --- a/common/conf/machine/include/amd-common-configurations.inc +++ b/common/conf/machine/include/amd-common-configurations.inc @@ -2,9 +2,9 @@ POKY_DEFAULT_DISTRO_FEATURES_remove = "wayland" PREFERRED_PROVIDER_jpeg ?= "jpeg" PREFERRED_PROVIDER_jpeg-native ?= "jpeg-native" -PREFERRED_PROVIDER_llvm ?= "llvm3.7.1" +PREFERRED_PROVIDER_llvm ?= "llvm3.9.1" PREFERRED_VERSION_linux-yocto ?= "4.4%" -PREFERRED_VERSION_mesa ?= "11.0.8+git%" +PREFERRED_VERSION_mesa ?= "12.0.3+git%" PREFERRED_VERSION_gstreamer1.0-omx ?= "git+git%" PREFERRED_VERSION_libav ?= "9.18" PREFERRED_VERSION_grub ?= "2.00+AUTOINC+%" diff --git a/common/recipes-core/llvm/files/0001-CrossCompile.cmake-adjust-build-for-OE.patch b/common/recipes-core/llvm/files/0001-CrossCompile.cmake-adjust-build-for-OE.patch new file mode 100644 index 00000000..bf03ec2f --- /dev/null +++ b/common/recipes-core/llvm/files/0001-CrossCompile.cmake-adjust-build-for-OE.patch @@ -0,0 +1,44 @@ +From eb27ad28d5171770d27415ace95f4c91f15828bf Mon Sep 17 00:00:00 2001 +From: Awais Belal <awais_belal@mentor.com> +Date: Wed, 21 Dec 2016 14:32:50 +0500 +Subject: [PATCH] CrossCompile.cmake: adjust build for OE + +CMake picks up its values from these environment variables +in case of native builds and in OE we set these to target +tools which will be incorrect in this case. +We specifically need to strip the BUILD_CC variable +before setting CC through it because OE tends to +add a space which isn't liked too much by cmake. 
+ +Signed-off-by: Awais Belal <awais_belal@mentor.com> +--- + cmake/modules/CrossCompile.cmake | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/cmake/modules/CrossCompile.cmake b/cmake/modules/CrossCompile.cmake +index 9c598a6..cf76fd6 100644 +--- a/cmake/modules/CrossCompile.cmake ++++ b/cmake/modules/CrossCompile.cmake +@@ -4,6 +4,19 @@ function(llvm_create_cross_target_internal target_name toolchain buildtype) + set(LLVM_${target_name}_BUILD "${CMAKE_BINARY_DIR}/${target_name}") + set(LLVM_${target_name}_BUILD ${LLVM_${target_name}_BUILD} PARENT_SCOPE) + message(STATUS "Setting native build dir to " ${LLVM_${target_name}_BUILD}) ++ string(STRIP $ENV{BUILD_CC} build_cc) ++ set(ENV{AR} $ENV{BUILD_AR}) ++ set(ENV{ASM} ${build_cc}) ++ set(ENV{ASMFLAGS} $ENV{BUILD_CFLAGS}) ++ set(ENV{CC} ${build_cc}) ++ set(ENV{CFLAGS} $ENV{BUILD_CFLAGS}) ++ set(ENV{CXX} $ENV{BUILD_CXX}) ++ set(ENV{CXXFLAGS} $ENV{BUILD_CXXFLAGS}) ++ set(ENV{CPP} $ENV{BUILD_CPP}) ++ set(ENV{CPPFLAGS} $ENV{BUILD_CPPFLAGS}) ++ set(ENV{NM} $ENV{BUILD_NM}) ++ set(ENV{RANLIB} $ENV{BUILD_RANLIB}) ++ set(ENV{LDFLAGS} $ENV{BUILD_LDFLAGS}) + endif(NOT DEFINED LLVM_${target_name}_BUILD) + + if (EXISTS ${LLVM_MAIN_SRC_DIR}/cmake/platforms/${toolchain}.cmake) +-- +1.9.1 + diff --git a/common/recipes-core/llvm/files/0002-CrossCompile.cmake-use-target-BuildVariables-include.patch b/common/recipes-core/llvm/files/0002-CrossCompile.cmake-use-target-BuildVariables-include.patch new file mode 100644 index 00000000..5ed00757 --- /dev/null +++ b/common/recipes-core/llvm/files/0002-CrossCompile.cmake-use-target-BuildVariables-include.patch @@ -0,0 +1,33 @@ +From 489b229104c76651ff36fc5639384cf9dc6b8d7d Mon Sep 17 00:00:00 2001 +From: Awais Belal <awais_belal@mentor.com> +Date: Fri, 23 Dec 2016 03:19:18 +0500 +Subject: [PATCH] CrossCompile.cmake: use target BuildVariables include for + host + +This is primarily OE specific where we'd like to report +the target build variables when checked through host 
+llvm-config because that is used for configuring +projects depending on LLVM. + +Signed-off-by: Awais Belal <awais_belal@mentor.com> +--- + cmake/modules/CrossCompile.cmake | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/cmake/modules/CrossCompile.cmake b/cmake/modules/CrossCompile.cmake +index 9c598a6..173aefd 100644 +--- a/cmake/modules/CrossCompile.cmake ++++ b/cmake/modules/CrossCompile.cmake +@@ -22,7 +22,8 @@ function(llvm_create_cross_target_internal target_name toolchain buildtype) + -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE + WORKING_DIRECTORY ${LLVM_${target_name}_BUILD} + DEPENDS ${LLVM_${target_name}_BUILD} +- COMMENT "Configuring ${target_name} LLVM...") ++ COMMENT "Configuring ${target_name} LLVM..." ++ COMMAND "${CMAKE_COMMAND}" "-E" "copy" "${CMAKE_SOURCE_DIR}/../build/tools/llvm-config/BuildVariables.inc" "${CMAKE_SOURCE_DIR}/../build/NATIVE/tools/llvm-config/BuildVariables.inc") + + add_custom_target(CONFIGURE_LLVM_${target_name} + DEPENDS ${LLVM_${target_name}_BUILD}/CMakeCache.txt) +-- +1.9.1 + diff --git a/common/recipes-core/llvm/llvm3.9.1_3.9.1.bb b/common/recipes-core/llvm/llvm3.9.1_3.9.1.bb new file mode 100644 index 00000000..e1e5caba --- /dev/null +++ b/common/recipes-core/llvm/llvm3.9.1_3.9.1.bb @@ -0,0 +1,124 @@ +DESCRIPTION = "The Low Level Virtual Machine" +HOMEPAGE = "http://llvm.org" + +# 3-clause BSD-like +# University of Illinois/NCSA Open Source License +LICENSE = "NCSA" +LIC_FILES_CHKSUM = "file://LICENSE.TXT;md5=b99eb43c934ceebecab85c6b9b1a08be" + +DEPENDS = "libffi libxml2-native llvm-common zlib" +RDEPENDS_${PN} += "ncurses-terminfo" + +inherit perlnative pythonnative cmake + +PROVIDES += "llvm" + +LLVM_RELEASE = "${PV}" +LLVM_DIR = "llvm${LLVM_RELEASE}" + +SRCREV = "a093ef43dd592b729da46db4ff3057fef9a46023" +PV = "3.9.1" +SRC_URI = "git://llvm.org/git/llvm.git;branch=release_39;protocol=http \ + file://0001-CrossCompile.cmake-adjust-build-for-OE.patch \ + 
file://0002-CrossCompile.cmake-use-target-BuildVariables-include.patch" +S = "${WORKDIR}/git" + +LLVM_INSTALL_DIR = "${WORKDIR}/llvm-install" + +EXTRA_OECMAKE += "-DLLVM_ENABLE_ASSERTIONS=OFF \ + -DLLVM_ENABLE_EXPENSIVE_CHECKS=OFF \ + -DLLVM_BINDINGS_LIST="" \ + -DLLVM_LINK_LLVM_DYLIB=ON \ + -DLLVM_ENABLE_FFI=ON \ + -DLLVM_OPTIMIZED_TABLEGEN=ON \ + -DLLVM_TARGETS_TO_BUILD="AMDGPU;X86"" + +EXTRA_OEMAKE += "REQUIRES_RTTI=1 VERBOSE=1" + +do_configure_prepend() { + # Fix paths in llvm-config + sed -i "s|sys::path::parent_path(CurrentPath))\.str()|sys::path::parent_path(sys::path::parent_path(CurrentPath))).str()|g" ${S}/tools/llvm-config/llvm-config.cpp + sed -ri "s#/(bin|include|lib)(/?\")#/\1/${LLVM_DIR}\2#g" ${S}/tools/llvm-config/llvm-config.cpp + sed -ri "s#lib/${LLVM_DIR}#${baselib}/${LLVM_DIR}#g" ${S}/tools/llvm-config/llvm-config.cpp +} + +do_install() { + oe_runmake DESTDIR=${LLVM_INSTALL_DIR} install + + install ${B}/NATIVE/bin/llvm-config ${LLVM_INSTALL_DIR}/llvm-config-host + + install -d ${D}${bindir}/${LLVM_DIR} + cp -r ${LLVM_INSTALL_DIR}${bindir}/* ${D}${bindir}/${LLVM_DIR}/ + + install -d ${D}${includedir}/${LLVM_DIR} + cp -r ${LLVM_INSTALL_DIR}${includedir}/* ${D}${includedir}/${LLVM_DIR}/ + + install -d ${D}${libdir}/${LLVM_DIR} + + # The LLVM sources have "/lib" embedded and so we cannot completely rely on the ${libdir} variable + if [ -d ${LLVM_INSTALL_DIR}${libdir}/ ]; then + cp -r ${LLVM_INSTALL_DIR}${libdir}/* ${D}${libdir}/${LLVM_DIR}/ + elif [ -d ${LLVM_INSTALL_DIR}${prefix}/lib ]; then + cp -r ${LLVM_INSTALL_DIR}${prefix}/lib/* ${D}${libdir}/${LLVM_DIR}/ + elif [ -d ${LLVM_INSTALL_DIR}${prefix}/lib64 ]; then + cp -r ${LLVM_INSTALL_DIR}${prefix}/lib64/* ${D}${libdir}/${LLVM_DIR}/ + fi + + # Remove unnecessary cmake files + rm -rf ${D}${libdir}/${LLVM_DIR}/cmake + + ln -s ${LLVM_DIR}/libLLVM-${PV}${SOLIBSDEV} ${D}${libdir}/libLLVM-${PV}${SOLIBSDEV} +} + +SYSROOT_PREPROCESS_FUNCS += "llvm_sysroot_preprocess" + +llvm_sysroot_preprocess() { + 
install -d ${SYSROOT_DESTDIR}${bindir_crossscripts} + cp ${LLVM_INSTALL_DIR}/llvm-config-host ${SYSROOT_DESTDIR}${bindir_crossscripts}/llvm-config${PV} +} + +PACKAGES += "${PN}-bugpointpasses ${PN}-llvmhello" +ALLOW_EMPTY_${PN} = "1" +ALLOW_EMPTY_${PN}-staticdev = "1" +FILES_${PN} = "" +FILES_${PN}-staticdev = "" +FILES_${PN}-dbg = " \ + ${bindir}/${LLVM_DIR}/.debug \ + ${libdir}/${LLVM_DIR}/.debug/BugpointPasses.so \ + ${libdir}/${LLVM_DIR}/.debug/LLVMHello.so \ + /usr/src/debug \ +" + +FILES_${PN}-dev = " \ + ${bindir}/${LLVM_DIR} \ + ${includedir}/${LLVM_DIR} \ +" +RRECOMMENDS_${PN}-dev += "${PN}-bugpointpasses ${PN}-llvmhello" + +FILES_${PN}-bugpointpasses = "\ + ${libdir}/${LLVM_DIR}/BugpointPasses.so \ +" +FILES_${PN}-llvmhello = "\ + ${libdir}/${LLVM_DIR}/LLVMHello.so \ +" + +PACKAGES_DYNAMIC = "^libllvm${LLVM_RELEASE}-.*$" +NOAUTOPACKAGEDEBUG = "1" + +INSANE_SKIP_${MLPREFIX}libllvm${LLVM_RELEASE}-llvm-${LLVM_RELEASE} += "dev-so" +INSANE_SKIP_${MLPREFIX}libllvm${LLVM_RELEASE}-llvm += "dev-so" + +python llvm_populate_packages() { + libdir = bb.data.expand('${libdir}', d) + libllvm_libdir = bb.data.expand('${libdir}/${LLVM_DIR}', d) + split_dbg_packages = do_split_packages(d, libllvm_libdir+'/.debug', '^lib(.*)\.so$', 'libllvm${LLVM_RELEASE}-%s-dbg', 'Split debug package for %s', allow_dirs=True) + split_packages = do_split_packages(d, libdir, '^lib(.*)\.so$', 'libllvm${LLVM_RELEASE}-%s', 'Split package for %s', allow_dirs=True, allow_links=True, recursive=True) + split_staticdev_packages = do_split_packages(d, libllvm_libdir, '^lib(.*)\.a$', 'libllvm${LLVM_RELEASE}-%s-staticdev', 'Split staticdev package for %s', allow_dirs=True) + if split_packages: + pn = d.getVar('PN', True) + d.appendVar('RDEPENDS_' + pn, ' '+' '.join(split_packages)) + d.appendVar('RDEPENDS_' + pn + '-dbg', ' '+' '.join(split_dbg_packages)) + d.appendVar('RDEPENDS_' + pn + '-staticdev', ' '+' '.join(split_staticdev_packages)) +} + +PACKAGESPLITFUNCS_prepend = "llvm_populate_packages " 
diff --git a/common/recipes-graphics/mesa/mesa_git.bbappend b/common/recipes-graphics/mesa/mesa_git.bbappend index 34b66e6b..ca506af5 100644 --- a/common/recipes-graphics/mesa/mesa_git.bbappend +++ b/common/recipes-graphics/mesa/mesa_git.bbappend @@ -1,8 +1,8 @@ FILESEXTRAPATHS_prepend := "${THISDIR}/${PN}:" -SRCREV_amd = "b9b19162ee3f8d68be76b71adf2a290cbb675660" -LIC_FILES_CHKSUM_amd = "file://docs/license.html;md5=6a23445982a7a972ac198e93cc1cb3de" -PV_amd = "11.0.8+git${SRCPV}" -DEPENDS_append_amd = " libvdpau libomxil python-mako-native" +SRCREV_amd = "09460b8cf7ddac4abb46eb6439314b29954c76a6" +LIC_FILES_CHKSUM_amd = "file://docs/license.html;md5=899fbe7e42d494c7c8c159c7001693d5" +PV_amd = "12.0.3+git${SRCPV}" +DEPENDS_append_amd = " libvdpau libomxil" PACKAGECONFIG[va] = "--enable-va,--disable-va,libva" PACKAGECONFIG_append_amd = " xvmc gallium r600 gallium-llvm xa" @@ -17,17 +17,10 @@ LIBVA_PLATFORMS .= "${@bb.utils.contains('DISTRO_FEATURES', 'wayland', ' libva-w LIBVA_PLATFORMS .= "${@bb.utils.contains('DISTRO_FEATURES', 'opengl', ' libva-gl', '', d)}" RDEPENDS_mesa-megadriver += "${@bb.utils.contains('PACKAGECONFIG', 'va', '${LIBVA_PLATFORMS}', '', d)}" -MESA_LLVM_RELEASE_amd = "3.7.1" +MESA_LLVM_RELEASE_amd = "3.9.1" SRC_URI_amd = "\ - git://anongit.freedesktop.org/git/mesa/mesa;branch=11.0 \ - file://0001-st-omx-h264-fix-corruption-when-scaling-matrix-prese.patch \ - file://0002-st-omx-Remove-trailing-spaces.patch \ - file://0003-st-omx-dec-Correct-the-timestamping.patch \ - file://0004-st-omx-Avoid-segfault-in-deconstructor-if-constructo.patch \ - file://0005-st-omx-enc-Correct-the-timestamping.patch \ - file://0006-st-omx-enc-Modularize-the-Encoding-task.patch \ - file://0007-st-omx-enc-Support-framerate-conversion.patch \ + git://anongit.freedesktop.org/git/mesa/mesa;branch=12.0 \ " EXTRA_OECONF_append_amd = " \ @@ -35,7 +28,6 @@ EXTRA_OECONF_append_amd = " \ --enable-osmesa \ --enable-glx \ --enable-omx \ - --enable-r600-llvm-compiler \ 
--with-omx-libdir=${libdir}/bellagio \ " @@ -73,23 +65,7 @@ python () { d.setVar("GALLIUMDRIVERS", "swrast,r300,r600,radeonsi") } -# We'll need to setup some symlinks for the va enabled -# video driver to work properly in case va is enabled -# so skip the .so symlink checks. -INSANE_SKIP_${PN}-megadriver += "${@bb.utils.contains('PACKAGECONFIG', 'va', 'dev-so', '', d)}" - -# Mesa 11.1 onwards provides options for crypto functions -# this is not supported in the version we are using currently -# and generates warnings so disable it. -MESA_CRYPTO = "" - -do_install_append_amd() { - # Create symlinks for the gallium drivers to be able to play - # through the va enabled driver by default. - if ${@bb.utils.contains('PACKAGECONFIG','va','true','false',d)}; then - gallium_drivers=$(echo ${GALLIUMDRIVERS} | sed 's/,/ /g') - for gdriver in ${gallium_drivers}; do - ln -sf ${libdir}/dri/gallium_drv_video.so ${D}${libdir}/dri/${gdriver}_drv_video.so - done - fi -} +# We're using components like vdpau which depend +# on nettle so let's just use it as the default for +# crypto as well. +MESA_CRYPTO ?= "nettle" diff --git a/common/recipes-kernel/linux/files/1138-add-new-semaphore-object-in-kernel-side.patch b/common/recipes-kernel/linux/files/1138-add-new-semaphore-object-in-kernel-side.patch new file mode 100644 index 00000000..f27f1afc --- /dev/null +++ b/common/recipes-kernel/linux/files/1138-add-new-semaphore-object-in-kernel-side.patch @@ -0,0 +1,504 @@ +From d29a89414316f4c54a1a619527398714b091d3db Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Mon, 14 Nov 2016 12:26:18 +0530 +Subject: [PATCH] add new semaphore object in kernel side + +So that semaphore can be shared across processes and across devices. 
+ +Change-Id: Ie82cace6af81e2ddf45f4bbf9f3c0dafd6bcc499 +Signed-off-by: Chunming Zhou <David1.Zhou@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 + + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 6 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 267 ++++++++++++++++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h | 44 ++++ + drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h | 29 +++ + 8 files changed, 361 insertions(+), 4 deletions(-) + create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c + create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h + +diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile +index 28e8e4c..2acc7c1 100644 +--- a/drivers/gpu/drm/amd/amdgpu/Makefile ++++ b/drivers/gpu/drm/amd/amdgpu/Makefile +@@ -31,7 +31,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ + amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ + atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ + amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ +- amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o ++ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ ++ amdgpu_sem.o + + # add asic specific block + amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index d3de21d..3f5d2ad 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -1000,6 +1000,8 @@ struct amdgpu_ctx_ring { + uint64_t sequence; + struct fence **fences; + struct amd_sched_entity entity; ++ struct list_head sem_list; ++ struct mutex sem_lock; + /* client id */ + u64 client_id; + }; +@@ -1699,6 +1701,8 @@ struct amdgpu_vce { + struct amdgpu_irq_src irq; + unsigned harvest_config; + struct amd_sched_entity 
entity; ++ struct list_head sem_list; ++ struct mutex sem_lock; + }; + + /* +@@ -1872,6 +1876,13 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, + int amdgpu_freesync_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + ++int amdgpu_sem_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *filp); ++ ++int amdgpu_sem_add_cs(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, ++ struct amdgpu_sync *sync); ++ ++ + /* VRAM scratch page for HDP bug, default vram page */ + struct amdgpu_vram_scratch { + struct amdgpu_bo *robj; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +index 0d1346c..bb6057a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -882,7 +882,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + } + } + +- return 0; ++ return amdgpu_sem_add_cs(p->ctx, p->job->ring, &p->job->sync); + } + + static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +index 17e1362..a020e22 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +@@ -42,6 +42,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + ctx->rings[i].sequence = 1; + ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; ++ INIT_LIST_HEAD(&ctx->rings[i].sem_list); ++ mutex_init(&ctx->rings[i].sem_lock); + } + /* create context entity for each ring */ + for (i = 0; i < adev->num_rings; i++) { +@@ -74,8 +76,10 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) + return; + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) +- for (j = 0; j < amdgpu_sched_jobs; ++j) ++ for (j = 0; j < amdgpu_sched_jobs; ++j) { + fence_put(ctx->rings[i].fences[j]); ++ mutex_destroy(&ctx->rings[i].sem_lock); ++ } + kfree(ctx->fences); + + for (i = 0; i < 
adev->num_rings; i++) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index f6ae587..a48783e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -786,6 +786,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +- DRM_IOCTL_DEF_DRV(AMDGPU_FREESYNC, amdgpu_freesync_ioctl, DRM_MASTER) ++ DRM_IOCTL_DEF_DRV(AMDGPU_FREESYNC, amdgpu_freesync_ioctl, DRM_MASTER), ++ DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + }; + const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c +new file mode 100644 +index 0000000..db16baa +--- /dev/null ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c +@@ -0,0 +1,267 @@ ++/* ++ * Copyright 2016 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: ++ * Chunming Zhou <david1.zhou@amd.com> ++ */ ++#include <linux/file.h> ++#include <linux/fs.h> ++#include <linux/kernel.h> ++#include <linux/poll.h> ++#include <linux/seq_file.h> ++#include <linux/export.h> ++#include <linux/sched.h> ++#include <linux/slab.h> ++#include <linux/uaccess.h> ++#include <linux/anon_inodes.h> ++#include "amdgpu_sem.h" ++#include "amdgpu.h" ++#include <drm/drmP.h> ++ ++static int amdgpu_sem_cring_add(struct amdgpu_fpriv *fpriv, ++ struct drm_amdgpu_sem_in *in, ++ struct amdgpu_sem *sem); ++ ++static const struct file_operations amdgpu_sem_fops; ++ ++static struct amdgpu_sem *amdgpu_sem_alloc(struct fence *fence) ++{ ++ struct amdgpu_sem *sem; ++ ++ sem = kzalloc(sizeof(struct amdgpu_sem), GFP_KERNEL); ++ if (!sem) ++ return NULL; ++ ++ sem->file = anon_inode_getfile("sem_file", ++ &amdgpu_sem_fops, ++ sem, 0); ++ if (IS_ERR(sem->file)) ++ goto err; ++ ++ kref_init(&sem->kref); ++ INIT_LIST_HEAD(&sem->list); ++ /* fence should be get before passing here */ ++ sem->fence = fence; ++ ++ return sem; ++err: ++ kfree(sem); ++ return NULL; ++} ++ ++static void amdgpu_sem_free(struct kref *kref) ++{ ++ struct amdgpu_sem *sem = container_of( ++ kref, struct amdgpu_sem, kref); ++ ++ fence_put(sem->fence); ++ kfree(sem); ++} ++ ++static int amdgpu_sem_release(struct inode *inode, struct file *file) ++{ ++ struct amdgpu_sem *sem = file->private_data; ++ ++ kref_put(&sem->kref, amdgpu_sem_free); ++ return 0; ++} ++ ++static unsigned int amdgpu_sem_poll(struct file *file, poll_table *wait) ++{ ++ return 0; ++} ++ ++static long amdgpu_sem_file_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return 0; ++} ++ ++static const struct 
file_operations amdgpu_sem_fops = { ++ .release = amdgpu_sem_release, ++ .poll = amdgpu_sem_poll, ++ .unlocked_ioctl = amdgpu_sem_file_ioctl, ++ .compat_ioctl = amdgpu_sem_file_ioctl, ++}; ++ ++static int amdgpu_sem_create(void) ++{ ++ return get_unused_fd_flags(O_CLOEXEC); ++} ++ ++static int amdgpu_sem_signal(int fd, struct fence *fence) ++{ ++ struct amdgpu_sem *sem; ++ ++ sem = amdgpu_sem_alloc(fence); ++ if (!sem) ++ return -ENOMEM; ++ fd_install(fd, sem->file); ++ ++ return 0; ++} ++ ++static int amdgpu_sem_wait(int fd, struct amdgpu_fpriv *fpriv, ++ struct drm_amdgpu_sem_in *in) ++{ ++ struct file *file = fget(fd); ++ struct amdgpu_sem *sem; ++ int r; ++ ++ if (!file) ++ return -EINVAL; ++ ++ sem = file->private_data; ++ if (!sem) { ++ r = -EINVAL; ++ goto err; ++ } ++ r = amdgpu_sem_cring_add(fpriv, in, sem); ++err: ++ fput(file); ++ return r; ++} ++ ++static void amdgpu_sem_destroy(void) ++{ ++ /* userspace should close fd when they try to destroy sem, ++ * closing fd will free semaphore object. ++ */ ++} ++ ++static struct fence *amdgpu_sem_get_fence(struct amdgpu_fpriv *fpriv, ++ struct drm_amdgpu_sem_in *in) ++{ ++ struct amdgpu_ring *out_ring; ++ struct amdgpu_ctx *ctx; ++ struct fence *fence; ++ uint32_t ctx_id, ip_type, ip_instance, ring; ++ int r; ++ ++ ctx_id = in->ctx_id; ++ ip_type = in->ip_type; ++ ip_instance = in->ip_instance; ++ ring = in->ring; ++ ctx = amdgpu_ctx_get(fpriv, ctx_id); ++ if (!ctx) ++ return NULL; ++ r = amdgpu_cs_get_ring(ctx->adev, ip_type, ip_instance, ring, ++ &out_ring); ++ if (r) { ++ amdgpu_ctx_put(ctx); ++ return NULL; ++ } ++ /* get the last fence of this entity */ ++ fence = amdgpu_ctx_get_fence(ctx, out_ring, ++ in->seq ? 
in->seq : ++ ctx->rings[out_ring->idx].sequence - 1); ++ amdgpu_ctx_put(ctx); ++ ++ return fence; ++} ++ ++static int amdgpu_sem_cring_add(struct amdgpu_fpriv *fpriv, ++ struct drm_amdgpu_sem_in *in, ++ struct amdgpu_sem *sem) ++{ ++ struct amdgpu_ring *out_ring; ++ struct amdgpu_ctx *ctx; ++ uint32_t ctx_id, ip_type, ip_instance, ring; ++ int r; ++ ++ ctx_id = in->ctx_id; ++ ip_type = in->ip_type; ++ ip_instance = in->ip_instance; ++ ring = in->ring; ++ ctx = amdgpu_ctx_get(fpriv, ctx_id); ++ if (!ctx) ++ return -EINVAL; ++ r = amdgpu_cs_get_ring(ctx->adev, ip_type, ip_instance, ring, ++ &out_ring); ++ if (r) ++ goto err; ++ mutex_lock(&ctx->rings[out_ring->idx].sem_lock); ++ list_add(&sem->list, &ctx->rings[out_ring->idx].sem_list); ++ mutex_unlock(&ctx->rings[out_ring->idx].sem_lock); ++ ++err: ++ amdgpu_ctx_put(ctx); ++ return r; ++} ++ ++int amdgpu_sem_add_cs(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, ++ struct amdgpu_sync *sync) ++{ ++ struct amdgpu_sem *sem, *tmp; ++ int r = 0; ++ ++ if (list_empty(&ctx->rings[ring->idx].sem_list)) ++ return 0; ++ ++ mutex_lock(&ctx->rings[ring->idx].sem_lock); ++ list_for_each_entry_safe(sem, tmp, &ctx->rings[ring->idx].sem_list, ++ list) { ++ r = amdgpu_sync_fence(ctx->adev, sync, sem->fence); ++ fence_put(sem->fence); ++ if (r) ++ goto err; ++ list_del(&sem->list); ++ kfree(sem); ++ } ++err: ++ mutex_unlock(&ctx->rings[ring->idx].sem_lock); ++ return r; ++} ++ ++int amdgpu_sem_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *filp) ++{ ++ union drm_amdgpu_sem *args = data; ++ struct amdgpu_fpriv *fpriv = filp->driver_priv; ++ struct fence *fence; ++ int r = 0; ++ int fd = args->in.fd; ++ ++ switch (args->in.op) { ++ case AMDGPU_SEM_OP_CREATE_SEM: ++ args->out.fd = amdgpu_sem_create(); ++ break; ++ case AMDGPU_SEM_OP_WAIT_SEM: ++ r = amdgpu_sem_wait(fd, fpriv, &args->in); ++ break; ++ case AMDGPU_SEM_OP_SIGNAL_SEM: ++ fence = amdgpu_sem_get_fence(fpriv, &args->in); ++ if (IS_ERR(fence)) { ++ r = 
PTR_ERR(fence); ++ return r; ++ } ++ r = amdgpu_sem_signal(fd, fence); ++ fence_put(fence); ++ break; ++ case AMDGPU_SEM_OP_DESTROY_SEM: ++ amdgpu_sem_destroy(); ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return r; ++} +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h +new file mode 100644 +index 0000000..56d59d3 +--- /dev/null ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h +@@ -0,0 +1,44 @@ ++/* ++ * Copyright 2016 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: Chunming Zhou <david1.zhou@amd.com> ++ * ++ */ ++ ++ ++#ifndef _LINUX_AMDGPU_SEM_H ++#define _LINUX_AMDGPU_SEM_H ++ ++#include <linux/types.h> ++#include <linux/kref.h> ++#include <linux/ktime.h> ++#include <linux/list.h> ++#include <linux/spinlock.h> ++#include <linux/fence.h> ++ ++struct amdgpu_sem { ++ struct file *file; ++ struct kref kref; ++ struct fence *fence; ++ struct list_head list; ++}; ++ ++#endif /* _LINUX_AMDGPU_SEM_H */ +diff --git a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +index b06e3dc..65153bf 100644 +--- a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h ++++ b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +@@ -46,6 +46,7 @@ + #define DRM_AMDGPU_WAIT_CS 0x09 + #define DRM_AMDGPU_GEM_OP 0x10 + #define DRM_AMDGPU_GEM_USERPTR 0x11 ++#define DRM_AMDGPU_SEM 0x5b + #define DRM_AMDGPU_FREESYNC 0x14 + + #define DRM_AMDGPU_WAIT_FENCES 0x5e +@@ -64,6 +65,7 @@ + #define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) + #define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) + #define DRM_IOCTL_AMDGPU_FREESYNC DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FREESYNC, struct drm_amdgpu_freesync) ++#define DRM_IOCTL_AMDGPU_SEM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem) + + #define AMDGPU_GEM_DOMAIN_CPU 0x1 + #define AMDGPU_GEM_DOMAIN_GTT 0x2 +@@ -185,6 +187,33 @@ union drm_amdgpu_ctx { + union drm_amdgpu_ctx_out out; + }; + ++/* sem related */ ++#define AMDGPU_SEM_OP_CREATE_SEM 1 ++#define AMDGPU_SEM_OP_WAIT_SEM 2 ++#define AMDGPU_SEM_OP_SIGNAL_SEM 3 ++#define AMDGPU_SEM_OP_DESTROY_SEM 4 ++ ++struct drm_amdgpu_sem_in { ++ /** AMDGPU_SEM_OP_* */ ++ uint32_t op; ++ int32_t fd; ++ uint32_t ctx_id; ++ uint32_t ip_type; ++ uint32_t ip_instance; ++ uint32_t ring; ++ uint64_t seq; ++}; ++ ++union drm_amdgpu_sem_out { ++ int32_t fd; ++ uint32_t 
_pad; ++}; ++ ++union drm_amdgpu_sem { ++ struct drm_amdgpu_sem_in in; ++ union drm_amdgpu_sem_out out; ++}; ++ + /* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have fallback path that do not use userptr to +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1139-unify-memory-query-info-interface.patch b/common/recipes-kernel/linux/files/1139-unify-memory-query-info-interface.patch new file mode 100644 index 00000000..b7c965a3 --- /dev/null +++ b/common/recipes-kernel/linux/files/1139-unify-memory-query-info-interface.patch @@ -0,0 +1,113 @@ +From 314642915b4a2bda146fb9d900ca99eabeab36c0 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Mon, 14 Nov 2016 12:13:41 +0530 +Subject: [PATCH 01/10] unify memory query info interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Change-Id: I74d2b7379bc4febe714a91daf4e1786895de90f2 +Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com> +Reviewed-by: Marek Olšák <marek.olsak@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 32 +++++++++++++++++++++++ + drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h | 32 +++++++++++++++++++++++ + 2 files changed, 64 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index 31c20ba..a48783e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -390,6 +390,38 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + return copy_to_user(out, &vram_gtt, + min((size_t)size, sizeof(vram_gtt))) ? 
-EFAULT : 0; + } ++ ++ case AMDGPU_INFO_MEMORY: { ++ struct drm_amdgpu_memory_info mem; ++ ++ memset(&mem, 0, sizeof(mem)); ++ mem.vram.total_heap_size = adev->mc.real_vram_size; ++ mem.vram.usable_heap_size = ++ adev->mc.real_vram_size - adev->vram_pin_size; ++ mem.vram.heap_usage = atomic64_read(&adev->vram_usage); ++ mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; ++ ++ mem.cpu_accessible_vram.total_heap_size = ++ adev->mc.visible_vram_size; ++ mem.cpu_accessible_vram.usable_heap_size = ++ adev->mc.visible_vram_size - ++ (adev->vram_pin_size - adev->invisible_pin_size); ++ mem.cpu_accessible_vram.heap_usage = ++ atomic64_read(&adev->vram_vis_usage); ++ mem.cpu_accessible_vram.max_allocation = ++ mem.cpu_accessible_vram.usable_heap_size * 3 / 4; ++ ++ mem.gtt.total_heap_size = adev->mc.gtt_size; ++ mem.gtt.usable_heap_size = ++ adev->mc.gtt_size - adev->gart_pin_size; ++ mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage); ++ mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; ++ ++ return copy_to_user(out, &mem, ++ min((size_t)size, sizeof(mem))) ++ ? -EFAULT : 0; ++ } ++ + case AMDGPU_INFO_READ_MMR_REG: { + unsigned n, alloc_size; + uint32_t *regs; +diff --git a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +index 4d7d982..3f13a87 100644 +--- a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h ++++ b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +@@ -540,6 +540,10 @@ struct drm_amdgpu_cs_chunk_data { + #define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 + /* virtual range */ + #define AMDGPU_INFO_VIRTUAL_RANGE 0x18 ++ ++/* Query memory about VRAM and GTT domains */ ++#define AMDGPU_INFO_MEMORY 0x19 ++ + /* gpu capability */ + #define AMDGPU_INFO_CAPABILITY 0x50 + /* query pin memory capability */ +@@ -705,6 +709,34 @@ struct drm_amdgpu_info_hw_ip { + __u32 _pad; + }; + ++struct drm_amdgpu_heap_info { ++ /** max. physical memory */ ++ __u64 total_heap_size; ++ ++ /** Theoretical max. 
available memory in the given heap */ ++ __u64 usable_heap_size; ++ ++ /** ++ * Number of bytes allocated in the heap. This includes all processes ++ * and private allocations in the kernel. It changes when new buffers ++ * are allocated, freed, and moved. It cannot be larger than ++ * heap_size. ++ */ ++ __u64 heap_usage; ++ ++ /** ++ * Theoretical possible max. size of buffer which ++ * could be allocated in the given heap ++ */ ++ __u64 max_allocation; ++}; ++ ++struct drm_amdgpu_memory_info { ++ struct drm_amdgpu_heap_info vram; ++ struct drm_amdgpu_heap_info cpu_accessible_vram; ++ struct drm_amdgpu_heap_info gtt; ++}; ++ + /* + * Supported GPU families + */ +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1140-dma-buf-return-index-of-the-first-signaled-fence.patch b/common/recipes-kernel/linux/files/1140-dma-buf-return-index-of-the-first-signaled-fence.patch new file mode 100644 index 00000000..76815764 --- /dev/null +++ b/common/recipes-kernel/linux/files/1140-dma-buf-return-index-of-the-first-signaled-fence.patch @@ -0,0 +1,188 @@ +From 7e06443930ab2fabda1977c20ff82ff6bc42e3be Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Tue, 15 Nov 2016 14:30:58 +0530 +Subject: [PATCH 02/10] dma-buf: return index of the first signaled fence + +Return the index of the first signaled fence. This information +is useful in some APIs like Vulkan. 
+ +Signed-off-by: monk.liu <monk.liu@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Cc: Sumit Semwal <sumit.semwal@linaro.org> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/dma-buf/fence.c | 19 ++++++++++++++----- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +++- + drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 2 +- + drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h | 21 +++++++++++---------- + include/linux/fence.h | 2 +- + 5 files changed, 30 insertions(+), 18 deletions(-) + +diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c +index 7b05dbe..192f99b 100644 +--- a/drivers/dma-buf/fence.c ++++ b/drivers/dma-buf/fence.c +@@ -398,14 +398,17 @@ out: + EXPORT_SYMBOL(fence_default_wait); + + static bool +-fence_test_signaled_any(struct fence **fences, uint32_t count) ++fence_test_signaled_any(struct fence **fences, uint32_t count, uint32_t *idx) + { + int i; + + for (i = 0; i < count; ++i) { + struct fence *fence = fences[i]; +- if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) ++ if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { ++ if (idx) ++ *idx = i; + return true; ++ } + } + return false; + } +@@ -417,6 +420,7 @@ fence_test_signaled_any(struct fence **fences, uint32_t count) + * @count: [in] number of fences to wait on + * @intr: [in] if true, do an interruptible wait + * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT ++ * @idx: [out] the first signaled fence index, meaningful only on positive return + * + * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if + * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies +@@ -428,7 +432,7 @@ fence_test_signaled_any(struct fence **fences, uint32_t count) + */ + signed long + fence_wait_any_timeout(struct fence **fences, uint32_t count, +- bool intr, signed long timeout) ++ bool intr, signed long timeout, uint32_t *idx) + { + struct default_wait_cb *cb; + signed long ret = timeout; +@@ -439,8 +443,11 @@ 
fence_wait_any_timeout(struct fence **fences, uint32_t count, + + if (timeout == 0) { + for (i = 0; i < count; ++i) +- if (fence_is_signaled(fences[i])) ++ if (fence_is_signaled(fences[i])) { ++ if (idx) ++ *idx = i; + return 1; ++ } + + return 0; + } +@@ -463,6 +470,8 @@ fence_wait_any_timeout(struct fence **fences, uint32_t count, + if (fence_add_callback(fence, &cb[i].base, + fence_default_wait_cb)) { + /* This fence is already signaled */ ++ if (idx) ++ *idx = i; + goto fence_rm_cb; + } + } +@@ -473,7 +482,7 @@ fence_wait_any_timeout(struct fence **fences, uint32_t count, + else + set_current_state(TASK_UNINTERRUPTIBLE); + +- if (fence_test_signaled_any(fences, count)) ++ if (fence_test_signaled_any(fences, count, idx)) + break; + + ret = schedule_timeout(ret); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +index bb6057a..181e2b7 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -1107,6 +1107,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev, + { + unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns); + uint32_t fence_count = wait->in.fence_count; ++ uint32_t first = ~0; + struct fence **array; + unsigned i; + long r; +@@ -1132,13 +1133,14 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev, + } + } + +- r = fence_wait_any_timeout(array, fence_count, true, timeout); ++ r = fence_wait_any_timeout(array, fence_count, true, timeout, &first); + if (r < 0) + goto err_free_fence_array; + + out: + memset(wait, 0, sizeof(*wait)); + wait->out.status = (r > 0); ++ wait->out.first_signaled = first; + /* set return value 0 to indicate success */ + r = 0; + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +index 8bf84ef..9f4311c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +@@ -360,7 +360,7 @@ int amdgpu_sa_bo_new(struct 
amdgpu_sa_manager *sa_manager, + if (count) { + spin_unlock(&sa_manager->wq.lock); + t = fence_wait_any_timeout(fences, count, false, +- MAX_SCHEDULE_TIMEOUT); ++ MAX_SCHEDULE_TIMEOUT, NULL); + for (i = 0; i < count; ++i) + fence_put(fences[i]); + +diff --git a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +index 3f13a87..c2f06eb 100644 +--- a/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h ++++ b/drivers/gpu/drm/amd/include/uapi/drm/amdgpu_drm.h +@@ -334,23 +334,24 @@ union drm_amdgpu_wait_cs { + }; + + struct drm_amdgpu_fence { +- uint32_t ctx_id; +- uint32_t ip_type; +- uint32_t ip_instance; +- uint32_t ring; +- uint64_t seq_no; ++ __u32 ctx_id; ++ __u32 ip_type; ++ __u32 ip_instance; ++ __u32 ring; ++ __u64 seq_no; + }; + + struct drm_amdgpu_wait_fences_in { + /** This points to uint64_t * which points to fences */ +- uint64_t fences; +- uint32_t fence_count; +- uint32_t wait_all; +- uint64_t timeout_ns; ++ __u64 fences; ++ __u32 fence_count; ++ __u32 wait_all; ++ __u64 timeout_ns; + }; + + struct drm_amdgpu_wait_fences_out { +- uint64_t status; ++ __u32 status; ++ __u32 first_signaled; + }; + + union drm_amdgpu_wait_fences { +diff --git a/include/linux/fence.h b/include/linux/fence.h +index bb52201..b8da489 100644 +--- a/include/linux/fence.h ++++ b/include/linux/fence.h +@@ -322,7 +322,7 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) + + signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); + signed long fence_wait_any_timeout(struct fence **fences, uint32_t count, +- bool intr, signed long timeout); ++ bool intr, signed long timeout, uint32_t *idx); + + /** + * fence_wait - sleep until the fence gets signaled +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1141-Fix-a-deadlock-affecting-ww_mutexes.patch b/common/recipes-kernel/linux/files/1141-Fix-a-deadlock-affecting-ww_mutexes.patch new file mode 100644 index 00000000..25312ef1 --- 
/dev/null +++ b/common/recipes-kernel/linux/files/1141-Fix-a-deadlock-affecting-ww_mutexes.patch @@ -0,0 +1,121 @@ +From 5f3c992c00f95a483cf01d55b8ff0fa1fe6df216 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Wed, 23 Nov 2016 14:54:46 +0530 +Subject: [PATCH 03/10] Fix a deadlock affecting ww_mutexes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This patch fixes a race condition involving 4 threads and 2 ww_mutexes +as indicated in the following example. Acquire context stamps are ordered +like the thread numbers, i.e. thread #1 should back off when it encounters +a mutex locked by thread #0 etc. + +Thread #0 Thread #1 Thread #2 Thread #3 +--------- --------- --------- --------- + lock(ww) + lock(ww') + lock(ww) + lock(ww) + unlock(ww) part 1 +lock(ww) + unlock(ww) part 2 + back off +lock(ww') + +Here, unlock(ww) part 1 is the part that sets lock->base.count to 1 +(without being protected by lock->base.wait_lock), meaning that thread #0 +can acquire ww in the fast path. Since lock->base.count == 0, thread #0 +won't wake up any of the waiters. + +Then, unlock(ww) part 2 wakes up _only_the_first_ waiter of ww. This is +thread #2, since waiters are added at the tail. Thread #2 wakes up and +backs off since it sees ww owned by a context with a lower stamp. + +Meanwhile, thread #1 is never woken up, and so it won't back off its lock +on ww'. So thread #0 gets stuck waiting for ww' to be released. + +This patch fixes the deadlock by waking up all waiters in the slow path +of ww_mutex_unlock. + +We have an internal test case for amdgpu which continuously submits +command streams from tens of threads, where all command stream reference +hundreds of GPU buffer objects with a lot of overlap in the buffer lists +between command streams. This test reliably caused a deadlock, and while I +haven't completely confirmed that it is exactly the scenario outlined +above, this patch does fix the test case. 
+ +Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + kernel/locking/mutex.c | 26 ++++++++++++++++++++++---- + 1 file changed, 22 insertions(+), 4 deletions(-) + +diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c +index 0551c21..39fa58a 100644 +--- a/kernel/locking/mutex.c ++++ b/kernel/locking/mutex.c +@@ -409,6 +409,10 @@ static bool mutex_optimistic_spin(struct mutex *lock, + __visible __used noinline + void __sched __mutex_unlock_slowpath(atomic_t *lock_count); + ++static __used noinline ++void __sched __mutex_unlock_slowpath_wakeall(atomic_t *lock_count); ++ ++ + /** + * mutex_unlock - release the mutex + * @lock: the mutex to be released +@@ -473,7 +477,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock) + */ + mutex_clear_owner(&lock->base); + #endif +- __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath); ++ __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath_wakeall); + } + EXPORT_SYMBOL(ww_mutex_unlock); + +@@ -713,7 +717,7 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); + * Release the lock, slowpath: + */ + static inline void +-__mutex_unlock_common_slowpath(struct mutex *lock, int nested) ++__mutex_unlock_common_slowpath(struct mutex *lock, int nested, int wake_all) + { + unsigned long flags; + +@@ -736,7 +740,13 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested) + mutex_release(&lock->dep_map, nested, _RET_IP_); + debug_mutex_unlock(lock); + +- if (!list_empty(&lock->wait_list)) { ++ if (wake_all) { ++ struct mutex_waiter *waiter; ++ list_for_each_entry(waiter, &lock->wait_list, list) { ++ debug_mutex_wake_waiter(lock, waiter); ++ wake_up_process(waiter->task); ++ } ++ } else if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ + struct mutex_waiter *waiter = + list_entry(lock->wait_list.next, +@@ -758,7 +768,15 @@ __mutex_unlock_slowpath(atomic_t *lock_count) + { + struct mutex *lock = 
container_of(lock_count, struct mutex, count); + +- __mutex_unlock_common_slowpath(lock, 1); ++ __mutex_unlock_common_slowpath(lock, 1, 0); ++} ++ ++static void ++__mutex_unlock_slowpath_wakeall(atomic_t *lock_count) ++{ ++ struct mutex *lock = container_of(lock_count, struct mutex, count); ++ ++ __mutex_unlock_common_slowpath(lock, 1, 1); + } + + #ifndef CONFIG_DEBUG_LOCK_ALLOC +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1142-add-additional-cached-gca-config-variables.patch b/common/recipes-kernel/linux/files/1142-add-additional-cached-gca-config-variables.patch new file mode 100644 index 00000000..c9c426f9 --- /dev/null +++ b/common/recipes-kernel/linux/files/1142-add-additional-cached-gca-config-variables.patch @@ -0,0 +1,51 @@ +From caa6b72d0c01491114f017fe3bca7adc05194611 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Tue, 6 Dec 2016 17:07:10 +0530 +Subject: [PATCH 04/10] add additional cached gca config variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We need to cache some additional values to handle SR-IOV +and PG. 
+ +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 3f5d2ad..40497c2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -1135,6 +1135,16 @@ struct amdgpu_scratch { + /* + * GFX configurations + */ ++#define AMDGPU_GFX_MAX_SE 4 ++#define AMDGPU_GFX_MAX_SH_PER_SE 2 ++ ++struct amdgpu_rb_config { ++ uint32_t rb_backend_disable; ++ uint32_t user_rb_backend_disable; ++ uint32_t raster_config; ++ uint32_t raster_config_1; ++}; ++ + struct amdgpu_gca_config { + unsigned max_shader_engines; + unsigned max_tile_pipes; +@@ -1163,6 +1173,8 @@ struct amdgpu_gca_config { + + uint32_t tile_mode_array[32]; + uint32_t macrotile_mode_array[16]; ++ ++ struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; + }; + + struct amdgpu_gfx { +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1143-implement-raster-configuration-for-gfx-v8.patch b/common/recipes-kernel/linux/files/1143-implement-raster-configuration-for-gfx-v8.patch new file mode 100644 index 00000000..c1a271f2 --- /dev/null +++ b/common/recipes-kernel/linux/files/1143-implement-raster-configuration-for-gfx-v8.patch @@ -0,0 +1,262 @@ +From 705f105de150240594945703df70f82d5ab861ce Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Tue, 6 Dec 2016 19:33:01 +0530 +Subject: [PATCH 05/10] implement raster configuration for gfx v8 + +This patch is to implement the raster configuration and harvested +configuration of gfx v8. 
+ +Signed-off-by: Huang Rui <ray.huang@amd.com> +Reviewed-by: Alex Deucher <alexander.deucher@amd.com> +Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 168 +++++++++++++++++++++++++++++++++- + drivers/gpu/drm/amd/amdgpu/vid.h | 37 ++++++++ + 2 files changed, 204 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index c5a3d04..20ac07f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -3484,13 +3484,163 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) + return (~data) & mask; + } + ++static void ++gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) ++{ ++ switch (adev->asic_type) { ++ case CHIP_FIJI: ++ *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | ++ RB_XSEL2(1) | PKR_MAP(2) | ++ PKR_XSEL(1) | PKR_YSEL(1) | ++ SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); ++ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | ++ SE_PAIR_YSEL(2); ++ break; ++ case CHIP_TONGA: ++ case CHIP_POLARIS10: ++ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | ++ SE_XSEL(1) | SE_YSEL(1); ++ *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | ++ SE_PAIR_YSEL(2); ++ break; ++ case CHIP_TOPAZ: ++ case CHIP_CARRIZO: ++ *rconf |= RB_MAP_PKR0(2); ++ *rconf1 |= 0x0; ++ break; ++ case CHIP_POLARIS11: ++ *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | ++ SE_XSEL(1) | SE_YSEL(1); ++ *rconf1 |= 0x0; ++ break; ++ case CHIP_STONEY: ++ *rconf |= 0x0; ++ *rconf1 |= 0x0; ++ break; ++ default: ++ DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); ++ break; ++ } ++} ++ ++static void ++gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, ++ u32 raster_config, u32 raster_config_1, ++ unsigned rb_mask, unsigned num_rb) ++{ ++ unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); ++ unsigned num_se = max_t(unsigned, 
adev->gfx.config.max_shader_engines, 1); ++ unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); ++ unsigned rb_per_se = num_rb / num_se; ++ unsigned se_mask[4]; ++ unsigned se; ++ ++ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; ++ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; ++ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; ++ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; ++ ++ WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); ++ WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); ++ WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); ++ ++ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || ++ (!se_mask[2] && !se_mask[3]))) { ++ raster_config_1 &= ~SE_PAIR_MAP_MASK; ++ ++ if (!se_mask[0] && !se_mask[1]) { ++ raster_config_1 |= ++ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); ++ } else { ++ raster_config_1 |= ++ SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); ++ } ++ } ++ ++ for (se = 0; se < num_se; se++) { ++ unsigned raster_config_se = raster_config; ++ unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); ++ unsigned pkr1_mask = pkr0_mask << rb_per_pkr; ++ int idx = (se / 2) * 2; ++ ++ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { ++ raster_config_se &= ~SE_MAP_MASK; ++ ++ if (!se_mask[idx]) { ++ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); ++ } else { ++ raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); ++ } ++ } ++ ++ pkr0_mask &= rb_mask; ++ pkr1_mask &= rb_mask; ++ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { ++ raster_config_se &= ~PKR_MAP_MASK; ++ ++ if (!pkr0_mask) { ++ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); ++ } else { ++ raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); ++ } ++ } ++ ++ if (rb_per_se >= 2) { ++ unsigned rb0_mask = 1 << (se * rb_per_se); ++ unsigned rb1_mask = rb0_mask << 1; ++ ++ rb0_mask &= rb_mask; ++ rb1_mask &= rb_mask; ++ if (!rb0_mask || !rb1_mask) { ++ raster_config_se &= ~RB_MAP_PKR0_MASK; ++ ++ if (!rb0_mask) { ++ raster_config_se |= ++ 
RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); ++ } else { ++ raster_config_se |= ++ RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); ++ } ++ } ++ ++ if (rb_per_se > 2) { ++ rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); ++ rb1_mask = rb0_mask << 1; ++ rb0_mask &= rb_mask; ++ rb1_mask &= rb_mask; ++ if (!rb0_mask || !rb1_mask) { ++ raster_config_se &= ~RB_MAP_PKR1_MASK; ++ ++ if (!rb0_mask) { ++ raster_config_se |= ++ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); ++ } else { ++ raster_config_se |= ++ RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); ++ } ++ } ++ } ++ } ++ ++ /* GRBM_GFX_INDEX has a different offset on VI */ ++ gfx_v8_0_select_se_sh(adev, se, 0xffffffff); ++ WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); ++ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); ++ } ++ ++ /* GRBM_GFX_INDEX has a different offset on VI */ ++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); ++} ++ + static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) + { + int i, j; + u32 data; ++ u32 raster_config = 0, raster_config_1 = 0; + u32 active_rbs = 0; + u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; ++ unsigned num_rb_pipes; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { +@@ -3502,10 +3652,26 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) + } + } + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); +- mutex_unlock(&adev->grbm_idx_mutex); + + adev->gfx.config.backend_enable_mask = active_rbs; + adev->gfx.config.num_rbs = hweight32(active_rbs); ++ ++ num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * ++ adev->gfx.config.max_shader_engines, 16); ++ ++ gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); ++ ++ if (!adev->gfx.config.backend_enable_mask || ++ adev->gfx.config.num_rbs >= num_rb_pipes) { ++ WREG32(mmPA_SC_RASTER_CONFIG, raster_config); ++ WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); ++ } else { ++ gfx_v8_0_write_harvested_raster_configs(adev, 
raster_config, raster_config_1, ++ adev->gfx.config.backend_enable_mask, ++ num_rb_pipes); ++ } ++ ++ mutex_unlock(&adev->grbm_idx_mutex); + } + + /** +diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h +index 3bf7172..4bd2bfd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vid.h ++++ b/drivers/gpu/drm/amd/amdgpu/vid.h +@@ -368,4 +368,41 @@ + #define VCE_CMD_IB_AUTO 0x00000005 + #define VCE_CMD_SEMAPHORE 0x00000006 + ++ ++/* mmPA_SC_RASTER_CONFIG mask */ ++#define RB_MAP_PKR0(x) ((x) << 0) ++#define RB_MAP_PKR0_MASK (0x3 << 0) ++#define RB_MAP_PKR1(x) ((x) << 2) ++#define RB_MAP_PKR1_MASK (0x3 << 2) ++#define RB_XSEL2(x) ((x) << 4) ++#define RB_XSEL2_MASK (0x3 << 4) ++#define RB_XSEL (1 << 6) ++#define RB_YSEL (1 << 7) ++#define PKR_MAP(x) ((x) << 8) ++#define PKR_MAP_MASK (0x3 << 8) ++#define PKR_XSEL(x) ((x) << 10) ++#define PKR_XSEL_MASK (0x3 << 10) ++#define PKR_YSEL(x) ((x) << 12) ++#define PKR_YSEL_MASK (0x3 << 12) ++#define SC_MAP(x) ((x) << 16) ++#define SC_MAP_MASK (0x3 << 16) ++#define SC_XSEL(x) ((x) << 18) ++#define SC_XSEL_MASK (0x3 << 18) ++#define SC_YSEL(x) ((x) << 20) ++#define SC_YSEL_MASK (0x3 << 20) ++#define SE_MAP(x) ((x) << 24) ++#define SE_MAP_MASK (0x3 << 24) ++#define SE_XSEL(x) ((x) << 26) ++#define SE_XSEL_MASK (0x3 << 26) ++#define SE_YSEL(x) ((x) << 28) ++#define SE_YSEL_MASK (0x3 << 28) ++ ++/* mmPA_SC_RASTER_CONFIG_1 mask */ ++#define SE_PAIR_MAP(x) ((x) << 0) ++#define SE_PAIR_MAP_MASK (0x3 << 0) ++#define SE_PAIR_XSEL(x) ((x) << 2) ++#define SE_PAIR_XSEL_MASK (0x3 << 2) ++#define SE_PAIR_YSEL(x) ((x) << 4) ++#define SE_PAIR_YSEL_MASK (0x3 << 4) ++ + #endif +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1144-cache-rb-config-values.patch b/common/recipes-kernel/linux/files/1144-cache-rb-config-values.patch new file mode 100644 index 00000000..0c8fe273 --- /dev/null +++ b/common/recipes-kernel/linux/files/1144-cache-rb-config-values.patch @@ -0,0 +1,46 @@ +From 
0aaf3d10e376981da3d92f037c6e36a5c4e8d348 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Tue, 6 Dec 2016 19:40:46 +0530 +Subject: [PATCH 06/10] cache rb config values +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Needed for SR-IOV and when PG is enabled. + +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index 20ac07f..479047e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -3671,6 +3671,21 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) + num_rb_pipes); + } + ++ /* cache the values for userspace */ ++ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { ++ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { ++ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); ++ adev->gfx.config.rb_config[i][j].rb_backend_disable = ++ RREG32(mmCC_RB_BACKEND_DISABLE); ++ adev->gfx.config.rb_config[i][j].user_rb_backend_disable = ++ RREG32(mmGC_USER_RB_BACKEND_DISABLE); ++ adev->gfx.config.rb_config[i][j].raster_config = ++ RREG32(mmPA_SC_RASTER_CONFIG); ++ adev->gfx.config.rb_config[i][j].raster_config_1 = ++ RREG32(mmPA_SC_RASTER_CONFIG_1); ++ } ++ } ++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } + +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1145-use-cached-raster-config-values-in-csb.patch b/common/recipes-kernel/linux/files/1145-use-cached-raster-config-values-in-csb.patch new file mode 100644 index 00000000..b3b19e7b --- /dev/null +++ b/common/recipes-kernel/linux/files/1145-use-cached-raster-config-values-in-csb.patch @@ -0,0 +1,61 @@ +From 
9fcd43d6a79011dd9ab3837d38ba27454be747ad Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Tue, 6 Dec 2016 20:14:23 +0530 +Subject: [PATCH 07/10] use cached raster config values in csb +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Simplify the code and properly set the csb for harvest values. + +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 ++---------------------------- + 1 file changed, 2 insertions(+), 28 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index 479047e..dcc59f3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -1110,34 +1110,8 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - + PACKET3_SET_CONTEXT_REG_START); +- switch (adev->asic_type) { +- case CHIP_TONGA: +- case CHIP_POLARIS10: +- buffer[count++] = cpu_to_le32(0x16000012); +- buffer[count++] = cpu_to_le32(0x0000002A); +- break; +- case CHIP_POLARIS11: +- buffer[count++] = cpu_to_le32(0x16000012); +- buffer[count++] = cpu_to_le32(0x00000000); +- break; +- case CHIP_FIJI: +- buffer[count++] = cpu_to_le32(0x3a00161a); +- buffer[count++] = cpu_to_le32(0x0000002e); +- break; +- case CHIP_TOPAZ: +- case CHIP_CARRIZO: +- buffer[count++] = cpu_to_le32(0x00000002); +- buffer[count++] = cpu_to_le32(0x00000000); +- break; +- case CHIP_STONEY: +- buffer[count++] = cpu_to_le32(0x00000000); +- buffer[count++] = cpu_to_le32(0x00000000); +- break; +- default: +- buffer[count++] = cpu_to_le32(0x00000000); +- buffer[count++] = cpu_to_le32(0x00000000); +- break; +- } ++ buffer[count++] = 
cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); ++ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1146-used-cached-gca-values-for-vi_read_register.patch b/common/recipes-kernel/linux/files/1146-used-cached-gca-values-for-vi_read_register.patch new file mode 100644 index 00000000..ed7262c9 --- /dev/null +++ b/common/recipes-kernel/linux/files/1146-used-cached-gca-values-for-vi_read_register.patch @@ -0,0 +1,166 @@ +From 2ce0f44274368b2a6640c3062eb119a0de8c1056 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Wed, 7 Dec 2016 15:07:53 +0530 +Subject: [PATCH 08/10] used cached gca values for vi_read_register +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Using the cached values has less latency for bare metal +and SR-IOV, and prevents reading back bogus values if the +engine is powergated. 
+ +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/vi.c | 115 +++++++++++++++++++++++++++++++++------- + 1 file changed, 96 insertions(+), 19 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c +index 02ba429..3a42e83 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vi.c ++++ b/drivers/gpu/drm/amd/amdgpu/vi.c +@@ -513,21 +513,100 @@ static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = + {mmPA_SC_RASTER_CONFIG_1, false, true}, + }; + +-static uint32_t vi_read_indexed_register(struct amdgpu_device *adev, u32 se_num, +- u32 sh_num, u32 reg_offset) ++static uint32_t vi_get_register_value(struct amdgpu_device *adev, ++ bool indexed, u32 se_num, ++ u32 sh_num, u32 reg_offset) + { +- uint32_t val; ++ if (indexed) { ++ uint32_t val; ++ unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num; ++ unsigned sh_idx = (sh_num == 0xffffffff) ? 
0 : sh_num; ++ ++ switch (reg_offset) { ++ case mmCC_RB_BACKEND_DISABLE: ++ return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable; ++ case mmGC_USER_RB_BACKEND_DISABLE: ++ return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable; ++ case mmPA_SC_RASTER_CONFIG: ++ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config; ++ case mmPA_SC_RASTER_CONFIG_1: ++ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config_1; ++ } + +- mutex_lock(&adev->grbm_idx_mutex); +- if (se_num != 0xffffffff || sh_num != 0xffffffff) +- gfx_v8_0_select_se_sh(adev, se_num, sh_num); ++ mutex_lock(&adev->grbm_idx_mutex); ++ if (se_num != 0xffffffff || sh_num != 0xffffffff) ++ gfx_v8_0_select_se_sh(adev, se_num, sh_num); + +- val = RREG32(reg_offset); ++ val = RREG32(reg_offset); + +- if (se_num != 0xffffffff || sh_num != 0xffffffff) +- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); +- mutex_unlock(&adev->grbm_idx_mutex); +- return val; ++ if (se_num != 0xffffffff || sh_num != 0xffffffff) ++ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); ++ mutex_unlock(&adev->grbm_idx_mutex); ++ return val; ++ } else { ++ unsigned idx; ++ ++ switch (reg_offset) { ++ case mmGB_ADDR_CONFIG: ++ return adev->gfx.config.gb_addr_config; ++ case mmMC_ARB_RAMCFG: ++ return adev->gfx.config.mc_arb_ramcfg; ++ case mmGB_TILE_MODE0: ++ case mmGB_TILE_MODE1: ++ case mmGB_TILE_MODE2: ++ case mmGB_TILE_MODE3: ++ case mmGB_TILE_MODE4: ++ case mmGB_TILE_MODE5: ++ case mmGB_TILE_MODE6: ++ case mmGB_TILE_MODE7: ++ case mmGB_TILE_MODE8: ++ case mmGB_TILE_MODE9: ++ case mmGB_TILE_MODE10: ++ case mmGB_TILE_MODE11: ++ case mmGB_TILE_MODE12: ++ case mmGB_TILE_MODE13: ++ case mmGB_TILE_MODE14: ++ case mmGB_TILE_MODE15: ++ case mmGB_TILE_MODE16: ++ case mmGB_TILE_MODE17: ++ case mmGB_TILE_MODE18: ++ case mmGB_TILE_MODE19: ++ case mmGB_TILE_MODE20: ++ case mmGB_TILE_MODE21: ++ case mmGB_TILE_MODE22: ++ case mmGB_TILE_MODE23: ++ case mmGB_TILE_MODE24: ++ case mmGB_TILE_MODE25: 
++ case mmGB_TILE_MODE26: ++ case mmGB_TILE_MODE27: ++ case mmGB_TILE_MODE28: ++ case mmGB_TILE_MODE29: ++ case mmGB_TILE_MODE30: ++ case mmGB_TILE_MODE31: ++ idx = (reg_offset - mmGB_TILE_MODE0); ++ return adev->gfx.config.tile_mode_array[idx]; ++ case mmGB_MACROTILE_MODE0: ++ case mmGB_MACROTILE_MODE1: ++ case mmGB_MACROTILE_MODE2: ++ case mmGB_MACROTILE_MODE3: ++ case mmGB_MACROTILE_MODE4: ++ case mmGB_MACROTILE_MODE5: ++ case mmGB_MACROTILE_MODE6: ++ case mmGB_MACROTILE_MODE7: ++ case mmGB_MACROTILE_MODE8: ++ case mmGB_MACROTILE_MODE9: ++ case mmGB_MACROTILE_MODE10: ++ case mmGB_MACROTILE_MODE11: ++ case mmGB_MACROTILE_MODE12: ++ case mmGB_MACROTILE_MODE13: ++ case mmGB_MACROTILE_MODE14: ++ case mmGB_MACROTILE_MODE15: ++ idx = (reg_offset - mmGB_MACROTILE_MODE0); ++ return adev->gfx.config.macrotile_mode_array[idx]; ++ default: ++ return RREG32(reg_offset); ++ } ++ } + } + + static int vi_read_register(struct amdgpu_device *adev, u32 se_num, +@@ -562,10 +641,9 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num, + if (reg_offset != asic_register_entry->reg_offset) + continue; + if (!asic_register_entry->untouched) +- *value = asic_register_entry->grbm_indexed ? +- vi_read_indexed_register(adev, se_num, +- sh_num, reg_offset) : +- RREG32(reg_offset); ++ *value = vi_get_register_value(adev, ++ asic_register_entry->grbm_indexed, ++ se_num, sh_num, reg_offset); + return 0; + } + } +@@ -575,10 +653,9 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num, + continue; + + if (!vi_allowed_read_registers[i].untouched) +- *value = vi_allowed_read_registers[i].grbm_indexed ? 
+- vi_read_indexed_register(adev, se_num, +- sh_num, reg_offset) : +- RREG32(reg_offset); ++ *value = vi_get_register_value(adev, ++ vi_allowed_read_registers[i].grbm_indexed, ++ se_num, sh_num, reg_offset); + return 0; + } + return -EINVAL; +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1147-Removed-extra-parameter.patch b/common/recipes-kernel/linux/files/1147-Removed-extra-parameter.patch new file mode 100644 index 00000000..3ee7aa52 --- /dev/null +++ b/common/recipes-kernel/linux/files/1147-Removed-extra-parameter.patch @@ -0,0 +1,26 @@ +From 7dc74a872ca0a5502f2c8e56fdfd9af97b8da1b6 Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Wed, 7 Dec 2016 21:00:00 +0530 +Subject: [PATCH 09/10] Removed extra parameter + +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index dcc59f3..d1cb4db 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -3648,7 +3648,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) + /* cache the values for userspace */ + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { +- gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); ++ gfx_v8_0_select_se_sh(adev, i, j); + adev->gfx.config.rb_config[i][j].rb_backend_disable = + RREG32(mmCC_RB_BACKEND_DISABLE); + adev->gfx.config.rb_config[i][j].user_rb_backend_disable = +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/1148-refine-pg-code-for-gfx_v8.patch b/common/recipes-kernel/linux/files/1148-refine-pg-code-for-gfx_v8.patch new file mode 100644 index 00000000..4416497c --- /dev/null +++ b/common/recipes-kernel/linux/files/1148-refine-pg-code-for-gfx_v8.patch @@ -0,0 +1,204 @@ +From c5ef870413c64c25cfe2a646c395b0c0d293a4f5 Mon Sep 17 
00:00:00 2001 +From: Ravi Patlegar <ravi.patlegar@amd.com> +Date: Tue, 13 Dec 2016 16:28:54 +0530 +Subject: [PATCH 10/10] refine pg code for gfx_v8. + +1. bit CP_PG_DISABLE was reversed. +2. load RLC_SRM_INDEX_CNTL_ADDR/DATA_x pairs + with valid addr/data. +3. always init gfx pg. +4. delete repeated check for pg mask. + +Signed-off-by: Rex Zhu <Rex.Zhu@amd.com> +Signed-off-by: Ravi Patlegar <ravi.patlegar@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 118 ++++++++++++---------------------- + 2 files changed, 44 insertions(+), 76 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 40497c2..af04d3b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -2211,6 +2211,8 @@ bool amdgpu_device_has_dal_support(struct amdgpu_device *adev); + #define REG_GET_FIELD(value, reg, field) \ + (((value) & REG_FIELD_MASK(reg, field)) >> REG_FIELD_SHIFT(reg, field)) + ++#define WREG32_FIELD(reg, field, val) \ ++ WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + /* + * BIOS helpers. 
+ */ +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index d1cb4db..b4c41f9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -3934,8 +3934,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) + temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; + data = mmRLC_SRM_INDEX_CNTL_DATA_0; + for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { +- amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); +- amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); ++ if (unique_indices[i] != 0) { ++ amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); ++ amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); ++ } + } + kfree(register_list_format); + +@@ -3955,32 +3957,17 @@ static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev) + { + uint32_t data; + +- if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG | +- AMDGPU_PG_SUPPORT_GFX_SMG | +- AMDGPU_PG_SUPPORT_GFX_DMG)) { +- data = RREG32(mmCP_RB_WPTR_POLL_CNTL); +- data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; +- data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); +- WREG32(mmCP_RB_WPTR_POLL_CNTL, data); +- +- data = 0; +- data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); +- data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); +- data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); +- data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); +- WREG32(mmRLC_PG_DELAY, data); +- +- data = RREG32(mmRLC_PG_DELAY_2); +- data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; +- data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); +- WREG32(mmRLC_PG_DELAY_2, data); +- +- data = RREG32(mmRLC_AUTO_PG_CTRL); +- data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; +- data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); +- WREG32(mmRLC_AUTO_PG_CTRL, data); +- } +- } ++ WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, 
IDLE_POLL_COUNT, 0x60); ++ ++ data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10); ++ data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10); ++ data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10); ++ data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10); ++ WREG32(mmRLC_PG_DELAY, data); ++ ++ WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3); ++ WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0); ++} + + static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, + bool enable) +@@ -4016,18 +4003,8 @@ static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, + + static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) + { +- u32 data, orig; +- +- orig = data = RREG32(mmRLC_PG_CNTL); +- +- if (enable) +- data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK; +- else +- data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK; +- +- if (orig != data) +- WREG32(mmRLC_PG_CNTL, data); +- } ++ WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 
0 : 1); ++} + + static void polaris11_init_power_gating(struct amdgpu_device *adev) + { +@@ -4062,39 +4039,30 @@ static void polaris11_init_power_gating(struct amdgpu_device *adev) + + static void gfx_v8_0_init_pg(struct amdgpu_device *adev) + { +- if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | +- AMD_PG_SUPPORT_GFX_SMG | +- AMD_PG_SUPPORT_GFX_DMG | +- AMD_PG_SUPPORT_CP | +- AMD_PG_SUPPORT_GDS | +- AMD_PG_SUPPORT_RLC_SMU_HS)) { +- gfx_v8_0_init_csb(adev); +- gfx_v8_0_init_save_restore_list(adev); +- gfx_v8_0_enable_save_restore_machine(adev); ++ gfx_v8_0_init_csb(adev); ++ gfx_v8_0_init_save_restore_list(adev); ++ gfx_v8_0_enable_save_restore_machine(adev); + +- if ((adev->asic_type == CHIP_CARRIZO) || +- (adev->asic_type == CHIP_STONEY)) { +- struct amdgpu_cu_info cu_info; +- +- gfx_v8_0_get_cu_info(adev, &cu_info); +- +- WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); +- gfx_v8_0_init_power_gating(adev); +- WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, cu_info.ao_cu_mask); +- if (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS) { +- cz_enable_sck_slow_down_on_power_up(adev, true); +- cz_enable_sck_slow_down_on_power_down(adev, true); +- } else { +- cz_enable_sck_slow_down_on_power_up(adev, false); +- cz_enable_sck_slow_down_on_power_down(adev, false); +- } +- if (adev->pg_flags & AMDGPU_PG_SUPPORT_CP) +- cz_enable_cp_power_gating(adev, true); +- else +- cz_enable_cp_power_gating(adev, false); +- } else if (adev->asic_type == CHIP_POLARIS11) { +- polaris11_init_power_gating(adev); ++ if ((adev->asic_type == CHIP_CARRIZO) || ++ (adev->asic_type == CHIP_STONEY)) { ++ struct amdgpu_cu_info cu_info; ++ gfx_v8_0_get_cu_info(adev, &cu_info); ++ WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); ++ gfx_v8_0_init_power_gating(adev); ++ WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, cu_info.ao_cu_mask); ++ if (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS) { ++ cz_enable_sck_slow_down_on_power_up(adev, true); ++ 
cz_enable_sck_slow_down_on_power_down(adev, true); ++ } else { ++ cz_enable_sck_slow_down_on_power_up(adev, false); ++ cz_enable_sck_slow_down_on_power_down(adev, false); + } ++ if (adev->pg_flags & AMDGPU_PG_SUPPORT_CP) ++ cz_enable_cp_power_gating(adev, true); ++ else ++ cz_enable_cp_power_gating(adev, false); ++ } else if (adev->asic_type == CHIP_POLARIS11) { ++ polaris11_init_power_gating(adev); + } + } + +@@ -5513,7 +5481,7 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, + if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PIPELINE) + cz_enable_gfx_pipeline_power_gating(adev, true); + } else { +- cz_enable_gfx_cg_power_gating(adev, false); ++ cz_enable_gfx_cg_power_gating(adev, true); + cz_enable_gfx_pipeline_power_gating(adev, false); + } + } +@@ -5524,14 +5492,12 @@ static int gfx_v8_0_set_powergating_state(void *handle, + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + +- if (!(adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG)) +- return 0; + + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: +- if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) +- cz_update_gfx_cg_power_gating(adev, enable); ++ ++ cz_update_gfx_cg_power_gating(adev, enable); + + if ((adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_SMG) && enable) + gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); +-- +2.7.4 + diff --git a/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc b/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc index cc005c0c..f4fdebb6 100644 --- a/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc +++ b/common/recipes-kernel/linux/files/linux-yocto-amd-patches.scc @@ -1139,3 +1139,14 @@ patch 1137-drm-amdgpu-acp-fix-resume-on-CZ-systems-with-AZ-audi.patch patch 0001-amdgpu-fix-various-compilation-issues.patch patch CVE-2016-5195.patch patch 0001-random-replace-non-blocking-pool-with-a-Chacha20-bas.patch +patch 
1138-add-new-semaphore-object-in-kernel-side.patch +patch 1139-unify-memory-query-info-interface.patch +patch 1140-dma-buf-return-index-of-the-first-signaled-fence.patch +patch 1141-Fix-a-deadlock-affecting-ww_mutexes.patch +patch 1142-add-additional-cached-gca-config-variables.patch +patch 1143-implement-raster-configuration-for-gfx-v8.patch +patch 1144-cache-rb-config-values.patch +patch 1145-use-cached-raster-config-values-in-csb.patch +patch 1146-used-cached-gca-values-for-vi_read_register.patch +patch 1147-Removed-extra-parameter.patch +patch 1148-refine-pg-code-for-gfx_v8.patch diff --git a/common/recipes-support/libffi/libffi_3.2.1.bbappend b/common/recipes-support/libffi/libffi_3.2.1.bbappend new file mode 100644 index 00000000..f3a1c13d --- /dev/null +++ b/common/recipes-support/libffi/libffi_3.2.1.bbappend @@ -0,0 +1,4 @@ +do_install_append() { + install -d ${D}${includedir} + install -m 0644 ${B}/include/ffi.h ${D}${includedir} +} diff --git a/meta-amdfalconx86/conf/machine/amdfalconx86.conf b/meta-amdfalconx86/conf/machine/amdfalconx86.conf index 42546b8c..648510db 100644 --- a/meta-amdfalconx86/conf/machine/amdfalconx86.conf +++ b/meta-amdfalconx86/conf/machine/amdfalconx86.conf @@ -39,7 +39,7 @@ SERIAL_CONSOLES ?= "115200;ttyS0 115200;ttyUSB0 115200;ttyS4 115200;ttyS5" KERNEL_SERIAL_CONSOLE ?= "console=ttyS0,115200n8" # Enable powerplay -APPEND += "amdgpu.powerplay=1 amdgpu.pg_mask=0" +APPEND += "amdgpu.powerplay=1" TOOLCHAIN_HOST_TASK_append_mel = " ${@bb.utils.contains('INCLUDE_VULKAN', 'yes', "nativesdk-glslang", "", d)}" |