Diffstat (limited to 'meta-amdfalconx86/recipes-graphics/drm/libdrm/0001-drm-add-libdrm_amdgpu.patch')
-rw-r--r-- | meta-amdfalconx86/recipes-graphics/drm/libdrm/0001-drm-add-libdrm_amdgpu.patch | 5665 |
1 file changed, 5665 insertions, 0 deletions
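The patch below vendors the initial libdrm_amdgpu ioctl wrapper into this BSP layer. As a rough orientation for the API it introduces, here is a minimal usage sketch, assuming a DRM node driven by the amdgpu kernel driver and the declarations from the amdgpu.h added by this patch; it is illustrative, not code taken from the patch:

    /* Hypothetical sketch: bring up and tear down libdrm_amdgpu.
     * Assumes drmOpen() can find an amdgpu-driven node. */
    #include <stdio.h>
    #include <xf86drm.h>
    #include "amdgpu.h"

    int main(void)
    {
        uint32_t major, minor;
        amdgpu_device_handle dev;
        int fd = drmOpen("amdgpu", NULL);

        if (fd < 0 || amdgpu_device_initialize(fd, &major, &minor, &dev))
            return 1;
        printf("libdrm_amdgpu interface %u.%u\n", major, minor);
        amdgpu_device_deinitialize(dev);
        return 0;
    }
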
diff --git a/meta-amdfalconx86/recipes-graphics/drm/libdrm/0001-drm-add-libdrm_amdgpu.patch b/meta-amdfalconx86/recipes-graphics/drm/libdrm/0001-drm-add-libdrm_amdgpu.patch new file mode 100644 index 00000000..1609db27 --- /dev/null +++ b/meta-amdfalconx86/recipes-graphics/drm/libdrm/0001-drm-add-libdrm_amdgpu.patch @@ -0,0 +1,5665 @@ +From 7a6c09a5a0b17e9e981424fe35ea3492369d4eab Mon Sep 17 00:00:00 2001 +From: Alex Deucher <alexander.deucher@amd.com> +Date: Mon, 20 Apr 2015 12:04:22 -0400 +Subject: [PATCH 1/3] drm: add libdrm_amdgpu + +This is the new ioctl wrapper used by the new admgpu driver. +It's primarily used by xf86-video-amdgpu and mesa. + +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Arindam Nath <arindam.nath@amd.com> +--- + Makefile.am | 5 + + amdgpu/Makefile.am | 55 ++ + amdgpu/amdgpu.h | 1278 ++++++++++++++++++++++++++++++++++++++++++++ + amdgpu/amdgpu_bo.c | 622 +++++++++++++++++++++ + amdgpu/amdgpu_cs.c | 981 ++++++++++++++++++++++++++++++++++ + amdgpu/amdgpu_device.c | 242 +++++++++ + amdgpu/amdgpu_gpu_info.c | 275 ++++++++++ + amdgpu/amdgpu_internal.h | 210 ++++++++ + amdgpu/amdgpu_vamgr.c | 169 ++++++ + amdgpu/libdrm_amdgpu.pc.in | 10 + + amdgpu/util_double_list.h | 146 +++++ + amdgpu/util_hash.c | 382 +++++++++++++ + amdgpu/util_hash.h | 99 ++++ + amdgpu/util_hash_table.c | 257 +++++++++ + amdgpu/util_hash_table.h | 65 +++ + amdgpu/util_math.h | 32 ++ + configure.ac | 20 + + include/drm/amdgpu_drm.h | 600 +++++++++++++++++++++ + 18 files changed, 5448 insertions(+) + create mode 100644 amdgpu/Makefile.am + create mode 100644 amdgpu/amdgpu.h + create mode 100644 amdgpu/amdgpu_bo.c + create mode 100644 amdgpu/amdgpu_cs.c + create mode 100644 amdgpu/amdgpu_device.c + create mode 100644 amdgpu/amdgpu_gpu_info.c + create mode 100644 amdgpu/amdgpu_internal.h + create mode 100644 amdgpu/amdgpu_vamgr.c + create mode 100644 amdgpu/libdrm_amdgpu.pc.in + create mode 100644 amdgpu/util_double_list.h + create mode 100644 amdgpu/util_hash.c + create mode 100644 amdgpu/util_hash.h + create mode 100644 amdgpu/util_hash_table.c + create mode 100644 amdgpu/util_hash_table.h + create mode 100644 amdgpu/util_math.h + create mode 100644 include/drm/amdgpu_drm.h + +diff --git a/Makefile.am b/Makefile.am +index 42d3d7f..5defeb2 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -57,6 +57,10 @@ if HAVE_RADEON + RADEON_SUBDIR = radeon + endif + ++if HAVE_AMDGPU ++AMDGPU_SUBDIR = amdgpu ++endif ++ + if HAVE_OMAP + OMAP_SUBDIR = omap + endif +@@ -79,6 +83,7 @@ SUBDIRS = \ + $(INTEL_SUBDIR) \ + $(NOUVEAU_SUBDIR) \ + $(RADEON_SUBDIR) \ ++ $(AMDGPU_SUBDIR) \ + $(OMAP_SUBDIR) \ + $(EXYNOS_SUBDIR) \ + $(FREEDRENO_SUBDIR) \ +diff --git a/amdgpu/Makefile.am b/amdgpu/Makefile.am +new file mode 100644 +index 0000000..9baf194 +--- /dev/null ++++ b/amdgpu/Makefile.am +@@ -0,0 +1,55 @@ ++# Copyright © 2008 Jérôme Glisse ++# ++# Permission is hereby granted, free of charge, to any person obtaining a ++# copy of this software and associated documentation files (the "Software"), ++# to deal in the Software without restriction, including without limitation ++# the rights to use, copy, modify, merge, publish, distribute, sublicense, ++# and/or sell copies of the Software, and to permit persons to whom the ++# Software is furnished to do so, subject to the following conditions: ++# ++# The above copyright notice and this permission notice (including the next ++# paragraph) shall be included in all copies or substantial portions of the ++# Software. 
++# ++# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++# IN THE SOFTWARE. ++# ++# Authors: ++# Jérôme Glisse <glisse@freedesktop.org> ++ ++AM_CFLAGS = \ ++ $(WARN_CFLAGS) -Wno-switch-enum \ ++ -I$(top_srcdir) \ ++ -I$(top_srcdir)/amdgpu \ ++ $(PTHREADSTUBS_CFLAGS) \ ++ -I$(top_srcdir)/include/drm ++ ++libdrm_amdgpu_la_LTLIBRARIES = libdrm_amdgpu.la ++libdrm_amdgpu_ladir = $(libdir) ++libdrm_amdgpu_la_LDFLAGS = -version-number 1:0:1 -no-undefined ++libdrm_amdgpu_la_LIBADD = ../libdrm.la @PTHREADSTUBS_LIBS@ ++ ++libdrm_amdgpu_la_SOURCES = \ ++ amdgpu_gpu_info.c \ ++ amdgpu_device.c \ ++ amdgpu_bo.c \ ++ util_hash.c \ ++ util_hash_table.c \ ++ amdgpu_vamgr.c \ ++ amdgpu_cs.c ++ ++nodist_EXTRA_libdrm_amdgpu_la_SOURCES = dummy.cxx ++ ++libdrm_amdgpuincludedir = ${includedir}/libdrm ++libdrm_amdgpuinclude_HEADERS = \ ++ amdgpu.h ++ ++pkgconfigdir = @pkgconfigdir@ ++pkgconfig_DATA = libdrm_amdgpu.pc ++ ++EXTRA_DIST = libdrm_amdgpu.pc.in +diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h +new file mode 100644 +index 0000000..90dc33c +--- /dev/null ++++ b/amdgpu/amdgpu.h +@@ -0,0 +1,1278 @@ ++/* ++ * Copyright 2014 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++*/ ++ ++/** ++ * \file amdgpu.h ++ * ++ * Declare public libdrm_amdgpu API ++ * ++ * This file define API exposed by libdrm_amdgpu library. ++ * User wanted to use libdrm_amdgpu functionality must include ++ * this file. ++ * ++ */ ++#ifndef _amdgpu_h_ ++#define _amdgpu_h_ ++ ++#include <stdint.h> ++#include <stdbool.h> ++ ++struct drm_amdgpu_info_hw_ip; ++ ++/*--------------------------------------------------------------------------*/ ++/* --------------------------- Defines ------------------------------------ */ ++/*--------------------------------------------------------------------------*/ ++ ++/** ++ * Define max. 
number of Command Buffers (IB) which could be sent to the single ++ * hardware IP to accommodate CE/DE requirements ++ * ++ * \sa amdgpu_cs_ib_info ++*/ ++#define AMDGPU_CS_MAX_IBS_PER_SUBMIT 4 ++ ++/** ++ * ++ */ ++#define AMDGPU_TIMEOUT_INFINITE 0xffffffffffffffffull ++ ++/** ++ * The special flag for GFX submission to identify that this is CE IB ++ * \sa amdgpu_cs_ib_info ++*/ ++#define AMDGPU_CS_GFX_IB_CE 0x1 ++ ++/** ++ * The special flag to mark that this IB will re-used ++ * by client and should not be automatically return back ++ * to free pool by libdrm_amdgpu when submission is completed. ++ * ++ * \sa amdgpu_cs_ib_info ++*/ ++#define AMDGPU_CS_REUSE_IB 0x2 ++ ++/** ++ * The special resource flag for IB submission. ++ * When VRAM is full, some resources may be moved to GTT to make place ++ * for other resources which want to be in VRAM. This flag affects the order ++ * in which resources are moved back to VRAM until there is no space there. ++ * The resources with the highest priority will be moved first. ++ * The value can be between 0 and 15, inclusive. ++ */ ++#define AMDGPU_IB_RESOURCE_PRIORITY(x) ((x) & 0xf) ++ ++ ++/*--------------------------------------------------------------------------*/ ++/* ----------------------------- Enums ------------------------------------ */ ++/*--------------------------------------------------------------------------*/ ++ ++/** ++ * Enum describing possible handle types ++ * ++ * \sa amdgpu_bo_import, amdgpu_bo_export ++ * ++*/ ++enum amdgpu_bo_handle_type { ++ /** GEM flink name (needs DRM authentication, used by DRI2) */ ++ amdgpu_bo_handle_type_gem_flink_name = 0, ++ ++ /** KMS handle which is used by all driver ioctls */ ++ amdgpu_bo_handle_type_kms = 1, ++ ++ /** DMA-buf fd handle */ ++ amdgpu_bo_handle_type_dma_buf_fd = 2 ++}; ++ ++/** ++ * Enum describing possible context reset states ++ * ++ * \sa amdgpu_cs_query_reset_state() ++ * ++*/ ++enum amdgpu_cs_ctx_reset_state { ++ /** No reset was detected */ ++ amdgpu_cs_reset_no_error = 0, ++ ++ /** Reset/TDR was detected and context caused */ ++ amdgpu_cs_reset_guilty = 1, ++ ++ /** Reset/TDR was detected caused by other context */ ++ amdgpu_cs_reset_innocent = 2, ++ ++ /** Reset TDR was detected by cause of it unknown */ ++ amdgpu_cs_reset_unknown = 3 ++}; ++ ++/** ++ * For performance reasons and to simplify logic libdrm_amdgpu will handle ++ * IBs only some pre-defined sizes. ++ * ++ * \sa amdgpu_cs_alloc_ib() ++ */ ++enum amdgpu_cs_ib_size { ++ amdgpu_cs_ib_size_4K = 1, ++ amdgpu_cs_ib_size_16K = 2, ++ amdgpu_cs_ib_size_32K = 3, ++ amdgpu_cs_ib_size_64K = 4, ++ amdgpu_cs_ib_size_128K = 5 ++}; ++ ++/** The number of different IB sizes */ ++#define AMDGPU_CS_IB_SIZE_NUM 6 ++ ++ ++/*--------------------------------------------------------------------------*/ ++/* -------------------------- Datatypes ----------------------------------- */ ++/*--------------------------------------------------------------------------*/ ++ ++/** ++ * Define opaque pointer to context associated with fd. ++ * This context will be returned as the result of ++ * "initialize" function and should be pass as the first ++ * parameter to any API call ++ */ ++typedef struct amdgpu_device *amdgpu_device_handle; ++ ++/** ++ * Define GPU Context type as pointer to opaque structure ++ * Example of GPU Context is the "rendering" context associated ++ * with OpenGL context (glCreateContext) ++ */ ++typedef struct amdgpu_context *amdgpu_context_handle; ++ ++/** ++ * Define handle for amdgpu resources: buffer, GDS, etc. 
++ */ ++typedef struct amdgpu_bo *amdgpu_bo_handle; ++ ++/** ++ * Define handle to be used when dealing with command ++ * buffers (a.k.a. ibs) ++ * ++ */ ++typedef struct amdgpu_ib *amdgpu_ib_handle; ++ ++ ++/*--------------------------------------------------------------------------*/ ++/* -------------------------- Structures ---------------------------------- */ ++/*--------------------------------------------------------------------------*/ ++ ++/** ++ * Structure describing memory allocation request ++ * ++ * \sa amdgpu_bo_alloc() ++ * ++*/ ++struct amdgpu_bo_alloc_request { ++ /** Allocation request. It must be aligned correctly. */ ++ uint64_t alloc_size; ++ ++ /** ++ * It may be required to have some specific alignment requirements ++ * for physical back-up storage (e.g. for displayable surface). ++ * If 0 there is no special alignment requirement ++ */ ++ uint64_t phys_alignment; ++ ++ /** ++ * UMD should specify where to allocate memory and how it ++ * will be accessed by the CPU. ++ */ ++ uint32_t preferred_heap; ++ ++ /** Additional flags passed on allocation */ ++ uint64_t flags; ++}; ++ ++/** ++ * Structure describing memory allocation request ++ * ++ * \sa amdgpu_bo_alloc() ++*/ ++struct amdgpu_bo_alloc_result { ++ /** Assigned virtual MC Base Address */ ++ uint64_t virtual_mc_base_address; ++ ++ /** Handle of allocated memory to be used by the given process only. */ ++ amdgpu_bo_handle buf_handle; ++}; ++ ++/** ++ * Special UMD specific information associated with buffer. ++ * ++ * It may be need to pass some buffer charactersitic as part ++ * of buffer sharing. Such information are defined UMD and ++ * opaque for libdrm_amdgpu as well for kernel driver. ++ * ++ * \sa amdgpu_bo_set_metadata(), amdgpu_bo_query_info, ++ * amdgpu_bo_import(), amdgpu_bo_export ++ * ++*/ ++struct amdgpu_bo_metadata { ++ /** Special flag associated with surface */ ++ uint64_t flags; ++ ++ /** ++ * ASIC-specific tiling information (also used by DCE). ++ * The encoding is defined by the AMDGPU_TILING_* definitions. ++ */ ++ uint64_t tiling_info; ++ ++ /** Size of metadata associated with the buffer, in bytes. */ ++ uint32_t size_metadata; ++ ++ /** UMD specific metadata. Opaque for kernel */ ++ uint32_t umd_metadata[64]; ++}; ++ ++/** ++ * Structure describing allocated buffer. Client may need ++ * to query such information as part of 'sharing' buffers mechanism ++ * ++ * \sa amdgpu_bo_set_metadata(), amdgpu_bo_query_info(), ++ * amdgpu_bo_import(), amdgpu_bo_export() ++*/ ++struct amdgpu_bo_info { ++ /** Allocated memory size */ ++ uint64_t alloc_size; ++ ++ /** ++ * It may be required to have some specific alignment requirements ++ * for physical back-up storage. ++ */ ++ uint64_t phys_alignment; ++ ++ /** ++ * Assigned virtual MC Base Address. ++ * \note This information will be returned only if this buffer was ++ * allocated in the same process otherwise 0 will be returned. ++ */ ++ uint64_t virtual_mc_base_address; ++ ++ /** Heap where to allocate memory. */ ++ uint32_t preferred_heap; ++ ++ /** Additional allocation flags. */ ++ uint64_t alloc_flags; ++ ++ /** Metadata associated with buffer if any. 
*/ ++ struct amdgpu_bo_metadata metadata; ++}; ++ ++/** ++ * Structure with information about "imported" buffer ++ * ++ * \sa amdgpu_bo_import() ++ * ++ */ ++struct amdgpu_bo_import_result { ++ /** Handle of memory/buffer to use */ ++ amdgpu_bo_handle buf_handle; ++ ++ /** Buffer size */ ++ uint64_t alloc_size; ++ ++ /** Assigned virtual MC Base Address */ ++ uint64_t virtual_mc_base_address; ++}; ++ ++ ++/** ++ * ++ * Structure to describe GDS partitioning information. ++ * \note OA and GWS resources are asscoiated with GDS partition ++ * ++ * \sa amdgpu_gpu_resource_query_gds_info ++ * ++*/ ++struct amdgpu_gds_resource_info { ++ uint32_t gds_gfx_partition_size; ++ uint32_t compute_partition_size; ++ uint32_t gds_total_size; ++ uint32_t gws_per_gfx_partition; ++ uint32_t gws_per_compute_partition; ++ uint32_t oa_per_gfx_partition; ++ uint32_t oa_per_compute_partition; ++}; ++ ++ ++ ++/** ++ * Structure describing result of request to allocate GDS ++ * ++ * \sa amdgpu_gpu_resource_gds_alloc ++ * ++*/ ++struct amdgpu_gds_alloc_info { ++ /** Handle assigned to gds allocation */ ++ amdgpu_bo_handle resource_handle; ++ ++ /** How much was really allocated */ ++ uint32_t gds_memory_size; ++ ++ /** Number of GWS resources allocated */ ++ uint32_t gws; ++ ++ /** Number of OA resources allocated */ ++ uint32_t oa; ++}; ++ ++/** ++ * Structure to described allocated command buffer (a.k.a. IB) ++ * ++ * \sa amdgpu_cs_alloc_ib() ++ * ++*/ ++struct amdgpu_cs_ib_alloc_result { ++ /** IB allocation handle */ ++ amdgpu_ib_handle handle; ++ ++ /** Assigned GPU VM MC Address of command buffer */ ++ uint64_t mc_address; ++ ++ /** Address to be used for CPU access */ ++ void *cpu; ++}; ++ ++/** ++ * Structure describing IB ++ * ++ * \sa amdgpu_cs_request, amdgpu_cs_submit() ++ * ++*/ ++struct amdgpu_cs_ib_info { ++ /** Special flags */ ++ uint64_t flags; ++ ++ /** Handle of command buffer */ ++ amdgpu_ib_handle ib_handle; ++ ++ /** ++ * Size of Command Buffer to be submitted. ++ * - The size is in units of dwords (4 bytes). ++ * - Must be less or equal to the size of allocated IB ++ * - Could be 0 ++ */ ++ uint32_t size; ++}; ++ ++/** ++ * Structure describing submission request ++ * ++ * \note We could have several IBs as packet. e.g. CE, CE, DE case for gfx ++ * ++ * \sa amdgpu_cs_submit() ++*/ ++struct amdgpu_cs_request { ++ /** Specify flags with additional information */ ++ uint64_t flags; ++ ++ /** Specify HW IP block type to which to send the IB. */ ++ unsigned ip_type; ++ ++ /** IP instance index if there are several IPs of the same type. */ ++ unsigned ip_instance; ++ ++ /** ++ * Specify ring index of the IP. We could have several rings ++ * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1. ++ */ ++ uint32_t ring; ++ ++ /** ++ * Specify number of resource handles passed. ++ * Size of 'handles' array ++ * ++ */ ++ uint32_t number_of_resources; ++ ++ /** Array of resources used by submission. */ ++ amdgpu_bo_handle *resources; ++ ++ /** Array of resources flags. This is optional and can be NULL. */ ++ uint8_t *resource_flags; ++ ++ /** Number of IBs to submit in the field ibs. */ ++ uint32_t number_of_ibs; ++ ++ /** ++ * IBs to submit. 
Those IBs will be submit together as single entity ++ */ ++ struct amdgpu_cs_ib_info *ibs; ++}; ++ ++/** ++ * Structure describing request to check submission state using fence ++ * ++ * \sa amdgpu_cs_query_fence_status() ++ * ++*/ ++struct amdgpu_cs_query_fence { ++ ++ /** In which context IB was sent to execution */ ++ amdgpu_context_handle context; ++ ++ /** Timeout in nanoseconds. */ ++ uint64_t timeout_ns; ++ ++ /** To which HW IP type the fence belongs */ ++ unsigned ip_type; ++ ++ /** IP instance index if there are several IPs of the same type. */ ++ unsigned ip_instance; ++ ++ /** Ring index of the HW IP */ ++ uint32_t ring; ++ ++ /** Flags */ ++ uint64_t flags; ++ ++ /** Specify fence for which we need to check ++ * submission status.*/ ++ uint64_t fence; ++}; ++ ++/** ++ * Structure which provide information about GPU VM MC Address space ++ * alignments requirements ++ * ++ * \sa amdgpu_query_buffer_size_alignment ++ */ ++struct amdgpu_buffer_size_alignments { ++ /** Size alignment requirement for allocation in ++ * local memory */ ++ uint64_t size_local; ++ ++ /** ++ * Size alignment requirement for allocation in remote memory ++ */ ++ uint64_t size_remote; ++}; ++ ++ ++/** ++ * Structure which provide information about heap ++ * ++ * \sa amdgpu_query_heap_info() ++ * ++ */ ++struct amdgpu_heap_info { ++ /** Theoretical max. available memory in the given heap */ ++ uint64_t heap_size; ++ ++ /** ++ * Number of bytes allocated in the heap. This includes all processes ++ * and private allocations in the kernel. It changes when new buffers ++ * are allocated, freed, and moved. It cannot be larger than ++ * heap_size. ++ */ ++ uint64_t heap_usage; ++ ++ /** ++ * Theoretical possible max. size of buffer which ++ * could be allocated in the given heap ++ */ ++ uint64_t max_allocation; ++}; ++ ++ ++ ++/** ++ * Describe GPU h/w info needed for UMD correct initialization ++ * ++ * \sa amdgpu_query_gpu_info() ++*/ ++struct amdgpu_gpu_info { ++ /** Asic id */ ++ uint32_t asic_id; ++ /**< Chip revision */ ++ uint32_t chip_rev; ++ /** Chip external revision */ ++ uint32_t chip_external_rev; ++ /** Family ID */ ++ uint32_t family_id; ++ /** Special flags */ ++ uint64_t ids_flags; ++ /** max engine clock*/ ++ uint64_t max_engine_clk; ++ /** number of shader engines */ ++ uint32_t num_shader_engines; ++ /** number of shader arrays per engine */ ++ uint32_t num_shader_arrays_per_engine; ++ /** Number of available good shader pipes */ ++ uint32_t avail_quad_shader_pipes; ++ /** Max. 
number of shader pipes.(including good and bad pipes */ ++ uint32_t max_quad_shader_pipes; ++ /** Number of parameter cache entries per shader quad pipe */ ++ uint32_t cache_entries_per_quad_pipe; ++ /** Number of available graphics context */ ++ uint32_t num_hw_gfx_contexts; ++ /** Number of render backend pipes */ ++ uint32_t rb_pipes; ++ /** Active render backend pipe number */ ++ uint32_t active_rb_pipes; ++ /** Enabled render backend pipe mask */ ++ uint32_t enabled_rb_pipes_mask; ++ /** Frequency of GPU Counter */ ++ uint32_t gpu_counter_freq; ++ /** CC_RB_BACKEND_DISABLE.BACKEND_DISABLE per SE */ ++ uint32_t backend_disable[4]; ++ /** Value of MC_ARB_RAMCFG register*/ ++ uint32_t mc_arb_ramcfg; ++ /** Value of GB_ADDR_CONFIG */ ++ uint32_t gb_addr_cfg; ++ /** Values of the GB_TILE_MODE0..31 registers */ ++ uint32_t gb_tile_mode[32]; ++ /** Values of GB_MACROTILE_MODE0..15 registers */ ++ uint32_t gb_macro_tile_mode[16]; ++ /** Value of PA_SC_RASTER_CONFIG register per SE */ ++ uint32_t pa_sc_raster_cfg[4]; ++ /** Value of PA_SC_RASTER_CONFIG_1 register per SE */ ++ uint32_t pa_sc_raster_cfg1[4]; ++ /* CU info */ ++ uint32_t cu_active_number; ++ uint32_t cu_ao_mask; ++ uint32_t cu_bitmap[4][4]; ++}; ++ ++ ++/*--------------------------------------------------------------------------*/ ++/*------------------------- Functions --------------------------------------*/ ++/*--------------------------------------------------------------------------*/ ++ ++/* ++ * Initialization / Cleanup ++ * ++*/ ++ ++ ++/** ++ * ++ * \param fd - \c [in] File descriptor for AMD GPU device ++ * received previously as the result of ++ * e.g. drmOpen() call. ++ * For legacy fd type, the DRI2/DRI3 authentication ++ * should be done before calling this function. ++ * \param major_version - \c [out] Major version of library. It is assumed ++ * that adding new functionality will cause ++ * increase in major version ++ * \param minor_version - \c [out] Minor version of library ++ * \param device_handle - \c [out] Pointer to opaque context which should ++ * be passed as the first parameter on each ++ * API call ++ * ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * ++ * \sa amdgpu_device_deinitialize() ++*/ ++int amdgpu_device_initialize(int fd, ++ uint32_t *major_version, ++ uint32_t *minor_version, ++ amdgpu_device_handle *device_handle); ++ ++ ++ ++/** ++ * ++ * When access to such library does not needed any more the special ++ * function must be call giving opportunity to clean up any ++ * resources if needed. ++ * ++ * \param device_handle - \c [in] Context associated with file ++ * descriptor for AMD GPU device ++ * received previously as the ++ * result e.g. of drmOpen() call. ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \sa amdgpu_device_initialize() ++ * ++*/ ++int amdgpu_device_deinitialize(amdgpu_device_handle device_handle); ++ ++ ++/* ++ * Memory Management ++ * ++*/ ++ ++/** ++ * Allocate memory to be used by UMD for GPU related operations ++ * ++ * \param dev - \c [in] Device handle. 
++ * See #amdgpu_device_initialize() ++ * \param alloc_buffer - \c [in] Pointer to the structure describing an ++ * allocation request ++ * \param info - \c [out] Pointer to structure which return ++ * information about allocated memory ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \sa amdgpu_bo_free() ++*/ ++int amdgpu_bo_alloc(amdgpu_device_handle dev, ++ struct amdgpu_bo_alloc_request *alloc_buffer, ++ struct amdgpu_bo_alloc_result *info); ++ ++/** ++ * Associate opaque data with buffer to be queried by another UMD ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param buf_handle - \c [in] Buffer handle ++ * \param info - \c [in] Metadata to associated with buffer ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++*/ ++int amdgpu_bo_set_metadata(amdgpu_bo_handle buf_handle, ++ struct amdgpu_bo_metadata *info); ++ ++/** ++ * Query buffer information including metadata previusly associated with ++ * buffer. ++ * ++ * \param dev - \c [in] Device handle. ++ * See #amdgpu_device_initialize() ++ * \param buf_handle - \c [in] Buffer handle ++ * \param info - \c [out] Structure describing buffer ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \sa amdgpu_bo_set_metadata(), amdgpu_bo_alloc() ++*/ ++int amdgpu_bo_query_info(amdgpu_bo_handle buf_handle, ++ struct amdgpu_bo_info *info); ++ ++/** ++ * Allow others to get access to buffer ++ * ++ * \param dev - \c [in] Device handle. ++ * See #amdgpu_device_initialize() ++ * \param buf_handle - \c [in] Buffer handle ++ * \param type - \c [in] Type of handle requested ++ * \param shared_handle - \c [out] Special "shared" handle ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \sa amdgpu_bo_import() ++ * ++*/ ++int amdgpu_bo_export(amdgpu_bo_handle buf_handle, ++ enum amdgpu_bo_handle_type type, ++ uint32_t *shared_handle); ++ ++/** ++ * Request access to "shared" buffer ++ * ++ * \param dev - \c [in] Device handle. ++ * See #amdgpu_device_initialize() ++ * \param type - \c [in] Type of handle requested ++ * \param shared_handle - \c [in] Shared handle received as result "import" ++ * operation ++ * \param output - \c [out] Pointer to structure with information ++ * about imported buffer ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \note Buffer must be "imported" only using new "fd" (different from ++ * one used by "exporter"). ++ * ++ * \sa amdgpu_bo_export() ++ * ++*/ ++int amdgpu_bo_import(amdgpu_device_handle dev, ++ enum amdgpu_bo_handle_type type, ++ uint32_t shared_handle, ++ struct amdgpu_bo_import_result *output); ++ ++/** ++ * Free previosuly allocated memory ++ * ++ * \param dev - \c [in] Device handle. 
See #amdgpu_device_initialize() ++ * \param buf_handle - \c [in] Buffer handle to free ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \note In the case of memory shared between different applications all ++ * resources will be “physically” freed only all such applications ++ * will be terminated ++ * \note If is UMD responsibility to ‘free’ buffer only when there is no ++ * more GPU access ++ * ++ * \sa amdgpu_bo_set_metadata(), amdgpu_bo_alloc() ++ * ++*/ ++int amdgpu_bo_free(amdgpu_bo_handle buf_handle); ++ ++/** ++ * Request CPU access to GPU accessable memory ++ * ++ * \param buf_handle - \c [in] Buffer handle ++ * \param cpu - \c [out] CPU address to be used for access ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \sa amdgpu_bo_cpu_unmap() ++ * ++*/ ++int amdgpu_bo_cpu_map(amdgpu_bo_handle buf_handle, void **cpu); ++ ++/** ++ * Release CPU access to GPU memory ++ * ++ * \param buf_handle - \c [in] Buffer handle ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \sa amdgpu_bo_cpu_map() ++ * ++*/ ++int amdgpu_bo_cpu_unmap(amdgpu_bo_handle buf_handle); ++ ++ ++/** ++ * Wait until a buffer is not used by the device. ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_lib_initialize() ++ * \param buf_handle - \c [in] Buffer handle. ++ * \param timeout_ns - Timeout in nanoseconds. ++ * \param buffer_busy - 0 if buffer is idle, all GPU access was completed ++ * and no GPU access is scheduled. ++ * 1 GPU access is in fly or scheduled ++ * ++ * \return 0 - on success ++ * <0 - AMD specific error code ++ */ ++int amdgpu_bo_wait_for_idle(amdgpu_bo_handle buf_handle, ++ uint64_t timeout_ns, ++ bool *buffer_busy); ++ ++ ++/* ++ * Special GPU Resources ++ * ++*/ ++ ++ ++ ++/** ++ * Query information about GDS ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param gds_info - \c [out] Pointer to structure to get GDS information ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++*/ ++int amdgpu_gpu_resource_query_gds_info(amdgpu_device_handle dev, ++ struct amdgpu_gds_resource_info * ++ gds_info); ++ ++ ++/** ++ * Allocate GDS partitions ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param gds_size - \c [in] Size of gds allocation. Must be aligned ++ * accordingly. ++ * \param alloc_info - \c [out] Pointer to structure to receive information ++ * about allocation ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * ++*/ ++int amdgpu_gpu_resource_gds_alloc(amdgpu_device_handle dev, ++ uint32_t gds_size, ++ struct amdgpu_gds_alloc_info *alloc_info); ++ ++ ++ ++ ++/** ++ * Release GDS resource. When GDS and associated resources not needed any ++ * more UMD should free them ++ * ++ * \param dev - \c [in] Device handle. 
See #amdgpu_device_initialize() ++ * \param handle - \c [in] Handle assigned to GDS allocation ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++*/ ++int amdgpu_gpu_resource_gds_free(amdgpu_bo_handle handle); ++ ++ ++ ++/* ++ * GPU Execution context ++ * ++*/ ++ ++/** ++ * Create GPU execution Context ++ * ++ * For the purpose of GPU Scheduler and GPU Robustness extensions it is ++ * necessary to have information/identify rendering/compute contexts. ++ * It also may be needed to associate some specific requirements with such ++ * contexts. Kernel driver will guarantee that submission from the same ++ * context will always be executed in order (first come, first serve). ++ * ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param context - \c [out] GPU Context handle ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \sa amdgpu_cs_ctx_free() ++ * ++*/ ++int amdgpu_cs_ctx_create(amdgpu_device_handle dev, ++ amdgpu_context_handle *context); ++ ++/** ++ * ++ * Destroy GPU execution context when not needed any more ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param context - \c [in] GPU Context handle ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \sa amdgpu_cs_ctx_create() ++ * ++*/ ++int amdgpu_cs_ctx_free(amdgpu_device_handle dev, ++ amdgpu_context_handle context); ++ ++/** ++ * Query reset state for the specific GPU Context ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param context - \c [in] GPU Context handle ++ * \param state - \c [out] Reset state status ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \sa amdgpu_cs_ctx_create() ++ * ++*/ ++int amdgpu_cs_query_reset_state(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ enum amdgpu_cs_ctx_reset_state *state); ++ ++ ++/* ++ * Command Buffers Management ++ * ++*/ ++ ++ ++/** ++ * Allocate memory to be filled with PM4 packets and be served as the first ++ * entry point of execution (a.k.a. Indirect Buffer) ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param context - \c [in] GPU Context which will use IB ++ * \param ib_size - \c [in] Size of allocation ++ * \param output - \c [out] Pointer to structure to get information about ++ * allocated IB ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \sa amdgpu_cs_free_ib() ++ * ++*/ ++int amdgpu_cs_alloc_ib(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ enum amdgpu_cs_ib_size ib_size, ++ struct amdgpu_cs_ib_alloc_result *output); ++ ++/** ++ * If UMD has allocates IBs which doesn’t need any more than those IBs must ++ * be explicitly freed ++ * ++ * \param dev - \c [in] Device handle. 
See #amdgpu_device_initialize() ++ * \param context - \c [in] GPU Context containing IB ++ * \param handle - \c [in] IB handle ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \note Libdrm_amdgpu will guarantee that it will correctly detect when it ++ * is safe to return IB to free pool ++ * ++ * \sa amdgpu_cs_alloc_ib() ++ * ++*/ ++int amdgpu_cs_free_ib(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ amdgpu_ib_handle handle); ++ ++/** ++ * Send request to submit command buffers to hardware. ++ * ++ * Kernel driver could use GPU Scheduler to make decision when physically ++ * sent this request to the hardware. Accordingly this request could be put ++ * in queue and sent for execution later. The only guarantee is that request ++ * from the same GPU context to the same ip:ip_instance:ring will be executed in ++ * order. ++ * ++ * ++ * \param dev - \c [in] Device handle. ++ * See #amdgpu_device_initialize() ++ * \param context - \c [in] GPU Context ++ * \param flags - \c [in] Global submission flags ++ * \param ibs_request - \c [in] Pointer to submission requests. ++ * We could submit to the several ++ * engines/rings simulteniously as ++ * 'atomic' operation ++ * \param number_of_requests - \c [in] Number of submission requests ++ * \param fences - \c [out] Pointer to array of data to get ++ * fences to identify submission ++ * requests. Timestamps are valid ++ * in this GPU context and could be used ++ * to identify/detect completion of ++ * submission request ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \note It is assumed that by default IB will be returned to free pool ++ * automatically by libdrm_amdgpu when submission will completed. ++ * It is possible for UMD to make decision to re-use the same IB in ++ * this case it should be explicitly freed.\n ++ * Accordingly, by default, after submission UMD should not touch passed ++ * IBs. If UMD needs to re-use IB then the special flag AMDGPU_CS_REUSE_IB ++ * must be passed. ++ * ++ * \note It is required to pass correct resource list with buffer handles ++ * which will be accessible by command buffers from submission ++ * This will allow kernel driver to correctly implement "paging". ++ * Failure to do so will have unpredictable results. ++ * ++ * \sa amdgpu_command_buffer_alloc(), amdgpu_command_buffer_free(), ++ * amdgpu_cs_query_fence_status() ++ * ++*/ ++int amdgpu_cs_submit(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ uint64_t flags, ++ struct amdgpu_cs_request *ibs_request, ++ uint32_t number_of_requests, ++ uint64_t *fences); ++ ++/** ++ * Query status of Command Buffer Submission ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param fence - \c [in] Structure describing fence to query ++ * \param expired - \c [out] If fence expired or not.\n ++ * 0 – if fence is not expired\n ++ * !0 - otherwise ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++ * \note If UMD wants only to check operation status and returned immediately ++ * then timeout value as 0 must be passed. In this case success will be ++ * returned in the case if submission was completed or timeout error ++ * code. 
++ * ++ * \sa amdgpu_cs_submit() ++*/ ++int amdgpu_cs_query_fence_status(amdgpu_device_handle dev, ++ struct amdgpu_cs_query_fence *fence, ++ uint32_t *expired); ++ ++ ++/* ++ * Query / Info API ++ * ++*/ ++ ++ ++/** ++ * Query allocation size alignments ++ * ++ * UMD should query information about GPU VM MC size alignments requirements ++ * to be able correctly choose required allocation size and implement ++ * internal optimization if needed. ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param info - \c [out] Pointer to structure to get size alignment ++ * requirements ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++*/ ++int amdgpu_query_buffer_size_alignment(amdgpu_device_handle dev, ++ struct amdgpu_buffer_size_alignments ++ *info); ++ ++ ++ ++/** ++ * Query firmware versions ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param fw_type - \c [in] AMDGPU_INFO_FW_* ++ * \param ip_instance - \c [in] Index of the IP block of the same type. ++ * \param index - \c [in] Index of the engine. (for SDMA and MEC) ++ * \param version - \c [out] Pointer to to the "version" return value ++ * \param feature - \c [out] Pointer to to the "feature" return value ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++*/ ++int amdgpu_query_firmware_version(amdgpu_device_handle dev, unsigned fw_type, ++ unsigned ip_instance, unsigned index, ++ uint32_t *version, uint32_t *feature); ++ ++ ++ ++/** ++ * Query the number of HW IP instances of a certain type. ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param type - \c [in] Hardware IP block type = AMDGPU_HW_IP_* ++ * \param count - \c [out] Pointer to structure to get information ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++*/ ++int amdgpu_query_hw_ip_count(amdgpu_device_handle dev, unsigned type, ++ uint32_t *count); ++ ++ ++ ++/** ++ * Query engine information ++ * ++ * This query allows UMD to query information different engines and their ++ * capabilities. ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param type - \c [in] Hardware IP block type = AMDGPU_HW_IP_* ++ * \param ip_instance - \c [in] Index of the IP block of the same type. ++ * \param info - \c [out] Pointer to structure to get information ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++*/ ++int amdgpu_query_hw_ip_info(amdgpu_device_handle dev, unsigned type, ++ unsigned ip_instance, ++ struct drm_amdgpu_info_hw_ip *info); ++ ++ ++ ++ ++/** ++ * Query heap information ++ * ++ * This query allows UMD to query potentially available memory resources and ++ * adjust their logic if necessary. ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param heap - \c [in] Heap type ++ * \param info - \c [in] Pointer to structure to get needed information ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++*/ ++int amdgpu_query_heap_info(amdgpu_device_handle dev, ++ uint32_t heap, ++ uint32_t flags, ++ struct amdgpu_heap_info *info); ++ ++ ++ ++/** ++ * Get the CRTC ID from the mode object ID ++ * ++ * \param dev - \c [in] Device handle. 
See #amdgpu_device_initialize() ++ * \param id - \c [in] Mode object ID ++ * \param result - \c [in] Pointer to the CRTC ID ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++*/ ++int amdgpu_query_crtc_from_id(amdgpu_device_handle dev, unsigned id, ++ int32_t *result); ++ ++ ++ ++/** ++ * Query GPU H/w Info ++ * ++ * Query hardware specific information ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param heap - \c [in] Heap type ++ * \param info - \c [in] Pointer to structure to get needed information ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX Error code ++ * ++*/ ++int amdgpu_query_gpu_info(amdgpu_device_handle dev, ++ struct amdgpu_gpu_info *info); ++ ++ ++ ++/** ++ * Query hardware or driver information. ++ * ++ * The return size is query-specific and depends on the "info_id" parameter. ++ * No more than "size" bytes is returned. ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() ++ * \param info_id - \c [in] AMDGPU_INFO_* ++ * \param size - \c [in] Size of the returned value. ++ * \param value - \c [out] Pointer to the return value. ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX error code ++ * ++*/ ++int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id, ++ unsigned size, void *value); ++ ++ ++ ++/** ++ * Read a set of consecutive memory-mapped registers. ++ * Not all registers are allowed to be read by userspace. ++ * ++ * \param dev - \c [in] Device handle. See #amdgpu_device_initialize( ++ * \param dword_offset - \c [in] Register offset in dwords ++ * \param count - \c [in] The number of registers to read starting ++ * from the offset ++ * \param instance - \c [in] GRBM_GFX_INDEX selector. It may have other ++ * uses. Set it to 0xffffffff if unsure. ++ * \param flags - \c [in] Flags with additional information. ++ * \param values - \c [out] The pointer to return values. ++ * ++ * \return 0 on success\n ++ * >0 - AMD specific error code\n ++ * <0 - Negative POSIX error code ++ * ++*/ ++int amdgpu_read_mm_registers(amdgpu_device_handle dev, unsigned dword_offset, ++ unsigned count, uint32_t instance, uint32_t flags, ++ uint32_t *values); ++ ++ ++ ++/** ++ * Request GPU access to user allocated memory e.g. via "malloc" ++ * ++ * \param dev - [in] Device handle. See #amdgpu_device_initialize() ++ * \param cpu - [in] CPU address of user allocated memory which we ++ * want to map to GPU address space (make GPU accessible) ++ * (This address must be correctly aligned). ++ * \param size - [in] Size of allocation (must be correctly aligned) ++ * \param amdgpu_bo_alloc_result - [out] Handle of allocation to be passed as resource ++ * on submission and be used in other operations.(e.g. for VA submission) ++ * ( Temporally defined amdgpu_bo_alloc_result as parameter for return mc address. ) ++ * ++ * ++ * \return 0 on success ++ * >0 - AMD specific error code ++ * <0 - Negative POSIX Error code ++ * ++ * ++ * \note ++ * This call doesn't guarantee that such memory will be persistently ++ * "locked" / make non-pageable. The purpose of this call is to provide ++ * opportunity for GPU get access to this resource during submission. ++ * ++ * The maximum amount of memory which could be mapped in this call depends ++ * if overcommit is disabled or not. If overcommit is disabled than the max. 
++ * amount of memory to be pinned will be limited by left "free" size in total ++ * amount of memory which could be locked simultaneously ("GART" size). ++ * ++ * Supported (theoretical) max. size of mapping is restricted only by ++ * "GART" size. ++ * ++ * It is responsibility of caller to correctly specify access rights ++ * on VA assignment. ++*/ ++int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev, ++ void *cpu, ++ uint64_t size, ++ struct amdgpu_bo_alloc_result *info); ++ ++ ++#endif /* #ifdef _amdgpu_h_ */ ++ ++ +diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c +new file mode 100644 +index 0000000..ce7e9d1 +--- /dev/null ++++ b/amdgpu/amdgpu_bo.c +@@ -0,0 +1,622 @@ ++/* ++ * Copyright © 2014 Advanced Micro Devices, Inc. ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#define _FILE_OFFSET_BITS 64 ++#include <stdlib.h> ++#include <stdio.h> ++#include <string.h> ++#include <errno.h> ++#include <fcntl.h> ++#include <unistd.h> ++#include <sys/ioctl.h> ++#include <sys/mman.h> ++#include <sys/time.h> ++ ++#include "xf86drm.h" ++#include "amdgpu_drm.h" ++#include "amdgpu_internal.h" ++#include "util_hash_table.h" ++ ++static void amdgpu_close_kms_handle(amdgpu_device_handle dev, ++ uint32_t handle) ++{ ++ struct drm_gem_close args = {}; ++ ++ args.handle = handle; ++ drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &args); ++} ++ ++void amdgpu_bo_free_internal(amdgpu_bo_handle bo) ++{ ++ /* Remove the buffer from the hash tables. */ ++ pthread_mutex_lock(&bo->dev->bo_table_mutex); ++ util_hash_table_remove(bo->dev->bo_handles, ++ (void*)(uintptr_t)bo->handle); ++ if (bo->flink_name) { ++ util_hash_table_remove(bo->dev->bo_flink_names, ++ (void*)(uintptr_t)bo->flink_name); ++ } ++ pthread_mutex_unlock(&bo->dev->bo_table_mutex); ++ ++ /* Release CPU access. 
*/ ++ if (bo->cpu_map_count > 0) { ++ bo->cpu_map_count = 1; ++ amdgpu_bo_cpu_unmap(bo); ++ } ++ ++ amdgpu_close_kms_handle(bo->dev, bo->handle); ++ pthread_mutex_destroy(&bo->cpu_access_mutex); ++ amdgpu_vamgr_free_va(&bo->dev->vamgr, bo->virtual_mc_base_address, bo->alloc_size); ++ free(bo); ++} ++ ++int amdgpu_bo_alloc(amdgpu_device_handle dev, ++ struct amdgpu_bo_alloc_request *alloc_buffer, ++ struct amdgpu_bo_alloc_result *info) ++{ ++ struct amdgpu_bo *bo; ++ union drm_amdgpu_gem_create args; ++ unsigned heap = alloc_buffer->preferred_heap; ++ int r = 0; ++ ++ /* It's an error if the heap is not specified */ ++ if (!(heap & (AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM))) ++ return -EINVAL; ++ ++ bo = calloc(1, sizeof(struct amdgpu_bo)); ++ if (!bo) ++ return -ENOMEM; ++ ++ atomic_set(&bo->refcount, 1); ++ bo->dev = dev; ++ bo->alloc_size = alloc_buffer->alloc_size; ++ ++ memset(&args, 0, sizeof(args)); ++ args.in.bo_size = alloc_buffer->alloc_size; ++ args.in.alignment = alloc_buffer->phys_alignment; ++ ++ /* Set the placement. */ ++ args.in.domains = heap & AMDGPU_GEM_DOMAIN_MASK; ++ args.in.domain_flags = alloc_buffer->flags & AMDGPU_GEM_CREATE_CPU_GTT_MASK; ++ ++ /* Allocate the buffer with the preferred heap. */ ++ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_CREATE, ++ &args, sizeof(args)); ++ if (r) { ++ free(bo); ++ return r; ++ } ++ ++ bo->handle = args.out.handle; ++ ++ pthread_mutex_init(&bo->cpu_access_mutex, NULL); ++ ++ /* map the buffer to the GPU virtual address space */ ++ { ++ union drm_amdgpu_gem_va va; ++ ++ memset(&va, 0, sizeof(va)); ++ ++ bo->virtual_mc_base_address = amdgpu_vamgr_find_va(&dev->vamgr, alloc_buffer->alloc_size, alloc_buffer->phys_alignment); ++ ++ va.in.handle = bo->handle; ++ va.in.operation = AMDGPU_VA_OP_MAP; ++ va.in.flags = AMDGPU_VM_PAGE_READABLE | ++ AMDGPU_VM_PAGE_WRITEABLE | ++ AMDGPU_VM_PAGE_EXECUTABLE; ++ va.in.va_address = bo->virtual_mc_base_address; ++ ++ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va)); ++ if (r || va.out.result == AMDGPU_VA_RESULT_ERROR) { ++ amdgpu_bo_free_internal(bo); ++ return r; ++ } ++ pthread_mutex_lock(&dev->bo_table_mutex); ++ ++ util_hash_table_set(dev->bo_vas, ++ (void*)(uintptr_t)bo->virtual_mc_base_address, bo); ++ pthread_mutex_unlock(&dev->bo_table_mutex); ++ } ++ ++ info->buf_handle = bo; ++ info->virtual_mc_base_address = bo->virtual_mc_base_address; ++ return 0; ++} ++ ++int amdgpu_bo_set_metadata(amdgpu_bo_handle bo, ++ struct amdgpu_bo_metadata *info) ++{ ++ struct drm_amdgpu_gem_metadata args = {}; ++ ++ args.handle = bo->handle; ++ args.op = AMDGPU_GEM_METADATA_OP_SET_METADATA; ++ args.data.flags = info->flags; ++ args.data.tiling_info = info->tiling_info; ++ ++ if (info->size_metadata > sizeof(args.data.data)) ++ return -EINVAL; ++ ++ if (info->size_metadata) { ++ args.data.data_size_bytes = info->size_metadata; ++ memcpy(args.data.data, info->umd_metadata, info->size_metadata); ++ } ++ ++ return drmCommandWriteRead(bo->dev->fd, ++ DRM_AMDGPU_GEM_METADATA, ++ &args, sizeof(args)); ++} ++ ++int amdgpu_bo_query_info(amdgpu_bo_handle bo, ++ struct amdgpu_bo_info *info) ++{ ++ struct drm_amdgpu_gem_metadata metadata = {}; ++ struct drm_amdgpu_gem_create_in bo_info = {}; ++ struct drm_amdgpu_gem_op gem_op = {}; ++ int r; ++ ++ /* Query metadata. 
*/ ++ metadata.handle = bo->handle; ++ metadata.op = AMDGPU_GEM_METADATA_OP_GET_METADATA; ++ ++ r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_METADATA, ++ &metadata, sizeof(metadata)); ++ if (r) ++ return r; ++ ++ if (metadata.data.data_size_bytes > ++ sizeof(info->metadata.umd_metadata)) ++ return -EINVAL; ++ ++ /* Query buffer info. */ ++ gem_op.handle = bo->handle; ++ gem_op.op = AMDGPU_GEM_OP_GET_GEM_CREATE_INFO; ++ gem_op.value = (intptr_t)&bo_info; ++ ++ r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_OP, ++ &gem_op, sizeof(gem_op)); ++ if (r) ++ return r; ++ ++ memset(info, 0, sizeof(*info)); ++ info->alloc_size = bo_info.bo_size; ++ info->phys_alignment = bo_info.alignment; ++ info->virtual_mc_base_address = bo->virtual_mc_base_address; ++ info->preferred_heap = bo_info.domains; ++ info->alloc_flags = bo_info.domain_flags; ++ info->metadata.flags = metadata.data.flags; ++ info->metadata.tiling_info = metadata.data.tiling_info; ++ ++ info->metadata.size_metadata = metadata.data.data_size_bytes; ++ if (metadata.data.data_size_bytes > 0) ++ memcpy(info->metadata.umd_metadata, metadata.data.data, ++ metadata.data.data_size_bytes); ++ ++ return 0; ++} ++ ++static void amdgpu_add_handle_to_table(amdgpu_bo_handle bo) ++{ ++ pthread_mutex_lock(&bo->dev->bo_table_mutex); ++ util_hash_table_set(bo->dev->bo_handles, ++ (void*)(uintptr_t)bo->handle, bo); ++ pthread_mutex_unlock(&bo->dev->bo_table_mutex); ++} ++ ++static int amdgpu_bo_export_flink(amdgpu_bo_handle bo) ++{ ++ struct drm_gem_flink flink; ++ int fd, dma_fd; ++ uint32_t handle; ++ int r; ++ ++ fd = bo->dev->fd; ++ handle = bo->handle; ++ if (bo->flink_name) ++ return 0; ++ ++ ++ if (bo->dev->flink_fd != bo->dev->fd) { ++ r = drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC, ++ &dma_fd); ++ if (!r) { ++ r = drmPrimeFDToHandle(bo->dev->flink_fd, dma_fd, &handle); ++ close(dma_fd); ++ } ++ if (r) ++ return r; ++ fd = bo->dev->flink_fd; ++ } ++ memset(&flink, 0, sizeof(flink)); ++ flink.handle = handle; ++ ++ r = drmIoctl(fd, DRM_IOCTL_GEM_FLINK, &flink); ++ if (r) ++ return r; ++ ++ bo->flink_name = flink.name; ++ ++ if (bo->dev->flink_fd != bo->dev->fd) { ++ struct drm_gem_close args = {}; ++ args.handle = handle; ++ drmIoctl(bo->dev->flink_fd, DRM_IOCTL_GEM_CLOSE, &args); ++ } ++ ++ pthread_mutex_lock(&bo->dev->bo_table_mutex); ++ util_hash_table_set(bo->dev->bo_flink_names, ++ (void*)(uintptr_t)bo->flink_name, ++ bo); ++ pthread_mutex_unlock(&bo->dev->bo_table_mutex); ++ ++ return 0; ++} ++ ++int amdgpu_bo_export(amdgpu_bo_handle bo, ++ enum amdgpu_bo_handle_type type, ++ uint32_t *shared_handle) ++{ ++ int r; ++ ++ switch (type) { ++ case amdgpu_bo_handle_type_gem_flink_name: ++ r = amdgpu_bo_export_flink(bo); ++ if (r) ++ return r; ++ ++ *shared_handle = bo->flink_name; ++ return 0; ++ ++ case amdgpu_bo_handle_type_kms: ++ r = amdgpu_bo_export_flink(bo); ++ if (r) ++ return r; ++ ++ amdgpu_add_handle_to_table(bo); ++ *shared_handle = bo->handle; ++ return 0; ++ ++ case amdgpu_bo_handle_type_dma_buf_fd: ++ amdgpu_add_handle_to_table(bo); ++ return drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC, ++ (int*)shared_handle); ++ } ++ return -EINVAL; ++} ++ ++int amdgpu_bo_import(amdgpu_device_handle dev, ++ enum amdgpu_bo_handle_type type, ++ uint32_t shared_handle, ++ struct amdgpu_bo_import_result *output) ++{ ++ struct drm_gem_open open_arg = {}; ++ union drm_amdgpu_gem_va va; ++ struct amdgpu_bo *bo = NULL; ++ int r; ++ int dma_fd; ++ uint64_t dma_buf_size = 0; ++ ++ /* Convert a DMA buf handle to a KMS 
handle now. */ ++ if (type == amdgpu_bo_handle_type_dma_buf_fd) { ++ uint32_t handle; ++ off_t size; ++ ++ /* Get a KMS handle. */ ++ r = drmPrimeFDToHandle(dev->fd, shared_handle, &handle); ++ if (r) { ++ return r; ++ } ++ ++ /* Query the buffer size. */ ++ size = lseek(shared_handle, 0, SEEK_END); ++ if (size == (off_t)-1) { ++ amdgpu_close_kms_handle(dev, handle); ++ return -errno; ++ } ++ lseek(shared_handle, 0, SEEK_SET); ++ ++ dma_buf_size = size; ++ shared_handle = handle; ++ } ++ ++ /* We must maintain a list of pairs <handle, bo>, so that we always ++ * return the same amdgpu_bo instance for the same handle. */ ++ pthread_mutex_lock(&dev->bo_table_mutex); ++ ++ /* If we have already created a buffer with this handle, find it. */ ++ switch (type) { ++ case amdgpu_bo_handle_type_gem_flink_name: ++ bo = util_hash_table_get(dev->bo_flink_names, ++ (void*)(uintptr_t)shared_handle); ++ break; ++ ++ case amdgpu_bo_handle_type_dma_buf_fd: ++ bo = util_hash_table_get(dev->bo_handles, ++ (void*)(uintptr_t)shared_handle); ++ break; ++ ++ case amdgpu_bo_handle_type_kms: ++ /* Importing a KMS handle in not allowed. */ ++ pthread_mutex_unlock(&dev->bo_table_mutex); ++ return -EPERM; ++ ++ default: ++ pthread_mutex_unlock(&dev->bo_table_mutex); ++ return -EINVAL; ++ } ++ ++ if (bo) { ++ pthread_mutex_unlock(&dev->bo_table_mutex); ++ ++ /* The buffer already exists, just bump the refcount. */ ++ atomic_inc(&bo->refcount); ++ ++ output->buf_handle = bo; ++ output->alloc_size = bo->alloc_size; ++ output->virtual_mc_base_address = ++ bo->virtual_mc_base_address; ++ return 0; ++ } ++ ++ bo = calloc(1, sizeof(struct amdgpu_bo)); ++ if (!bo) { ++ pthread_mutex_unlock(&dev->bo_table_mutex); ++ if (type == amdgpu_bo_handle_type_dma_buf_fd) { ++ amdgpu_close_kms_handle(dev, shared_handle); ++ } ++ return -ENOMEM; ++ } ++ ++ /* Open the handle. */ ++ switch (type) { ++ case amdgpu_bo_handle_type_gem_flink_name: ++ open_arg.name = shared_handle; ++ r = drmIoctl(dev->flink_fd, DRM_IOCTL_GEM_OPEN, &open_arg); ++ if (r) { ++ free(bo); ++ pthread_mutex_unlock(&dev->bo_table_mutex); ++ return r; ++ } ++ ++ bo->handle = open_arg.handle; ++ if (dev->flink_fd != dev->fd) { ++ r = drmPrimeHandleToFD(dev->flink_fd, bo->handle, DRM_CLOEXEC, &dma_fd); ++ if (r) { ++ free(bo); ++ pthread_mutex_unlock(&dev->bo_table_mutex); ++ return r; ++ } ++ r = drmPrimeFDToHandle(dev->fd, dma_fd, &bo->handle ); ++ ++ close(dma_fd); ++ ++ if (r) { ++ free(bo); ++ pthread_mutex_unlock(&dev->bo_table_mutex); ++ return r; ++ } ++ } ++ bo->flink_name = shared_handle; ++ bo->alloc_size = open_arg.size; ++ util_hash_table_set(dev->bo_flink_names, ++ (void*)(uintptr_t)bo->flink_name, bo); ++ break; ++ ++ case amdgpu_bo_handle_type_dma_buf_fd: ++ bo->handle = shared_handle; ++ bo->alloc_size = dma_buf_size; ++ break; ++ ++ case amdgpu_bo_handle_type_kms: ++ assert(0); /* unreachable */ ++ } ++ ++ /* Initialize it. 
*/ ++ atomic_set(&bo->refcount, 1); ++ bo->dev = dev; ++ pthread_mutex_init(&bo->cpu_access_mutex, NULL); ++ ++ bo->virtual_mc_base_address = amdgpu_vamgr_find_va(&dev->vamgr, bo->alloc_size, 1 << 20); ++ ++ memset(&va, 0, sizeof(va)); ++ va.in.handle = bo->handle; ++ va.in.operation = AMDGPU_VA_OP_MAP; ++ va.in.va_address = bo->virtual_mc_base_address; ++ va.in.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | ++ AMDGPU_VM_PAGE_EXECUTABLE; ++ ++ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va)); ++ if (r || va.out.result == AMDGPU_VA_RESULT_ERROR) { ++ pthread_mutex_unlock(&dev->bo_table_mutex); ++ amdgpu_vamgr_free_va(&dev->vamgr, bo->virtual_mc_base_address, bo->alloc_size); ++ amdgpu_bo_reference(&bo, NULL); ++ return r; ++ } ++ ++ util_hash_table_set(dev->bo_vas, ++ (void*)(uintptr_t)bo->virtual_mc_base_address, bo); ++ util_hash_table_set(dev->bo_handles, (void*)(uintptr_t)bo->handle, bo); ++ pthread_mutex_unlock(&dev->bo_table_mutex); ++ ++ output->buf_handle = bo; ++ output->alloc_size = bo->alloc_size; ++ output->virtual_mc_base_address = bo->virtual_mc_base_address; ++ return 0; ++} ++ ++int amdgpu_bo_free(amdgpu_bo_handle buf_handle) ++{ ++ /* Just drop the reference. */ ++ amdgpu_bo_reference(&buf_handle, NULL); ++ return 0; ++} ++ ++int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu) ++{ ++ union drm_amdgpu_gem_mmap args; ++ void *ptr; ++ int r; ++ ++ pthread_mutex_lock(&bo->cpu_access_mutex); ++ ++ if (bo->cpu_ptr) { ++ /* already mapped */ ++ assert(bo->cpu_map_count > 0); ++ bo->cpu_map_count++; ++ *cpu = bo->cpu_ptr; ++ pthread_mutex_unlock(&bo->cpu_access_mutex); ++ return 0; ++ } ++ ++ assert(bo->cpu_map_count == 0); ++ ++ memset(&args, 0, sizeof(args)); ++ ++ /* Query the buffer address (args.addr_ptr). ++ * The kernel driver ignores the offset and size parameters. */ ++ args.in.handle = bo->handle; ++ ++ r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_MMAP, &args, ++ sizeof(args)); ++ if (r) { ++ pthread_mutex_unlock(&bo->cpu_access_mutex); ++ return r; ++ } ++ ++ /* Map the buffer. */ ++ ptr = mmap(NULL, bo->alloc_size, PROT_READ | PROT_WRITE, MAP_SHARED, ++ bo->dev->fd, args.out.addr_ptr); ++ if (ptr == MAP_FAILED) { ++ pthread_mutex_unlock(&bo->cpu_access_mutex); ++ return -errno; ++ } ++ ++ bo->cpu_ptr = ptr; ++ bo->cpu_map_count = 1; ++ pthread_mutex_unlock(&bo->cpu_access_mutex); ++ ++ *cpu = ptr; ++ return 0; ++} ++ ++int amdgpu_bo_cpu_unmap(amdgpu_bo_handle bo) ++{ ++ int r; ++ ++ pthread_mutex_lock(&bo->cpu_access_mutex); ++ assert(bo->cpu_map_count >= 0); ++ ++ if (bo->cpu_map_count == 0) { ++ /* not mapped */ ++ pthread_mutex_unlock(&bo->cpu_access_mutex); ++ return -EBADMSG; ++ } ++ ++ bo->cpu_map_count--; ++ if (bo->cpu_map_count > 0) { ++ /* mapped multiple times */ ++ pthread_mutex_unlock(&bo->cpu_access_mutex); ++ return 0; ++ } ++ ++ r = munmap(bo->cpu_ptr, bo->alloc_size) == 0 ? 
0 : -errno; ++ bo->cpu_ptr = NULL; ++ pthread_mutex_unlock(&bo->cpu_access_mutex); ++ return r; ++} ++ ++int amdgpu_query_buffer_size_alignment(amdgpu_device_handle dev, ++ struct amdgpu_buffer_size_alignments *info) ++{ ++ info->size_local = dev->dev_info.pte_fragment_size; ++ info->size_remote = dev->dev_info.gart_page_size; ++ return 0; ++} ++ ++int amdgpu_bo_wait_for_idle(amdgpu_bo_handle bo, ++ uint64_t timeout_ns, ++ bool *busy) ++{ ++ union drm_amdgpu_gem_wait_idle args; ++ int r; ++ ++ memset(&args, 0, sizeof(args)); ++ args.in.handle = bo->handle; ++ args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns); ++ ++ r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_WAIT_IDLE, ++ &args, sizeof(args)); ++ ++ if (r == 0) { ++ *busy = args.out.status; ++ return 0; ++ } else { ++ fprintf(stderr, "amdgpu: GEM_WAIT_IDLE failed with %i\n", r); ++ return r; ++ } ++} ++ ++int amdgpu_create_bo_from_user_mem(amdgpu_device_handle dev, ++ void *cpu, ++ uint64_t size, ++ struct amdgpu_bo_alloc_result *info) ++{ ++ int r; ++ struct amdgpu_bo *bo; ++ struct drm_amdgpu_gem_userptr args; ++ union drm_amdgpu_gem_va va; ++ ++ memset(&args, 0, sizeof(args)); ++ args.addr = (uint64_t)cpu; ++ args.flags = AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_REGISTER; ++ args.size = size; ++ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_USERPTR, ++ &args, sizeof(args)); ++ if (r) ++ return r; ++ ++ bo = calloc(1, sizeof(struct amdgpu_bo)); ++ if (!bo) ++ return -ENOMEM; ++ ++ atomic_set(&bo->refcount, 1); ++ bo->dev = dev; ++ bo->alloc_size = size; ++ bo->handle = args.handle; ++ bo->virtual_mc_base_address = amdgpu_vamgr_find_va(&dev->vamgr, size, 4 * 1024); ++ ++ memset(&va, 0, sizeof(va)); ++ va.in.handle = bo->handle; ++ va.in.operation = AMDGPU_VA_OP_MAP; ++ va.in.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | ++ AMDGPU_VM_PAGE_EXECUTABLE; ++ va.in.va_address = bo->virtual_mc_base_address; ++ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_GEM_VA, &va, sizeof(va)); ++ if (r || va.out.result == AMDGPU_VA_RESULT_ERROR) { ++ amdgpu_bo_free_internal(bo); ++ return r; ++ } ++ util_hash_table_set(dev->bo_vas, ++ (void*)(uintptr_t)bo->virtual_mc_base_address, bo); ++ info->buf_handle = bo; ++ info->virtual_mc_base_address = bo->virtual_mc_base_address; ++ return r; ++} +diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c +new file mode 100644 +index 0000000..614904d +--- /dev/null ++++ b/amdgpu/amdgpu_cs.c +@@ -0,0 +1,981 @@ ++/* ++ * Copyright 2014 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++*/ ++#include <stdlib.h> ++#include <stdio.h> ++#include <string.h> ++#include <errno.h> ++#include <pthread.h> ++#include <sched.h> ++#include <sys/ioctl.h> ++ ++#include "xf86drm.h" ++#include "amdgpu_drm.h" ++#include "amdgpu_internal.h" ++ ++/** ++ * Create an IB buffer. ++ * ++ * \param dev - \c [in] Device handle ++ * \param context - \c [in] GPU Context ++ * \param ib_size - \c [in] Size of allocation ++ * \param ib - \c [out] return the pointer to the created IB buffer ++ * ++ * \return 0 on success otherwise POSIX Error code ++*/ ++static int amdgpu_cs_create_ib(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ enum amdgpu_cs_ib_size ib_size, ++ amdgpu_ib_handle *ib) ++{ ++ struct amdgpu_bo_alloc_request alloc_buffer; ++ struct amdgpu_bo_alloc_result info; ++ int r; ++ void *cpu; ++ struct amdgpu_ib *new_ib; ++ ++ memset(&alloc_buffer, 0, sizeof(alloc_buffer)); ++ ++ switch (ib_size) { ++ case amdgpu_cs_ib_size_4K: ++ alloc_buffer.alloc_size = 4 * 1024; ++ break; ++ case amdgpu_cs_ib_size_16K: ++ alloc_buffer.alloc_size = 16 * 1024; ++ break; ++ case amdgpu_cs_ib_size_32K: ++ alloc_buffer.alloc_size = 32 * 1024; ++ break; ++ case amdgpu_cs_ib_size_64K: ++ alloc_buffer.alloc_size = 64 * 1024; ++ break; ++ case amdgpu_cs_ib_size_128K: ++ alloc_buffer.alloc_size = 128 * 1024; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ alloc_buffer.phys_alignment = 4 * 1024; ++ ++ alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT; ++ ++ r = amdgpu_bo_alloc(dev, ++ &alloc_buffer, ++ &info); ++ if (r) ++ return r; ++ ++ r = amdgpu_bo_cpu_map(info.buf_handle, &cpu); ++ if (r) { ++ amdgpu_bo_free(info.buf_handle); ++ return r; ++ } ++ ++ new_ib = malloc(sizeof(struct amdgpu_ib)); ++ if (NULL == new_ib) { ++ amdgpu_bo_cpu_unmap(info.buf_handle); ++ amdgpu_bo_free(info.buf_handle); ++ return -ENOMEM; ++ } ++ ++ new_ib->buf_handle = info.buf_handle; ++ new_ib->cpu = cpu; ++ new_ib->virtual_mc_base_address = info.virtual_mc_base_address; ++ new_ib->ib_size = ib_size; ++ *ib = new_ib; ++ return 0; ++} ++ ++/** ++ * Destroy an IB buffer. ++ * ++ * \param dev - \c [in] Device handle ++ * \param ib - \c [in] the IB buffer ++ * ++ * \return 0 on success otherwise POSIX Error code ++*/ ++static int amdgpu_cs_destroy_ib(amdgpu_device_handle dev, ++ amdgpu_ib_handle ib) ++{ ++ int r; ++ r = amdgpu_bo_cpu_unmap(ib->buf_handle); ++ if (r) ++ return r; ++ ++ r = amdgpu_bo_free(ib->buf_handle); ++ if (r) ++ return r; ++ ++ free(ib); ++ return 0; ++} ++ ++/** ++ * Initialize IB pools to empty. ++ * ++ * \param context - \c [in] GPU Context ++ * ++ * \return 0 on success otherwise POSIX Error code ++*/ ++static int amdgpu_cs_init_ib_pool(amdgpu_context_handle context) ++{ ++ int i; ++ int r; ++ ++ r = pthread_mutex_init(&context->pool_mutex, NULL); ++ if (r) ++ return r; ++ ++ for (i = 0; i < AMDGPU_CS_IB_SIZE_NUM; i++) ++ LIST_INITHEAD(&context->ib_pools[i]); ++ ++ return 0; ++} ++ ++/** ++ * Allocate an IB buffer from IB pools. 
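++ * (Editor's note, added commentary: returns -ENOMEM when the pool for the requested size class is empty; amdgpu_cs_alloc_ib_local() below then runs the garbage collector and retries before creating a fresh IB.)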
++ * ++ * \param dev - \c [in] Device handle ++ * \param context - \c [in] GPU Context ++ * \param ib_size - \c [in] Size of allocation ++ * \param ib - \c [out] return the pointer to the allocated IB buffer ++ * ++ * \return 0 on success otherwise POSIX Error code ++*/ ++static int amdgpu_cs_alloc_from_ib_pool(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ enum amdgpu_cs_ib_size ib_size, ++ amdgpu_ib_handle *ib) ++{ ++ int r; ++ struct list_head *head; ++ head = &context->ib_pools[ib_size]; ++ ++ r = -ENOMEM; ++ pthread_mutex_lock(&context->pool_mutex); ++ if (!LIST_IS_EMPTY(head)) { ++ *ib = LIST_ENTRY(struct amdgpu_ib, head->next, list_node); ++ LIST_DEL(&(*ib)->list_node); ++ r = 0; ++ } ++ pthread_mutex_unlock(&context->pool_mutex); ++ ++ return r; ++} ++ ++/** ++ * Free an IB buffer to IB pools. ++ * ++ * \param context - \c [in] GPU Context ++ * \param ib - \c [in] the IB buffer ++ * ++ * \return N/A ++*/ ++static void amdgpu_cs_free_to_ib_pool(amdgpu_context_handle context, ++ amdgpu_ib_handle ib) ++{ ++ struct list_head *head; ++ head = &context->ib_pools[ib->ib_size]; ++ pthread_mutex_lock(&context->pool_mutex); ++ LIST_ADD(&ib->list_node, head); ++ pthread_mutex_unlock(&context->pool_mutex); ++ return; ++} ++ ++/** ++ * Destroy all IB buffers in pools ++ * ++ * \param dev - \c [in] Device handle ++ * \param context - \c [in] GPU Context ++ * ++ * \return 0 on success otherwise POSIX Error code ++*/ ++static int amdgpu_cs_destroy_ib_pool(amdgpu_device_handle dev, ++ amdgpu_context_handle context) ++{ ++ int i; ++ int r; ++ struct list_head *head; ++ struct amdgpu_ib *next; ++ struct amdgpu_ib *storage; ++ ++ r = 0; ++ pthread_mutex_lock(&context->pool_mutex); ++ for (i = 0; i < AMDGPU_CS_IB_SIZE_NUM; i++) { ++ head = &context->ib_pools[i]; ++ LIST_FOR_EACH_ENTRY_SAFE(next, storage, head, list_node) { ++ r = amdgpu_cs_destroy_ib(dev, next); ++ if (r) ++ break; ++ } ++ } ++ pthread_mutex_unlock(&context->pool_mutex); ++ pthread_mutex_destroy(&context->pool_mutex); ++ return r; ++} ++ ++/** ++ * Initialize pending IB lists ++ * ++ * \param context - \c [in] GPU Context ++ * ++ * \return 0 on success otherwise POSIX Error code ++*/ ++static int amdgpu_cs_init_pendings(amdgpu_context_handle context) ++{ ++ unsigned ip, inst; ++ uint32_t ring; ++ int r; ++ ++ r = pthread_mutex_init(&context->pendings_mutex, NULL); ++ if (r) ++ return r; ++ ++ for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++) ++ for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++) ++ for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) ++ LIST_INITHEAD(&context->pendings[ip][inst][ring]); ++ ++ LIST_INITHEAD(&context->freed); ++ return 0; ++} ++ ++/** ++ * Free pending IBs ++ * ++ * \param dev - \c [in] Device handle ++ * \param context - \c [in] GPU Context ++ * ++ * \return 0 on success otherwise POSIX Error code ++*/ ++static int amdgpu_cs_destroy_pendings(amdgpu_device_handle dev, ++ amdgpu_context_handle context) ++{ ++ int ip, inst; ++ uint32_t ring; ++ int r; ++ struct amdgpu_ib *next; ++ struct amdgpu_ib *s; ++ struct list_head *head; ++ ++ r = 0; ++ pthread_mutex_lock(&context->pendings_mutex); ++ for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++) ++ for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++) ++ for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) { ++ head = &context->pendings[ip][inst][ring]; ++ LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) { ++ r = amdgpu_cs_destroy_ib(dev, next); ++ if (r) ++ break; ++ } ++ } ++ ++ head = &context->freed; ++ LIST_FOR_EACH_ENTRY_SAFE(next, s, 
head, list_node) { ++ r = amdgpu_cs_destroy_ib(dev, next); ++ if (r) ++ break; ++ } ++ ++ pthread_mutex_unlock(&context->pendings_mutex); ++ pthread_mutex_destroy(&context->pendings_mutex); ++ return r; ++} ++ ++/** ++ * Add IB to pending IB lists without holding sequence_mutex. ++ * ++ * \param context - \c [in] GPU Context ++ * \param ib - \c [in] ib to be added to pending lists ++ * \param ip - \c [in] hw ip block ++ * \param ip_instance - \c [in] instance of the hw ip block ++ * \param ring - \c [in] Ring of hw ip ++ * ++ * \return N/A ++*/ ++static void amdgpu_cs_add_pending(amdgpu_context_handle context, ++ amdgpu_ib_handle ib, ++ unsigned ip, unsigned ip_instance, ++ uint32_t ring) ++{ ++ struct list_head *head; ++ pthread_mutex_lock(&context->pendings_mutex); ++ head = &context->pendings[ip][ip_instance][ring]; ++ LIST_ADDTAIL(&ib->list_node, head); ++ pthread_mutex_unlock(&context->pendings_mutex); ++ return; ++} ++ ++/** ++ * Garbage collector on a pending IB list; does not take pendings_mutex itself. ++ * This function is not thread-safe on its own. ++ * ++ * \param context - \c [in] GPU Context ++ * \param ip - \c [in] hw ip block ++ * \param ip_instance - \c [in] instance of the hw ip block ++ * \param ring - \c [in] Ring of hw ip ++ * \param expired_fence - \c [in] expired fence value ++ * ++ * \return N/A ++ * \note Hold pendings_mutex before calling this function. ++*/ ++static void amdgpu_cs_pending_gc_not_safe(amdgpu_context_handle context, ++ unsigned ip, unsigned ip_instance, ++ uint32_t ring, ++ uint64_t expired_fence) ++{ ++ struct list_head *head; ++ struct amdgpu_ib *next; ++ struct amdgpu_ib *s; ++ int r; ++ ++ head = &context->pendings[ip][ip_instance][ring]; ++ LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) ++ if (next->cs_handle <= expired_fence) { ++ LIST_DEL(&next->list_node); ++ amdgpu_cs_free_to_ib_pool(context, next); ++ } else { ++ /* The pending list is a sorted list. ++ There is no need to continue. 
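(Editor's note, added commentary: IBs are appended under sequence_mutex in ascending fence order, so the first entry with cs_handle above expired_fence ends the scan.) 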
*/ ++ break; ++ } ++ ++ /* walk the freed list as well */ ++ head = &context->freed; ++ LIST_FOR_EACH_ENTRY_SAFE(next, s, head, list_node) { ++ bool busy; ++ ++ r = amdgpu_bo_wait_for_idle(next->buf_handle, 0, &busy); ++ if (r || busy) ++ break; ++ ++ LIST_DEL(&next->list_node); ++ amdgpu_cs_free_to_ib_pool(context, next); ++ } ++ ++ return; ++} ++ ++/** ++ * Garbage collector on a pending IB list ++ * ++ * \param context - \c [in] GPU Context ++ * \param ip - \c [in] hw ip block ++ * \param ip_instance - \c [in] instance of the hw ip block ++ * \param ring - \c [in] Ring of hw ip ++ * \param expired_fence - \c [in] fence expired ++ * ++ * \return N/A ++*/ ++static void amdgpu_cs_pending_gc(amdgpu_context_handle context, ++ unsigned ip, unsigned ip_instance, ++ uint32_t ring, ++ uint64_t expired_fence) ++{ ++ pthread_mutex_lock(&context->pendings_mutex); ++ amdgpu_cs_pending_gc_not_safe(context, ip, ip_instance, ring, ++ expired_fence); ++ pthread_mutex_unlock(&context->pendings_mutex); ++ return; ++} ++ ++/** ++ * Garbage collector on all pending IB lists ++ * ++ * \param context - \c [in] GPU Context ++ * ++ * \return N/A ++*/ ++static void amdgpu_cs_all_pending_gc(amdgpu_context_handle context) ++{ ++ unsigned ip, inst; ++ uint32_t ring; ++ uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS]; ++ ++ pthread_mutex_lock(&context->sequence_mutex); ++ for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++) ++ for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++) ++ for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) ++ expired_fences[ip][inst][ring] = ++ context->expired_fences[ip][inst][ring]; ++ pthread_mutex_unlock(&context->sequence_mutex); ++ ++ pthread_mutex_lock(&context->pendings_mutex); ++ for (ip = 0; ip < AMDGPU_HW_IP_NUM; ip++) ++ for (inst = 0; inst < AMDGPU_HW_IP_INSTANCE_MAX_COUNT; inst++) ++ for (ring = 0; ring < AMDGPU_CS_MAX_RINGS; ring++) ++ amdgpu_cs_pending_gc_not_safe(context, ip, inst, ring, ++ expired_fences[ip][inst][ring]); ++ pthread_mutex_unlock(&context->pendings_mutex); ++} ++ ++/** ++ * Allocate an IB buffer ++ * If there is no free IB buffer in pools, create one. ++ * ++ * \param dev - \c [in] Device handle ++ * \param context - \c [in] GPU Context ++ * \param ib_size - \c [in] Size of allocation ++ * \param ib - \c [out] return the pointer to the allocated IB buffer ++ * ++ * \return 0 on success otherwise POSIX Error code ++*/ ++static int amdgpu_cs_alloc_ib_local(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ enum amdgpu_cs_ib_size ib_size, ++ amdgpu_ib_handle *ib) ++{ ++ int r; ++ ++ r = amdgpu_cs_alloc_from_ib_pool(dev, context, ib_size, ib); ++ if (!r) ++ return r; ++ ++ amdgpu_cs_all_pending_gc(context); ++ ++ /* Retry to allocate from free IB pools after garbage collector. */ ++ r = amdgpu_cs_alloc_from_ib_pool(dev, context, ib_size, ib); ++ if (!r) ++ return r; ++ ++ /* There is no suitable IB in free pools. Create one. 
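(Editor's note, added commentary: if creation fails as well, the error, e.g. -ENOMEM, propagates back to the caller of amdgpu_cs_alloc_ib().) 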
*/ ++ r = amdgpu_cs_create_ib(dev, context, ib_size, ib); ++ return r; ++} ++ ++int amdgpu_cs_alloc_ib(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ enum amdgpu_cs_ib_size ib_size, ++ struct amdgpu_cs_ib_alloc_result *output) ++{ ++ int r; ++ amdgpu_ib_handle ib; ++ ++ if (NULL == dev) ++ return -EINVAL; ++ if (NULL == context) ++ return -EINVAL; ++ if (NULL == output) ++ return -EINVAL; ++ if (ib_size >= AMDGPU_CS_IB_SIZE_NUM) ++ return -EINVAL; ++ ++ r = amdgpu_cs_alloc_ib_local(dev, context, ib_size, &ib); ++ if (!r) { ++ output->handle = ib; ++ output->cpu = ib->cpu; ++ output->mc_address = ib->virtual_mc_base_address; ++ } ++ ++ return r; ++} ++ ++int amdgpu_cs_free_ib(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ amdgpu_ib_handle handle) ++{ ++ if (NULL == dev) ++ return -EINVAL; ++ if (NULL == context) ++ return -EINVAL; ++ if (NULL == handle) ++ return -EINVAL; ++ ++ pthread_mutex_lock(&context->pendings_mutex); ++ LIST_ADD(&handle->list_node, &context->freed); ++ pthread_mutex_unlock(&context->pendings_mutex); ++ return 0; ++} ++ ++/** ++ * Create command submission context ++ * ++ * \param dev - \c [in] amdgpu device handle ++ * \param context - \c [out] amdgpu context handle ++ * ++ * \return 0 on success otherwise POSIX Error code ++*/ ++int amdgpu_cs_ctx_create(amdgpu_device_handle dev, ++ amdgpu_context_handle *context) ++{ ++ struct amdgpu_context *gpu_context; ++ union drm_amdgpu_ctx args; ++ int r; ++ ++ if (NULL == dev) ++ return -EINVAL; ++ if (NULL == context) ++ return -EINVAL; ++ ++ gpu_context = calloc(1, sizeof(struct amdgpu_context)); ++ if (NULL == gpu_context) ++ return -ENOMEM; ++ ++ r = pthread_mutex_init(&gpu_context->sequence_mutex, NULL); ++ if (r) ++ goto error_mutex; ++ ++ r = amdgpu_cs_init_ib_pool(gpu_context); ++ if (r) ++ goto error_pool; ++ ++ r = amdgpu_cs_init_pendings(gpu_context); ++ if (r) ++ goto error_pendings; ++ ++ r = amdgpu_cs_alloc_ib_local(dev, gpu_context, amdgpu_cs_ib_size_4K, ++ &gpu_context->fence_ib); ++ if (r) ++ goto error_fence_ib; ++ ++ ++ memset(&args, 0, sizeof(args)); ++ args.in.op = AMDGPU_CTX_OP_ALLOC_CTX; ++ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args)); ++ if (r) ++ goto error_kernel; ++ ++ gpu_context->id = args.out.alloc.ctx_id; ++ *context = (amdgpu_context_handle)gpu_context; ++ ++ return 0; ++ ++error_kernel: ++ amdgpu_cs_free_ib(dev, gpu_context, gpu_context->fence_ib); ++ ++error_fence_ib: ++ amdgpu_cs_destroy_pendings(dev, gpu_context); ++ ++error_pendings: ++ amdgpu_cs_destroy_ib_pool(dev, gpu_context); ++ ++error_pool: ++ pthread_mutex_destroy(&gpu_context->sequence_mutex); ++ ++error_mutex: ++ free(gpu_context); ++ return r; ++} ++ ++/** ++ * Release command submission context ++ * ++ * \param dev - \c [in] amdgpu device handle ++ * \param context - \c [in] amdgpu context handle ++ * ++ * \return 0 on success otherwise POSIX Error code ++*/ ++int amdgpu_cs_ctx_free(amdgpu_device_handle dev, ++ amdgpu_context_handle context) ++{ ++ int r; ++ union drm_amdgpu_ctx args; ++ ++ if (NULL == dev) ++ return -EINVAL; ++ if (NULL == context) ++ return -EINVAL; ++ ++ r = amdgpu_cs_free_ib(dev, context, context->fence_ib); ++ if (r) ++ return r; ++ ++ r = amdgpu_cs_destroy_pendings(dev, context); ++ if (r) ++ return r; ++ ++ r = amdgpu_cs_destroy_ib_pool(dev, context); ++ if (r) ++ return r; ++ ++ pthread_mutex_destroy(&context->sequence_mutex); ++ ++ /* now deal with kernel side */ ++ memset(&args, 0, sizeof(args)); ++ args.in.op = AMDGPU_CTX_OP_FREE_CTX; ++ 
args.in.ctx_id = context->id; ++ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args)); ++ ++ free(context); ++ ++ return r; ++} ++ ++static int amdgpu_cs_create_bo_list(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ struct amdgpu_cs_request *request, ++ amdgpu_ib_handle fence_ib, ++ uint32_t *handle) ++{ ++ struct drm_amdgpu_bo_list_entry *list; ++ union drm_amdgpu_bo_list args; ++ unsigned num_resources; ++ unsigned i; ++ int r; ++ ++ num_resources = request->number_of_resources; ++ if (fence_ib) ++ ++num_resources; ++ ++ list = alloca(sizeof(struct drm_amdgpu_bo_list_entry) * num_resources); ++ ++ memset(&args, 0, sizeof(args)); ++ args.in.operation = AMDGPU_BO_LIST_OP_CREATE; ++ args.in.bo_number = num_resources; ++ args.in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry); ++ args.in.bo_info_ptr = (uint64_t)(uintptr_t)list; ++ ++ for (i = 0; i < request->number_of_resources; i++) { ++ list[i].bo_handle = request->resources[i]->handle; ++ if (request->resource_flags) ++ list[i].bo_priority = request->resource_flags[i]; ++ else ++ list[i].bo_priority = 0; ++ } ++ ++ if (fence_ib) ++ list[i].bo_handle = fence_ib->buf_handle->handle; ++ ++ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_BO_LIST, ++ &args, sizeof(args)); ++ if (r) ++ return r; ++ ++ *handle = args.out.list_handle; ++ return 0; ++} ++ ++static int amdgpu_cs_free_bo_list(amdgpu_device_handle dev, uint32_t handle) ++{ ++ union drm_amdgpu_bo_list args; ++ int r; ++ ++ memset(&args, 0, sizeof(args)); ++ args.in.operation = AMDGPU_BO_LIST_OP_DESTROY; ++ args.in.list_handle = handle; ++ ++ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_BO_LIST, ++ &args, sizeof(args)); ++ ++ return r; ++} ++ ++static uint32_t amdgpu_cs_fence_index(unsigned ip, unsigned ring) ++{ ++ return ip * AMDGPU_CS_MAX_RINGS + ring; ++} ++ ++/** ++ * Submit command to kernel DRM ++ * \param dev - \c [in] Device handle ++ * \param context - \c [in] GPU Context ++ * \param ibs_request - \c [in] Pointer to submission requests ++ * \param fence - \c [out] return fence for this submission ++ * ++ * \return 0 on success otherwise POSIX Error code ++ * \sa amdgpu_cs_submit() ++*/ ++static int amdgpu_cs_submit_one(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ struct amdgpu_cs_request *ibs_request, ++ uint64_t *fence) ++{ ++ int r; ++ uint32_t i, size; ++ union drm_amdgpu_cs cs; ++ uint64_t *chunk_array; ++ struct drm_amdgpu_cs_chunk *chunks; ++ struct drm_amdgpu_cs_chunk_data *chunk_data; ++ ++ if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM) ++ return -EINVAL; ++ if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS) ++ return -EINVAL; ++ if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT) ++ return -EINVAL; ++ ++ size = (ibs_request->number_of_ibs + 1) * ((sizeof(uint64_t) + ++ sizeof(struct drm_amdgpu_cs_chunk) + ++ sizeof(struct drm_amdgpu_cs_chunk_data)) + ++ ibs_request->number_of_resources + 1) * ++ sizeof(struct drm_amdgpu_bo_list_entry); ++ chunk_array = malloc(size); ++ if (NULL == chunk_array) ++ return -ENOMEM; ++ memset(chunk_array, 0, size); ++ ++ chunks = (struct drm_amdgpu_cs_chunk *)(chunk_array + ibs_request->number_of_ibs + 1); ++ chunk_data = (struct drm_amdgpu_cs_chunk_data *)(chunks + ibs_request->number_of_ibs + 1); ++ ++ memset(&cs, 0, sizeof(cs)); ++ cs.in.chunks = (uint64_t)(uintptr_t)chunk_array; ++ cs.in.ctx_id = context->id; ++ cs.in.num_chunks = ibs_request->number_of_ibs; ++ /* IB chunks */ ++ for (i = 0; i < ibs_request->number_of_ibs; i++) { ++ struct amdgpu_cs_ib_info *ib; ++ 
chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i]; ++ chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB; ++ chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4; ++ chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i]; ++ ++ ib = &ibs_request->ibs[i]; ++ ++ chunk_data[i].ib_data.handle = ib->ib_handle->buf_handle->handle; ++ chunk_data[i].ib_data.va_start = ib->ib_handle->virtual_mc_base_address; ++ chunk_data[i].ib_data.ib_bytes = ib->size * 4; ++ chunk_data[i].ib_data.ip_type = ibs_request->ip_type; ++ chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance; ++ chunk_data[i].ib_data.ring = ibs_request->ring; ++ ++ if (ib->flags & AMDGPU_CS_GFX_IB_CE) ++ chunk_data[i].ib_data.flags = AMDGPU_IB_FLAG_CE; ++ } ++ ++ r = amdgpu_cs_create_bo_list(dev, context, ibs_request, NULL, ++ &cs.in.bo_list_handle); ++ if (r) ++ goto error_free; ++ ++ pthread_mutex_lock(&context->sequence_mutex); ++ ++ if (ibs_request->ip_type != AMDGPU_HW_IP_UVD && ++ ibs_request->ip_type != AMDGPU_HW_IP_VCE) { ++ i = cs.in.num_chunks++; ++ ++ /* fence chunk */ ++ chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i]; ++ chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE; ++ chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4; ++ chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i]; ++ ++ /* fence bo handle */ ++ chunk_data[i].fence_data.handle = context->fence_ib->buf_handle->handle; ++ /* offset */ ++ chunk_data[i].fence_data.offset = amdgpu_cs_fence_index( ++ ibs_request->ip_type, ibs_request->ring); ++ chunk_data[i].fence_data.offset *= sizeof(uint64_t); ++ } ++ ++ r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS, ++ &cs, sizeof(cs)); ++ if (r) ++ goto error_unlock; ++ ++ ++ /* Hold sequence_mutex while adding records to the pending list, ++ so the pending list stays sorted by fence value. */ ++ ++ for (i = 0; i < ibs_request->number_of_ibs; i++) { ++ struct amdgpu_cs_ib_info *ib; ++ ++ ib = &ibs_request->ibs[i]; ++ if (ib->flags & AMDGPU_CS_REUSE_IB) ++ continue; ++ ++ ib->ib_handle->cs_handle = cs.out.handle; ++ ++ amdgpu_cs_add_pending(context, ib->ib_handle, ibs_request->ip_type, ++ ibs_request->ip_instance, ++ ibs_request->ring); ++ } ++ ++ *fence = cs.out.handle; ++ ++ pthread_mutex_unlock(&context->sequence_mutex); ++ ++ r = amdgpu_cs_free_bo_list(dev, cs.in.bo_list_handle); ++ if (r) ++ goto error_free; ++ ++ free(chunk_array); ++ return 0; ++ ++error_unlock: ++ pthread_mutex_unlock(&context->sequence_mutex); ++ ++error_free: ++ free(chunk_array); ++ return r; ++} ++ ++int amdgpu_cs_submit(amdgpu_device_handle dev, ++ amdgpu_context_handle context, ++ uint64_t flags, ++ struct amdgpu_cs_request *ibs_request, ++ uint32_t number_of_requests, ++ uint64_t *fences) ++{ ++ int r; ++ uint32_t i; ++ ++ if (NULL == dev) ++ return -EINVAL; ++ if (NULL == context) ++ return -EINVAL; ++ if (NULL == ibs_request) ++ return -EINVAL; ++ if (NULL == fences) ++ return -EINVAL; ++ ++ r = 0; ++ for (i = 0; i < number_of_requests; i++) { ++ r = amdgpu_cs_submit_one(dev, context, ibs_request, fences); ++ if (r) ++ break; ++ fences++; ++ ibs_request++; ++ } ++ ++ return r; ++} ++ ++/** ++ * Calculate absolute timeout. ++ * ++ * \param timeout - \c [in] timeout in nanoseconds. 
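++ * (Editor's note, added commentary: AMDGPU_TIMEOUT_INFINITE is passed through unchanged; any other value is converted into an absolute CLOCK_MONOTONIC deadline, e.g. a 2000000 ns timeout becomes now + 2 ms.)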
++ * ++ * \return absolute timeout in nanoseconds ++*/ ++uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout) ++{ ++ int r; ++ ++ if (timeout != AMDGPU_TIMEOUT_INFINITE) { ++ struct timespec current; ++ r = clock_gettime(CLOCK_MONOTONIC, &current); ++ if (r) ++ return r; ++ ++ timeout += ((uint64_t)current.tv_sec) * 1000000000ull; ++ timeout += current.tv_nsec; ++ } ++ return timeout; ++} ++ ++static int amdgpu_ioctl_wait_cs(amdgpu_device_handle dev, ++ unsigned ip, ++ unsigned ip_instance, ++ uint32_t ring, ++ uint64_t handle, ++ uint64_t timeout_ns, ++ bool *busy) ++{ ++ union drm_amdgpu_wait_cs args; ++ int r; ++ ++ memset(&args, 0, sizeof(args)); ++ args.in.handle = handle; ++ args.in.ip_type = ip; ++ args.in.ip_instance = ip_instance; ++ args.in.ring = ring; ++ args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns); ++ ++ /* Handle errors manually here because of timeout */ ++ r = ioctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args); ++ if (r == -1 && (errno == EINTR || errno == EAGAIN)) { ++ *busy = true; ++ return 0; ++ } else if (r) ++ return -errno; ++ ++ *busy = args.out.status; ++ return 0; ++} ++ ++int amdgpu_cs_query_fence_status(amdgpu_device_handle dev, ++ struct amdgpu_cs_query_fence *fence, ++ uint32_t *expired) ++{ ++ amdgpu_context_handle context; ++ uint64_t *signaled_fence; ++ uint64_t *expired_fence; ++ unsigned ip_type, ip_instance; ++ uint32_t ring; ++ bool busy = true; ++ int r; ++ ++ if (NULL == dev) ++ return -EINVAL; ++ if (NULL == fence) ++ return -EINVAL; ++ if (NULL == expired) ++ return -EINVAL; ++ if (NULL == fence->context) ++ return -EINVAL; ++ if (fence->ip_type >= AMDGPU_HW_IP_NUM) ++ return -EINVAL; ++ if (fence->ring >= AMDGPU_CS_MAX_RINGS) ++ return -EINVAL; ++ ++ context = fence->context; ++ ip_type = fence->ip_type; ++ ip_instance = fence->ip_instance; ++ ring = fence->ring; ++ signaled_fence = context->fence_ib->cpu; ++ signaled_fence += amdgpu_cs_fence_index(ip_type, ring); ++ expired_fence = &context->expired_fences[ip_type][ip_instance][ring]; ++ *expired = false; ++ ++ pthread_mutex_lock(&context->sequence_mutex); ++ if (fence->fence <= *expired_fence) { ++ /* This fence value is expired already. */ ++ pthread_mutex_unlock(&context->sequence_mutex); ++ *expired = true; ++ return 0; ++ } ++ ++ if (fence->fence <= *signaled_fence) { ++ /* This fence value is signaled already. */ ++ *expired_fence = *signaled_fence; ++ pthread_mutex_unlock(&context->sequence_mutex); ++ amdgpu_cs_pending_gc(context, ip_type, ip_instance, ring, ++ fence->fence); ++ *expired = true; ++ return 0; ++ } ++ ++ pthread_mutex_unlock(&context->sequence_mutex); ++ ++ r = amdgpu_ioctl_wait_cs(dev, ip_type, ip_instance, ring, ++ fence->fence, fence->timeout_ns, &busy); ++ if (!r && !busy) { ++ *expired = true; ++ pthread_mutex_lock(&context->sequence_mutex); ++ /* The thread doesn't hold sequence_mutex. Another thread could ++ have updated *expired_fence already. Check whether there is a ++ newly expired fence. */ ++ if (fence->fence > *expired_fence) { ++ *expired_fence = fence->fence; ++ pthread_mutex_unlock(&context->sequence_mutex); ++ amdgpu_cs_pending_gc(context, ip_type, ip_instance, ++ ring, fence->fence); ++ } else { ++ pthread_mutex_unlock(&context->sequence_mutex); ++ } ++ } ++ ++ return r; ++} ++ +diff --git a/amdgpu/amdgpu_device.c b/amdgpu/amdgpu_device.c +new file mode 100644 +index 0000000..66fa187 +--- /dev/null ++++ b/amdgpu/amdgpu_device.c +@@ -0,0 +1,242 @@ ++/* ++ * Copyright 2014 Advanced Micro Devices, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++*/ ++ ++/** ++ * \file amdgpu_device.c ++ * ++ * Implementation of functions for AMD GPU device ++ * ++ * ++ */ ++ ++#include <sys/stat.h> ++#include <errno.h> ++#include <string.h> ++#include <stdio.h> ++#include <stdlib.h> ++ ++#include "xf86drm.h" ++#include "amdgpu_drm.h" ++#include "amdgpu_internal.h" ++#include "util_hash_table.h" ++ ++#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x))) ++#define UINT_TO_PTR(x) ((void *)((intptr_t)(x))) ++#define RENDERNODE_MINOR_MASK 0xff7f ++ ++pthread_mutex_t fd_mutex = PTHREAD_MUTEX_INITIALIZER; ++static struct util_hash_table *fd_tab; ++ ++static unsigned handle_hash(void *key) ++{ ++ return PTR_TO_UINT(key); ++} ++ ++static int handle_compare(void *key1, void *key2) ++{ ++ return PTR_TO_UINT(key1) != PTR_TO_UINT(key2); ++} ++ ++static unsigned fd_hash(void *key) ++{ ++ int fd = PTR_TO_UINT(key); ++ struct stat stat; ++ fstat(fd, &stat); ++ ++ if (!S_ISCHR(stat.st_mode)) ++ return stat.st_dev ^ stat.st_ino; ++ else ++ return stat.st_dev ^ (stat.st_rdev & RENDERNODE_MINOR_MASK); ++} ++ ++static int fd_compare(void *key1, void *key2) ++{ ++ int fd1 = PTR_TO_UINT(key1); ++ int fd2 = PTR_TO_UINT(key2); ++ struct stat stat1, stat2; ++ fstat(fd1, &stat1); ++ fstat(fd2, &stat2); ++ ++ if (!S_ISCHR(stat1.st_mode) || !S_ISCHR(stat2.st_mode)) ++ return stat1.st_dev != stat2.st_dev || ++ stat1.st_ino != stat2.st_ino; ++ else ++ return major(stat1.st_rdev) != major(stat2.st_rdev) || ++ (minor(stat1.st_rdev) & RENDERNODE_MINOR_MASK) != ++ (minor(stat2.st_rdev) & RENDERNODE_MINOR_MASK); ++} ++ ++/** ++* Determine whether the fd is authenticated. ++* ++* \param fd - \c [in] File descriptor for AMD GPU device ++* \param auth - \c [out] Pointer to output whether the fd is authenticated ++* For a render node fd, auth is set to 0 ++* For a legacy fd, the authentication status is queried for compatibility with root ++* ++* \return 0 on success\n ++* >0 - AMD specific error code\n ++* <0 - Negative POSIX Error code ++*/ ++static int amdgpu_get_auth(int fd, int *auth) ++{ ++ int r = 0; ++ drm_client_t client; ++ struct stat stat1; ++ fstat(fd, &stat1); ++ if (minor(stat1.st_rdev) & ~RENDERNODE_MINOR_MASK) /* render node fd */ ++ *auth = 0; ++ else { ++ client.idx = 0; ++ r = drmIoctl(fd, DRM_IOCTL_GET_CLIENT, &client); ++ if (!r) ++ *auth = client.auth; ++ } ++ return r; ++} ++ ++int amdgpu_device_initialize(int fd, ++ uint32_t *major_version, ++ uint32_t *minor_version, ++ amdgpu_device_handle 
*device_handle) ++{ ++ struct amdgpu_device *dev; ++ drmVersionPtr version; ++ int r; ++ int flag_auth = 0; ++ int flag_authexist=0; ++ uint32_t accel_working; ++ ++ *device_handle = NULL; ++ ++ pthread_mutex_lock(&fd_mutex); ++ if (!fd_tab) ++ fd_tab = util_hash_table_create(fd_hash, fd_compare); ++ r = amdgpu_get_auth(fd, &flag_auth); ++ if (r) { ++ pthread_mutex_unlock(&fd_mutex); ++ return r; ++ } ++ dev = util_hash_table_get(fd_tab, UINT_TO_PTR(fd)); ++ if (dev) { ++ r = amdgpu_get_auth(dev->fd, &flag_authexist); ++ if (r) { ++ pthread_mutex_unlock(&fd_mutex); ++ return r; ++ } ++ if ((flag_auth) && (!flag_authexist)) { ++ dev->flink_fd = fd; ++ } ++ *major_version = dev->major_version; ++ *minor_version = dev->minor_version; ++ amdgpu_device_reference(device_handle, dev); ++ pthread_mutex_unlock(&fd_mutex); ++ return 0; ++ } ++ ++ dev = calloc(1, sizeof(struct amdgpu_device)); ++ if (!dev) { ++ pthread_mutex_unlock(&fd_mutex); ++ return -ENOMEM; ++ } ++ ++ atomic_set(&dev->refcount, 1); ++ ++ version = drmGetVersion(fd); ++ if (version->version_major != 3) { ++ fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " ++ "only compatible with 3.x.x.\n", ++ __func__, ++ version->version_major, ++ version->version_minor, ++ version->version_patchlevel); ++ drmFreeVersion(version); ++ r = -EBADF; ++ goto cleanup; ++ } ++ ++ dev->fd = fd; ++ dev->flink_fd = fd; ++ dev->major_version = version->version_major; ++ dev->minor_version = version->version_minor; ++ drmFreeVersion(version); ++ ++ dev->bo_flink_names = util_hash_table_create(handle_hash, ++ handle_compare); ++ dev->bo_handles = util_hash_table_create(handle_hash, handle_compare); ++ dev->bo_vas = util_hash_table_create(handle_hash, handle_compare); ++ pthread_mutex_init(&dev->bo_table_mutex, NULL); ++ ++ /* Check if acceleration is working. */ ++ r = amdgpu_query_info(dev, AMDGPU_INFO_ACCEL_WORKING, 4, &accel_working); ++ if (r) ++ goto cleanup; ++ if (!accel_working) { ++ r = -EBADF; ++ goto cleanup; ++ } ++ ++ r = amdgpu_query_gpu_info_init(dev); ++ if (r) ++ goto cleanup; ++ ++ amdgpu_vamgr_init(dev); ++ ++ *major_version = dev->major_version; ++ *minor_version = dev->minor_version; ++ *device_handle = dev; ++ util_hash_table_set(fd_tab, UINT_TO_PTR(fd), dev); ++ pthread_mutex_unlock(&fd_mutex); ++ ++ return 0; ++ ++cleanup: ++ free(dev); ++ pthread_mutex_unlock(&fd_mutex); ++ return r; ++} ++ ++void amdgpu_device_free_internal(amdgpu_device_handle dev) ++{ ++ util_hash_table_destroy(dev->bo_flink_names); ++ util_hash_table_destroy(dev->bo_handles); ++ util_hash_table_destroy(dev->bo_vas); ++ pthread_mutex_destroy(&dev->bo_table_mutex); ++ pthread_mutex_destroy(&(dev->vamgr.bo_va_mutex)); ++ util_hash_table_remove(fd_tab, UINT_TO_PTR(dev->fd)); ++ free(dev); ++} ++ ++int amdgpu_device_deinitialize(amdgpu_device_handle dev) ++{ ++ amdgpu_device_reference(&dev, NULL); ++ return 0; ++} ++ ++void amdgpu_device_reference(struct amdgpu_device **dst, ++ struct amdgpu_device *src) ++{ ++ if (update_references(&(*dst)->refcount, &src->refcount)) ++ amdgpu_device_free_internal(*dst); ++ *dst = src; ++} +diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c +new file mode 100644 +index 0000000..0b77731 +--- /dev/null ++++ b/amdgpu/amdgpu_gpu_info.c +@@ -0,0 +1,275 @@ ++/* ++ * Copyright © 2014 Advanced Micro Devices, Inc. ++ * All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include <errno.h> ++#include <string.h> ++ ++#include "amdgpu.h" ++#include "amdgpu_drm.h" ++#include "amdgpu_internal.h" ++#include "xf86drm.h" ++ ++int amdgpu_query_info(amdgpu_device_handle dev, unsigned info_id, ++ unsigned size, void *value) ++{ ++ struct drm_amdgpu_info request; ++ ++ memset(&request, 0, sizeof(request)); ++ request.return_pointer = (uintptr_t)value; ++ request.return_size = size; ++ request.query = info_id; ++ ++ return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, ++ sizeof(struct drm_amdgpu_info)); ++} ++ ++int amdgpu_query_crtc_from_id(amdgpu_device_handle dev, unsigned id, ++ int32_t *result) ++{ ++ struct drm_amdgpu_info request; ++ ++ memset(&request, 0, sizeof(request)); ++ request.return_pointer = (uintptr_t)result; ++ request.return_size = sizeof(*result); ++ request.query = AMDGPU_INFO_CRTC_FROM_ID; ++ request.mode_crtc.id = id; ++ ++ return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, ++ sizeof(struct drm_amdgpu_info)); ++} ++ ++int amdgpu_read_mm_registers(amdgpu_device_handle dev, unsigned dword_offset, ++ unsigned count, uint32_t instance, uint32_t flags, ++ uint32_t *values) ++{ ++ struct drm_amdgpu_info request; ++ ++ memset(&request, 0, sizeof(request)); ++ request.return_pointer = (uintptr_t)values; ++ request.return_size = count * sizeof(uint32_t); ++ request.query = AMDGPU_INFO_READ_MMR_REG; ++ request.read_mmr_reg.dword_offset = dword_offset; ++ request.read_mmr_reg.count = count; ++ request.read_mmr_reg.instance = instance; ++ request.read_mmr_reg.flags = flags; ++ ++ return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, ++ sizeof(struct drm_amdgpu_info)); ++} ++ ++int amdgpu_query_hw_ip_count(amdgpu_device_handle dev, unsigned type, ++ uint32_t *count) ++{ ++ struct drm_amdgpu_info request; ++ ++ memset(&request, 0, sizeof(request)); ++ request.return_pointer = (uintptr_t)count; ++ request.return_size = sizeof(*count); ++ request.query = AMDGPU_INFO_HW_IP_COUNT; ++ request.query_hw_ip.type = type; ++ ++ return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, ++ sizeof(struct drm_amdgpu_info)); ++} ++ ++int amdgpu_query_hw_ip_info(amdgpu_device_handle dev, unsigned type, ++ unsigned ip_instance, ++ struct drm_amdgpu_info_hw_ip *info) ++{ ++ struct drm_amdgpu_info request; ++ ++ memset(&request, 0, sizeof(request)); ++ request.return_pointer = (uintptr_t)info; ++ request.return_size = sizeof(*info); ++ request.query = 
AMDGPU_INFO_HW_IP_INFO; ++ request.query_hw_ip.type = type; ++ request.query_hw_ip.ip_instance = ip_instance; ++ ++ return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, ++ sizeof(struct drm_amdgpu_info)); ++} ++ ++int amdgpu_query_firmware_version(amdgpu_device_handle dev, unsigned fw_type, ++ unsigned ip_instance, unsigned index, ++ uint32_t *version, uint32_t *feature) ++{ ++ struct drm_amdgpu_info request; ++ struct drm_amdgpu_info_firmware firmware; ++ int r; ++ ++ memset(&request, 0, sizeof(request)); ++ request.return_pointer = (uintptr_t)&firmware; ++ request.return_size = sizeof(firmware); ++ request.query = AMDGPU_INFO_FW_VERSION; ++ request.query_fw.fw_type = fw_type; ++ request.query_fw.ip_instance = ip_instance; ++ request.query_fw.index = index; ++ ++ r = drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, ++ sizeof(struct drm_amdgpu_info)); ++ if (r) ++ return r; ++ ++ *version = firmware.ver; ++ *feature = firmware.feature; ++ return 0; ++} ++ ++int amdgpu_query_gpu_info_init(amdgpu_device_handle dev) ++{ ++ int r, i; ++ ++ r = amdgpu_query_info(dev, AMDGPU_INFO_DEV_INFO, sizeof(dev->dev_info), ++ &dev->dev_info); ++ if (r) ++ return r; ++ ++ dev->info.asic_id = dev->dev_info.device_id; ++ dev->info.chip_rev = dev->dev_info.chip_rev; ++ dev->info.chip_external_rev = dev->dev_info.external_rev; ++ dev->info.family_id = dev->dev_info.family; ++ dev->info.max_engine_clk = dev->dev_info.max_engine_clock; ++ dev->info.gpu_counter_freq = dev->dev_info.gpu_counter_freq; ++ dev->info.enabled_rb_pipes_mask = dev->dev_info.enabled_rb_pipes_mask; ++ dev->info.rb_pipes = dev->dev_info.num_rb_pipes; ++ dev->info.ids_flags = dev->dev_info.ids_flags; ++ dev->info.num_hw_gfx_contexts = dev->dev_info.num_hw_gfx_contexts; ++ dev->info.num_shader_engines = dev->dev_info.num_shader_engines; ++ dev->info.num_shader_arrays_per_engine = ++ dev->dev_info.num_shader_arrays_per_engine; ++ ++ for (i = 0; i < (int)dev->info.num_shader_engines; i++) { ++ unsigned instance = (i << AMDGPU_INFO_MMR_SE_INDEX_SHIFT) | ++ (AMDGPU_INFO_MMR_SH_INDEX_MASK << ++ AMDGPU_INFO_MMR_SH_INDEX_SHIFT); ++ ++ r = amdgpu_read_mm_registers(dev, 0x263d, 1, instance, 0, ++ &dev->info.backend_disable[i]); ++ if (r) ++ return r; ++ /* extract bitfield CC_RB_BACKEND_DISABLE.BACKEND_DISABLE */ ++ dev->info.backend_disable[i] = ++ (dev->info.backend_disable[i] >> 16) & 0xff; ++ ++ r = amdgpu_read_mm_registers(dev, 0xa0d4, 1, instance, 0, ++ &dev->info.pa_sc_raster_cfg[i]); ++ if (r) ++ return r; ++ ++ r = amdgpu_read_mm_registers(dev, 0xa0d5, 1, instance, 0, ++ &dev->info.pa_sc_raster_cfg1[i]); ++ if (r) ++ return r; ++ } ++ ++ r = amdgpu_read_mm_registers(dev, 0x2644, 32, 0xffffffff, 0, ++ dev->info.gb_tile_mode); ++ if (r) ++ return r; ++ ++ r = amdgpu_read_mm_registers(dev, 0x2664, 16, 0xffffffff, 0, ++ dev->info.gb_macro_tile_mode); ++ if (r) ++ return r; ++ ++ r = amdgpu_read_mm_registers(dev, 0x263e, 1, 0xffffffff, 0, ++ &dev->info.gb_addr_cfg); ++ if (r) ++ return r; ++ ++ r = amdgpu_read_mm_registers(dev, 0x9d8, 1, 0xffffffff, 0, ++ &dev->info.mc_arb_ramcfg); ++ if (r) ++ return r; ++ ++ dev->info.cu_active_number = dev->dev_info.cu_active_number; ++ dev->info.cu_ao_mask = dev->dev_info.cu_ao_mask; ++ memcpy(&dev->info.cu_bitmap[0][0], &dev->dev_info.cu_bitmap[0][0], sizeof(dev->info.cu_bitmap)); ++ ++ /* TODO: info->max_quad_shader_pipes is not set */ ++ /* TODO: info->avail_quad_shader_pipes is not set */ ++ /* TODO: info->cache_entries_per_quad_pipe is not set */ ++ /* TODO: info->active_rb_pipes is not set */ ++ 
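/* Editor's note, added commentary: the dword offsets above appear to be GCN configuration registers (0x263d CC_RB_BACKEND_DISABLE, 0xa0d4/0xa0d5 PA_SC_RASTER_CONFIG/_1, 0x2644 GB_TILE_MODE0, 0x2664 GB_MACROTILE_MODE0, 0x263e GB_ADDR_CONFIG, 0x9d8 MC_ARB_RAMCFG). */ ++ 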
return 0; ++} ++ ++int amdgpu_query_gpu_info(amdgpu_device_handle dev, ++ struct amdgpu_gpu_info *info) ++{ ++ /* Get ASIC info*/ ++ *info = dev->info; ++ ++ return 0; ++} ++ ++int amdgpu_query_heap_info(amdgpu_device_handle dev, ++ uint32_t heap, ++ uint32_t flags, ++ struct amdgpu_heap_info *info) ++{ ++ struct drm_amdgpu_info_vram_gtt vram_gtt_info; ++ int r; ++ ++ r = amdgpu_query_info(dev, AMDGPU_INFO_VRAM_GTT, ++ sizeof(vram_gtt_info), &vram_gtt_info); ++ if (r) ++ return r; ++ ++ /* Get heap information */ ++ switch (heap) { ++ case AMDGPU_GEM_DOMAIN_VRAM: ++ /* query visible only vram heap */ ++ if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) ++ info->heap_size = vram_gtt_info.vram_cpu_accessible_size; ++ else /* query total vram heap */ ++ info->heap_size = vram_gtt_info.vram_size; ++ ++ info->max_allocation = vram_gtt_info.vram_cpu_accessible_size; ++ ++ if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) ++ r = amdgpu_query_info(dev, AMDGPU_INFO_VIS_VRAM_USAGE, ++ sizeof(info->heap_usage), ++ &info->heap_usage); ++ else ++ r = amdgpu_query_info(dev, AMDGPU_INFO_VRAM_USAGE, ++ sizeof(info->heap_usage), ++ &info->heap_usage); ++ if (r) ++ return r; ++ break; ++ case AMDGPU_GEM_DOMAIN_GTT: ++ info->heap_size = vram_gtt_info.gtt_size; ++ info->max_allocation = vram_gtt_info.vram_cpu_accessible_size; ++ ++ r = amdgpu_query_info(dev, AMDGPU_INFO_GTT_USAGE, ++ sizeof(info->heap_usage), ++ &info->heap_usage); ++ if (r) ++ return r; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} +diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h +new file mode 100644 +index 0000000..b27756d +--- /dev/null ++++ b/amdgpu/amdgpu_internal.h +@@ -0,0 +1,210 @@ ++/* ++ * Copyright © 2014 Advanced Micro Devices, Inc. ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef _amdgpu_internal_h_ ++#define _amdgpu_internal_h_ ++ ++#ifdef HAVE_CONFIG_H ++#include "config.h" ++#endif ++ ++#include <assert.h> ++#include <pthread.h> ++#include "xf86atomic.h" ++#include "amdgpu.h" ++#include "util_double_list.h" ++ ++#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) ++ ++#define AMDGPU_CS_MAX_RINGS 8 ++ ++struct amdgpu_bo_va_hole { ++ struct list_head list; ++ uint64_t offset; ++ uint64_t size; ++}; ++ ++struct amdgpu_bo_va_mgr { ++ /* the start virtual address */ ++ uint64_t va_offset; ++ struct list_head va_holes; ++ pthread_mutex_t bo_va_mutex; ++ uint32_t va_alignment; ++}; ++ ++struct amdgpu_device { ++ atomic_t refcount; ++ int fd; ++ int flink_fd; ++ unsigned major_version; ++ unsigned minor_version; ++ ++ /** List of buffer handles. Protected by bo_table_mutex. */ ++ struct util_hash_table *bo_handles; ++ /** List of buffer GEM flink names. Protected by bo_table_mutex. */ ++ struct util_hash_table *bo_flink_names; ++ /** List of buffer virtual memory ranges. Protected by bo_table_mutex. */ ++ struct util_hash_table *bo_vas; ++ /** This protects all hash tables. */ ++ pthread_mutex_t bo_table_mutex; ++ struct amdgpu_bo_va_mgr vamgr; ++ struct drm_amdgpu_info_device dev_info; ++ struct amdgpu_gpu_info info; ++}; ++ ++struct amdgpu_bo { ++ atomic_t refcount; ++ struct amdgpu_device *dev; ++ ++ uint64_t alloc_size; ++ uint64_t virtual_mc_base_address; ++ ++ uint32_t handle; ++ uint32_t flink_name; ++ ++ pthread_mutex_t cpu_access_mutex; ++ void *cpu_ptr; ++ int cpu_map_count; ++}; ++ ++/* ++ * There are three mutexes. ++ * To avoid deadlock, only hold the mutexes in this order: ++ * sequence_mutex -> pendings_mutex -> pool_mutex. ++*/ ++struct amdgpu_context { ++ /** Mutex for accessing fences and to maintain command submissions ++ and pending lists in good sequence. */ ++ pthread_mutex_t sequence_mutex; ++ /** Buffer for user fences */ ++ struct amdgpu_ib *fence_ib; ++ /** The newest expired fence for the ring of the ip blocks. */ ++ uint64_t expired_fences[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS]; ++ /** Mutex for accessing pendings list. */ ++ pthread_mutex_t pendings_mutex; ++ /** Pending IBs. */ ++ struct list_head pendings[AMDGPU_HW_IP_NUM][AMDGPU_HW_IP_INSTANCE_MAX_COUNT][AMDGPU_CS_MAX_RINGS]; ++ /** Freed IBs not yet in pool */ ++ struct list_head freed; ++ /** Mutex for accessing free ib pool. */ ++ pthread_mutex_t pool_mutex; ++ /** Internal free IB pools. */ ++ struct list_head ib_pools[AMDGPU_CS_IB_SIZE_NUM]; ++ /* context id*/ ++ uint32_t id; ++}; ++ ++struct amdgpu_ib { ++ struct list_head list_node; ++ amdgpu_bo_handle buf_handle; ++ void *cpu; ++ uint64_t virtual_mc_base_address; ++ enum amdgpu_cs_ib_size ib_size; ++ uint64_t cs_handle; ++}; ++ ++/** ++ * Functions. ++ */ ++ ++void amdgpu_device_free_internal(amdgpu_device_handle dev); ++ ++void amdgpu_bo_free_internal(amdgpu_bo_handle bo); ++ ++void amdgpu_vamgr_init(struct amdgpu_device *dev); ++ ++uint64_t amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr, ++ uint64_t size, uint64_t alignment); ++ ++void amdgpu_vamgr_free_va(struct amdgpu_bo_va_mgr *mgr, uint64_t va, ++ uint64_t size); ++ ++int amdgpu_query_gpu_info_init(amdgpu_device_handle dev); ++ ++uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout); ++ ++/** ++ * Inline functions. ++ */ ++ ++/** ++ * Increment src and decrement dst as if we were updating references ++ * for an assignment between 2 pointers of some objects. 
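++ * (Editor's note, added commentary: src gains its reference before dst drops one; when the old dst count reaches zero the caller is expected to free the object, as amdgpu_bo_reference() below does.)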
++ * ++ * \return true if dst is 0 ++ */ ++static inline bool update_references(atomic_t *dst, atomic_t *src) ++{ ++ if (dst != src) { ++ /* bump src first */ ++ if (src) { ++ assert(atomic_read(src) > 0); ++ atomic_inc(src); ++ } ++ if (dst) { ++ assert(atomic_read(dst) > 0); ++ return atomic_dec_and_test(dst); ++ } ++ } ++ return false; ++} ++ ++/** ++ * Assignment between two amdgpu_bo pointers with reference counting. ++ * ++ * Usage: ++ * struct amdgpu_bo *dst = ... , *src = ...; ++ * ++ * dst = src; ++ * // No reference counting. Only use this when you need to move ++ * // a reference from one pointer to another. ++ * ++ * amdgpu_bo_reference(&dst, src); ++ * // Reference counters are updated. dst is decremented and src is ++ * // incremented. dst is freed if its reference counter is 0. ++ */ ++static inline void amdgpu_bo_reference(struct amdgpu_bo **dst, ++ struct amdgpu_bo *src) ++{ ++ if (update_references(&(*dst)->refcount, &src->refcount)) ++ amdgpu_bo_free_internal(*dst); ++ *dst = src; ++} ++ ++/** ++ * Assignment between two amdgpu_device pointers with reference counting. ++ * ++ * Usage: ++ * struct amdgpu_device *dst = ... , *src = ...; ++ * ++ * dst = src; ++ * // No reference counting. Only use this when you need to move ++ * // a reference from one pointer to another. ++ * ++ * amdgpu_device_reference(&dst, src); ++ * // Reference counters are updated. dst is decremented and src is ++ * // incremented. dst is freed if its reference counter is 0. ++ */ ++void amdgpu_device_reference(struct amdgpu_device **dst, ++ struct amdgpu_device *src); ++#endif +diff --git a/amdgpu/amdgpu_vamgr.c b/amdgpu/amdgpu_vamgr.c +new file mode 100644 +index 0000000..2335912 +--- /dev/null ++++ b/amdgpu/amdgpu_vamgr.c +@@ -0,0 +1,169 @@ ++/* ++ * Copyright 2014 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ *
++*/
++
++#include <stdlib.h>
++#include <string.h>
++#include "amdgpu.h"
++#include "amdgpu_drm.h"
++#include "amdgpu_internal.h"
++#include "util_math.h"
++
++void amdgpu_vamgr_init(struct amdgpu_device *dev)
++{
++ struct amdgpu_bo_va_mgr *vamgr = &dev->vamgr;
++
++ vamgr->va_offset = dev->dev_info.virtual_address_offset;
++ vamgr->va_alignment = dev->dev_info.virtual_address_alignment;
++
++ list_inithead(&vamgr->va_holes);
++ pthread_mutex_init(&vamgr->bo_va_mutex, NULL);
++}
++
++uint64_t amdgpu_vamgr_find_va(struct amdgpu_bo_va_mgr *mgr,
++ uint64_t size, uint64_t alignment)
++{
++ struct amdgpu_bo_va_hole *hole, *n;
++ uint64_t offset = 0, waste = 0;
++
++ alignment = MAX2(alignment, mgr->va_alignment);
++ size = ALIGN(size, mgr->va_alignment);
++
++ pthread_mutex_lock(&mgr->bo_va_mutex);
++ /* TODO: use a more appropriate way to track the holes */
++ /* first look for a hole */
++ LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
++ offset = hole->offset;
++ waste = offset % alignment;
++ waste = waste ? alignment - waste : 0;
++ offset += waste;
++ if (offset >= (hole->offset + hole->size)) {
++ continue;
++ }
++ if (!waste && hole->size == size) {
++ offset = hole->offset;
++ list_del(&hole->list);
++ free(hole);
++ pthread_mutex_unlock(&mgr->bo_va_mutex);
++ return offset;
++ }
++ if ((hole->size - waste) > size) {
++ if (waste) {
++ n = calloc(1,
++ sizeof(struct amdgpu_bo_va_hole));
++ n->size = waste;
++ n->offset = hole->offset;
++ list_add(&n->list, &hole->list);
++ }
++ hole->size -= (size + waste);
++ hole->offset += size + waste;
++ pthread_mutex_unlock(&mgr->bo_va_mutex);
++ return offset;
++ }
++ if ((hole->size - waste) == size) {
++ hole->size = waste;
++ pthread_mutex_unlock(&mgr->bo_va_mutex);
++ return offset;
++ }
++ }
++
++ offset = mgr->va_offset;
++ waste = offset % alignment;
++ waste = waste ? alignment - waste : 0;
++ if (waste) {
++ n = calloc(1, sizeof(struct amdgpu_bo_va_hole));
++ n->size = waste;
++ n->offset = offset;
++ list_add(&n->list, &mgr->va_holes);
++ }
++ offset += waste;
++ mgr->va_offset += size + waste;
++ pthread_mutex_unlock(&mgr->bo_va_mutex);
++ return offset;
++}
++
++void amdgpu_vamgr_free_va(struct amdgpu_bo_va_mgr *mgr, uint64_t va,
++ uint64_t size)
++{
++ struct amdgpu_bo_va_hole *hole;
++
++ size = ALIGN(size, mgr->va_alignment);
++
++ pthread_mutex_lock(&mgr->bo_va_mutex);
++ if ((va + size) == mgr->va_offset) {
++ mgr->va_offset = va;
++ /* Delete uppermost hole if it reaches the new top */
++ if (!LIST_IS_EMPTY(&mgr->va_holes)) {
++ hole = container_of(mgr->va_holes.next, hole, list);
++ if ((hole->offset + hole->size) == va) {
++ mgr->va_offset = hole->offset;
++ list_del(&hole->list);
++ free(hole);
++ }
++ }
++ } else {
++ struct amdgpu_bo_va_hole *next;
++
++ hole = container_of(&mgr->va_holes, hole, list);
++ LIST_FOR_EACH_ENTRY(next, &mgr->va_holes, list) {
++ if (next->offset < va)
++ break;
++ hole = next;
++ }
++
++ if (&hole->list != &mgr->va_holes) {
++ /* Grow upper hole if it's adjacent */
++ if (hole->offset == (va + size)) {
++ hole->offset = va;
++ hole->size += size;
++ /* Merge lower hole if it's adjacent */
++ if (next != hole
++ && &next->list != &mgr->va_holes
++ && (next->offset + next->size) == va) {
++ next->size += hole->size;
++ list_del(&hole->list);
++ free(hole);
++ }
++ goto out;
++ }
++ }
++
++ /* Grow lower hole if it's adjacent */
++ if (next != hole && &next->list != &mgr->va_holes &&
++ (next->offset + next->size) == va) {
++ next->size += size;
++ goto out;
++ }
++
++ /* FIXME: on allocation failure we just lose virtual address
++ * space; maybe print a warning
++ */
++ next = calloc(1, sizeof(struct amdgpu_bo_va_hole));
++ if (next) {
++ next->size = size;
++ next->offset = va;
++ list_add(&next->list, &hole->list);
++ }
++ }
++out:
++ pthread_mutex_unlock(&mgr->bo_va_mutex);
++}
+diff --git a/amdgpu/libdrm_amdgpu.pc.in b/amdgpu/libdrm_amdgpu.pc.in
+new file mode 100644
+index 0000000..417865e
+--- /dev/null
++++ b/amdgpu/libdrm_amdgpu.pc.in
+@@ -0,0 +1,10 @@
++prefix=@prefix@
++exec_prefix=@exec_prefix@
++libdir=@libdir@
++includedir=@includedir@
++
++Name: libdrm_amdgpu
++Description: Userspace interface to kernel DRM services for amdgpu
++Version: @PACKAGE_VERSION@
++Libs: -L${libdir} -ldrm_amdgpu
++Cflags: -I${includedir} -I${includedir}/libdrm
+diff --git a/amdgpu/util_double_list.h b/amdgpu/util_double_list.h
+new file mode 100644
+index 0000000..3f48ae2
+--- /dev/null
++++ b/amdgpu/util_double_list.h
+@@ -0,0 +1,146 @@
++/**************************************************************************
++ *
++ * Copyright 2006 VMware, Inc., Bismarck, ND. USA.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ **************************************************************************/
++
++/**
++ * \file
++ * List macros heavily inspired by the Linux kernel
++ * list handling, including iteration macros.
++ *
++ * It is not thread-safe, so common operations need to
++ * be protected by an external mutex.
++ */
++
++#ifndef _U_DOUBLE_LIST_H_
++#define _U_DOUBLE_LIST_H_
++
++
++#include <stddef.h>
++
++
++struct list_head
++{
++ struct list_head *prev;
++ struct list_head *next;
++};
++
++static inline void list_inithead(struct list_head *item)
++{
++ item->prev = item;
++ item->next = item;
++}
++
++static inline void list_add(struct list_head *item, struct list_head *list)
++{
++ item->prev = list;
++ item->next = list->next;
++ list->next->prev = item;
++ list->next = item;
++}
++
++static inline void list_addtail(struct list_head *item, struct list_head *list)
++{
++ item->next = list;
++ item->prev = list->prev;
++ list->prev->next = item;
++ list->prev = item;
++}
++
++static inline void list_replace(struct list_head *from, struct list_head *to)
++{
++ to->prev = from->prev;
++ to->next = from->next;
++ from->next->prev = to;
++ from->prev->next = to;
++}
++
++static inline void list_del(struct list_head *item)
++{
++ item->prev->next = item->next;
++ item->next->prev = item->prev;
++ item->prev = item->next = NULL;
++}
++
++static inline void list_delinit(struct list_head *item)
++{
++ item->prev->next = item->next;
++ item->next->prev = item->prev;
++ item->next = item;
++ item->prev = item;
++}
++
++#define LIST_INITHEAD(__item) list_inithead(__item)
++#define LIST_ADD(__item, __list) list_add(__item, __list)
++#define LIST_ADDTAIL(__item, __list) list_addtail(__item, __list)
++#define LIST_REPLACE(__from, __to) list_replace(__from, __to)
++#define LIST_DEL(__item) list_del(__item)
++#define LIST_DELINIT(__item) list_delinit(__item)
++
++#define LIST_ENTRY(__type, __item, __field) \
++ ((__type *)(((char *)(__item)) - offsetof(__type, __field)))
++
++#define LIST_IS_EMPTY(__list) \
++ ((__list)->next == (__list))
++
++/**
++ * Cast from a pointer to a member of a struct back to the containing struct.
++ *
++ * 'sample' MUST be initialized, or else the result is undefined!
++ */ ++#ifndef container_of ++#define container_of(ptr, sample, member) \ ++ (void *)((char *)(ptr) \ ++ - ((char *)&(sample)->member - (char *)(sample))) ++#endif ++ ++#define LIST_FOR_EACH_ENTRY(pos, head, member) \ ++ for (pos = NULL, pos = container_of((head)->next, pos, member); \ ++ &pos->member != (head); \ ++ pos = container_of(pos->member.next, pos, member)) ++ ++#define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \ ++ for (pos = NULL, pos = container_of((head)->next, pos, member), \ ++ storage = container_of(pos->member.next, pos, member); \ ++ &pos->member != (head); \ ++ pos = storage, storage = container_of(storage->member.next, storage, member)) ++ ++#define LIST_FOR_EACH_ENTRY_SAFE_REV(pos, storage, head, member) \ ++ for (pos = NULL, pos = container_of((head)->prev, pos, member), \ ++ storage = container_of(pos->member.prev, pos, member); \ ++ &pos->member != (head); \ ++ pos = storage, storage = container_of(storage->member.prev, storage, member)) ++ ++#define LIST_FOR_EACH_ENTRY_FROM(pos, start, head, member) \ ++ for (pos = NULL, pos = container_of((start), pos, member); \ ++ &pos->member != (head); \ ++ pos = container_of(pos->member.next, pos, member)) ++ ++#define LIST_FOR_EACH_ENTRY_FROM_REV(pos, start, head, member) \ ++ for (pos = NULL, pos = container_of((start), pos, member); \ ++ &pos->member != (head); \ ++ pos = container_of(pos->member.prev, pos, member)) ++ ++#endif /*_U_DOUBLE_LIST_H_*/ +diff --git a/amdgpu/util_hash.c b/amdgpu/util_hash.c +new file mode 100644 +index 0000000..b1e12c4 +--- /dev/null ++++ b/amdgpu/util_hash.c +@@ -0,0 +1,382 @@ ++/************************************************************************** ++ * ++ * Copyright 2007 VMware, Inc. ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. ++ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR ++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ * ++ **************************************************************************/ ++ ++ /* ++ * Authors: ++ * Zack Rusin <zackr@vmware.com> ++ */ ++ ++#include "util_hash.h" ++ ++#include <stdlib.h> ++#include <assert.h> ++ ++#define MAX(a, b) ((a > b) ? 
(a) : (b)) ++ ++static const int MinNumBits = 4; ++ ++static const unsigned char prime_deltas[] = { ++ 0, 0, 1, 3, 1, 5, 3, 3, 1, 9, 7, 5, 3, 9, 25, 3, ++ 1, 21, 3, 21, 7, 15, 9, 5, 3, 29, 15, 0, 0, 0, 0, 0 ++}; ++ ++static int primeForNumBits(int numBits) ++{ ++ return (1 << numBits) + prime_deltas[numBits]; ++} ++ ++/* Returns the smallest integer n such that ++ primeForNumBits(n) >= hint. ++*/ ++static int countBits(int hint) ++{ ++ int numBits = 0; ++ int bits = hint; ++ ++ while (bits > 1) { ++ bits >>= 1; ++ numBits++; ++ } ++ ++ if (numBits >= (int)sizeof(prime_deltas)) { ++ numBits = sizeof(prime_deltas) - 1; ++ } else if (primeForNumBits(numBits) < hint) { ++ ++numBits; ++ } ++ return numBits; ++} ++ ++struct util_node { ++ struct util_node *next; ++ unsigned key; ++ void *value; ++}; ++ ++struct util_hash_data { ++ struct util_node *fakeNext; ++ struct util_node **buckets; ++ int size; ++ int nodeSize; ++ short userNumBits; ++ short numBits; ++ int numBuckets; ++}; ++ ++struct util_hash { ++ union { ++ struct util_hash_data *d; ++ struct util_node *e; ++ } data; ++}; ++ ++static void *util_data_allocate_node(struct util_hash_data *hash) ++{ ++ return malloc(hash->nodeSize); ++} ++ ++static void util_free_node(struct util_node *node) ++{ ++ free(node); ++} ++ ++static struct util_node * ++util_hash_create_node(struct util_hash *hash, ++ unsigned akey, void *avalue, ++ struct util_node **anextNode) ++{ ++ struct util_node *node = util_data_allocate_node(hash->data.d); ++ ++ if (!node) ++ return NULL; ++ ++ node->key = akey; ++ node->value = avalue; ++ ++ node->next = (struct util_node*)(*anextNode); ++ *anextNode = node; ++ ++hash->data.d->size; ++ return node; ++} ++ ++static void util_data_rehash(struct util_hash_data *hash, int hint) ++{ ++ if (hint < 0) { ++ hint = countBits(-hint); ++ if (hint < MinNumBits) ++ hint = MinNumBits; ++ hash->userNumBits = (short)hint; ++ while (primeForNumBits(hint) < (hash->size >> 1)) ++ ++hint; ++ } else if (hint < MinNumBits) { ++ hint = MinNumBits; ++ } ++ ++ if (hash->numBits != hint) { ++ struct util_node *e = (struct util_node *)(hash); ++ struct util_node **oldBuckets = hash->buckets; ++ int oldNumBuckets = hash->numBuckets; ++ int i = 0; ++ ++ hash->numBits = (short)hint; ++ hash->numBuckets = primeForNumBits(hint); ++ hash->buckets = malloc(sizeof(struct util_node*) * hash->numBuckets); ++ for (i = 0; i < hash->numBuckets; ++i) ++ hash->buckets[i] = e; ++ ++ for (i = 0; i < oldNumBuckets; ++i) { ++ struct util_node *firstNode = oldBuckets[i]; ++ while (firstNode != e) { ++ unsigned h = firstNode->key; ++ struct util_node *lastNode = firstNode; ++ struct util_node *afterLastNode; ++ struct util_node **beforeFirstNode; ++ ++ while (lastNode->next != e && lastNode->next->key == h) ++ lastNode = lastNode->next; ++ ++ afterLastNode = lastNode->next; ++ beforeFirstNode = &hash->buckets[h % hash->numBuckets]; ++ while (*beforeFirstNode != e) ++ beforeFirstNode = &(*beforeFirstNode)->next; ++ lastNode->next = *beforeFirstNode; ++ *beforeFirstNode = firstNode; ++ firstNode = afterLastNode; ++ } ++ } ++ free(oldBuckets); ++ } ++} ++ ++static void util_data_might_grow(struct util_hash_data *hash) ++{ ++ if (hash->size >= hash->numBuckets) ++ util_data_rehash(hash, hash->numBits + 1); ++} ++ ++static void util_data_has_shrunk(struct util_hash_data *hash) ++{ ++ if (hash->size <= (hash->numBuckets >> 3) && ++ hash->numBits > hash->userNumBits) { ++ int max = MAX(hash->numBits-2, hash->userNumBits); ++ util_data_rehash(hash, max); ++ } ++} ++ 
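
The table above keeps a prime number of buckets (primeForNumBits) and rehashes whenever the entry count reaches the bucket count, or falls to an eighth of it. As orientation for the public API this file implements (declared in util_hash.h further down in this patch), here is a minimal usage sketch; it is a hypothetical standalone example, not part of the patch:

#include <assert.h>
#include <stdio.h>
#include "util_hash.h"

int main(void)
{
    struct util_hash *hash = util_hash_create();
    int a = 10, b = 20;

    if (!hash)
        return 1;

    /* Keys are plain unsigned values; values are opaque pointers. */
    util_hash_insert(hash, 1, &a);
    util_hash_insert(hash, 2, &b);

    /* A lookup returns an iterator into the per-bucket collision list. */
    struct util_hash_iter iter = util_hash_find(hash, 1);
    assert(!util_hash_iter_is_null(iter));
    printf("key %u -> %d\n", util_hash_iter_key(iter),
           *(int *)util_hash_iter_data(iter));

    /* util_hash_take() unlinks the entry and hands back its value. */
    assert(util_hash_take(hash, 2) == &b);

    util_hash_delete(hash);
    return 0;
}
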
++static struct util_node *util_data_first_node(struct util_hash_data *hash) ++{ ++ struct util_node *e = (struct util_node *)(hash); ++ struct util_node **bucket = hash->buckets; ++ int n = hash->numBuckets; ++ while (n--) { ++ if (*bucket != e) ++ return *bucket; ++ ++bucket; ++ } ++ return e; ++} ++ ++static struct util_node **util_hash_find_node(struct util_hash *hash, unsigned akey) ++{ ++ struct util_node **node; ++ ++ if (hash->data.d->numBuckets) { ++ node = (struct util_node **)(&hash->data.d->buckets[akey % hash->data.d->numBuckets]); ++ assert(*node == hash->data.e || (*node)->next); ++ while (*node != hash->data.e && (*node)->key != akey) ++ node = &(*node)->next; ++ } else { ++ node = (struct util_node **)((const struct util_node * const *)(&hash->data.e)); ++ } ++ return node; ++} ++ ++struct util_hash_iter util_hash_insert(struct util_hash *hash, ++ unsigned key, void *data) ++{ ++ util_data_might_grow(hash->data.d); ++ ++ { ++ struct util_node **nextNode = util_hash_find_node(hash, key); ++ struct util_node *node = util_hash_create_node(hash, key, data, nextNode); ++ if (!node) { ++ struct util_hash_iter null_iter = {hash, 0}; ++ return null_iter; ++ } ++ ++ { ++ struct util_hash_iter iter = {hash, node}; ++ return iter; ++ } ++ } ++} ++ ++struct util_hash * util_hash_create(void) ++{ ++ struct util_hash *hash = malloc(sizeof(struct util_hash)); ++ if (!hash) ++ return NULL; ++ ++ hash->data.d = malloc(sizeof(struct util_hash_data)); ++ if (!hash->data.d) { ++ free(hash); ++ return NULL; ++ } ++ ++ hash->data.d->fakeNext = 0; ++ hash->data.d->buckets = 0; ++ hash->data.d->size = 0; ++ hash->data.d->nodeSize = sizeof(struct util_node); ++ hash->data.d->userNumBits = (short)MinNumBits; ++ hash->data.d->numBits = 0; ++ hash->data.d->numBuckets = 0; ++ ++ return hash; ++} ++ ++void util_hash_delete(struct util_hash *hash) ++{ ++ struct util_node *e_for_x = (struct util_node *)(hash->data.d); ++ struct util_node **bucket = (struct util_node **)(hash->data.d->buckets); ++ int n = hash->data.d->numBuckets; ++ while (n--) { ++ struct util_node *cur = *bucket++; ++ while (cur != e_for_x) { ++ struct util_node *next = cur->next; ++ util_free_node(cur); ++ cur = next; ++ } ++ } ++ free(hash->data.d->buckets); ++ free(hash->data.d); ++ free(hash); ++} ++ ++struct util_hash_iter util_hash_find(struct util_hash *hash, ++ unsigned key) ++{ ++ struct util_node **nextNode = util_hash_find_node(hash, key); ++ struct util_hash_iter iter = {hash, *nextNode}; ++ return iter; ++} ++ ++unsigned util_hash_iter_key(struct util_hash_iter iter) ++{ ++ if (!iter.node || iter.hash->data.e == iter.node) ++ return 0; ++ return iter.node->key; ++} ++ ++void * util_hash_iter_data(struct util_hash_iter iter) ++{ ++ if (!iter.node || iter.hash->data.e == iter.node) ++ return 0; ++ return iter.node->value; ++} ++ ++static struct util_node *util_hash_data_next(struct util_node *node) ++{ ++ union { ++ struct util_node *next; ++ struct util_node *e; ++ struct util_hash_data *d; ++ } a; ++ int start; ++ struct util_node **bucket; ++ int n; ++ ++ a.next = node->next; ++ if (!a.next) { ++ /* iterating beyond the last element */ ++ return 0; ++ } ++ if (a.next->next) ++ return a.next; ++ ++ start = (node->key % a.d->numBuckets) + 1; ++ bucket = a.d->buckets + start; ++ n = a.d->numBuckets - start; ++ while (n--) { ++ if (*bucket != a.e) ++ return *bucket; ++ ++bucket; ++ } ++ return a.e; ++} ++ ++struct util_hash_iter util_hash_iter_next(struct util_hash_iter iter) ++{ ++ struct util_hash_iter next = {iter.hash, 
util_hash_data_next(iter.node)};
++ return next;
++}
++
++int util_hash_iter_is_null(struct util_hash_iter iter)
++{
++ if (!iter.node || iter.node == iter.hash->data.e)
++ return 1;
++ return 0;
++}
++
++void * util_hash_take(struct util_hash *hash,
++ unsigned akey)
++{
++ struct util_node **node = util_hash_find_node(hash, akey);
++ if (*node != hash->data.e) {
++ void *t = (*node)->value;
++ struct util_node *next = (*node)->next;
++ util_free_node(*node);
++ *node = next;
++ --hash->data.d->size;
++ util_data_has_shrunk(hash->data.d);
++ return t;
++ }
++ return 0;
++}
++
++struct util_hash_iter util_hash_first_node(struct util_hash *hash)
++{
++ struct util_hash_iter iter = {hash, util_data_first_node(hash->data.d)};
++ return iter;
++}
++
++struct util_hash_iter util_hash_erase(struct util_hash *hash, struct util_hash_iter iter)
++{
++ struct util_hash_iter ret = iter;
++ struct util_node *node = iter.node;
++ struct util_node **node_ptr;
++
++ if (node == hash->data.e)
++ return iter;
++
++ ret = util_hash_iter_next(ret);
++ node_ptr = (struct util_node**)(&hash->data.d->buckets[node->key % hash->data.d->numBuckets]);
++ while (*node_ptr != node)
++ node_ptr = &(*node_ptr)->next;
++ *node_ptr = node->next;
++ util_free_node(node);
++ --hash->data.d->size;
++ return ret;
++}
+diff --git a/amdgpu/util_hash.h b/amdgpu/util_hash.h
+new file mode 100644
+index 0000000..8e0f9a2
+--- /dev/null
++++ b/amdgpu/util_hash.h
+@@ -0,0 +1,99 @@
++/**************************************************************************
++ *
++ * Copyright 2007 VMware, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
++ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ **************************************************************************/
++
++/**
++ * @file
++ * Hash implementation.
++ *
++ * This file provides a hash implementation that is capable of dealing
++ * with collisions. It stores colliding entries in a linked list. All
++ * functions operating on the hash return an iterator. The iterator
++ * itself points to the collision list. If there wasn't any collision,
++ * the list will have just one entry; otherwise client code should
++ * iterate over the entries to find the exact entry among ones that
++ * had the same key (e.g. memcmp could be used on the data to check
++ * that)
++ *
++ * @author Zack Rusin <zackr@vmware.com>
++ */
++
++#ifndef UTIL_HASH_H
++#define UTIL_HASH_H
++
++#include <stdbool.h>
++
++struct util_hash;
++struct util_node;
++
++struct util_hash_iter {
++ struct util_hash *hash;
++ struct util_node *node;
++};
++
++
++struct util_hash *util_hash_create(void);
++void util_hash_delete(struct util_hash *hash);
++
++
++/**
++ * Adds data with the given key to the hash. If an entry with the given
++ * key is already in the hash, the new entry is inserted before it
++ * in the collision list.
++ * Returns an iterator pointing to the inserted item in the hash.
++ */
++struct util_hash_iter util_hash_insert(struct util_hash *hash, unsigned key,
++ void *data);
++
++/**
++ * Removes the item pointed to by the current iterator from the hash.
++ * Note that the data itself is not erased; if it was a malloc'ed pointer,
++ * the caller will have to free it after calling this function.
++ * Returns an iterator pointing to the item after the removed one in
++ * the hash.
++ */
++struct util_hash_iter util_hash_erase(struct util_hash *hash,
++ struct util_hash_iter iter);
++
++void *util_hash_take(struct util_hash *hash, unsigned key);
++
++
++struct util_hash_iter util_hash_first_node(struct util_hash *hash);
++
++/**
++ * Return an iterator pointing to the first entry in the collision list.
++ */
++struct util_hash_iter util_hash_find(struct util_hash *hash, unsigned key);
++
++
++int util_hash_iter_is_null(struct util_hash_iter iter);
++unsigned util_hash_iter_key(struct util_hash_iter iter);
++void *util_hash_iter_data(struct util_hash_iter iter);
++
++
++struct util_hash_iter util_hash_iter_next(struct util_hash_iter iter);
++
++#endif
+diff --git a/amdgpu/util_hash_table.c b/amdgpu/util_hash_table.c
+new file mode 100644
+index 0000000..cb7213c
+--- /dev/null
++++ b/amdgpu/util_hash_table.c
+@@ -0,0 +1,257 @@
++/**************************************************************************
++ *
++ * Copyright 2008 VMware, Inc.
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
++ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ **************************************************************************/
++
++/**
++ * @file
++ * General purpose hash table implementation.
++ *
++ * Just uses util_hash for now, but it might be better to switch to a linear
++ * probing hash table implementation at some point -- such tables are said to
++ * have better lookup and cache performance, and it appears possible to write
++ * a lock-free implementation of such hash tables.
++ *
++ * @author José Fonseca <jfonseca@vmware.com>
++ */
++
++
++
++#include "util_hash_table.h"
++#include "util_hash.h"
++
++#include <stdlib.h>
++#include <assert.h>
++
++struct util_hash_table
++{
++ struct util_hash *head;
++
++ /** Hash function */
++ unsigned (*make_hash)(void *key);
++
++ /** Compare two keys */
++ int (*compare)(void *key1, void *key2);
++};
++
++struct util_hash_table_item
++{
++ void *key;
++ void *value;
++};
++
++
++static struct util_hash_table_item *
++util_hash_table_item(struct util_hash_iter iter)
++{
++ return (struct util_hash_table_item *)util_hash_iter_data(iter);
++}
++
++struct util_hash_table *util_hash_table_create(unsigned (*hash)(void *key),
++ int (*compare)(void *key1, void *key2))
++{
++ struct util_hash_table *ht;
++
++ ht = malloc(sizeof(struct util_hash_table));
++ if(!ht)
++ return NULL;
++
++ ht->head = util_hash_create();
++ if(!ht->head) {
++ free(ht);
++ return NULL;
++ }
++
++ ht->make_hash = hash;
++ ht->compare = compare;
++
++ return ht;
++}
++
++static struct util_hash_iter
++util_hash_table_find_iter(struct util_hash_table *ht,
++ void *key, unsigned key_hash)
++{
++ struct util_hash_iter iter;
++ struct util_hash_table_item *item;
++
++ iter = util_hash_find(ht->head, key_hash);
++ while (!util_hash_iter_is_null(iter)) {
++ item = (struct util_hash_table_item *)util_hash_iter_data(iter);
++ if (!ht->compare(item->key, key))
++ break;
++ iter = util_hash_iter_next(iter);
++ }
++
++ return iter;
++}
++
++static struct util_hash_table_item *
++util_hash_table_find_item(struct util_hash_table *ht,
++ void *key, unsigned key_hash)
++{
++ struct util_hash_iter iter;
++ struct util_hash_table_item *item;
++
++ iter = util_hash_find(ht->head, key_hash);
++ while (!util_hash_iter_is_null(iter)) {
++ item = (struct util_hash_table_item *)util_hash_iter_data(iter);
++ if (!ht->compare(item->key, key))
++ return item;
++ iter = util_hash_iter_next(iter);
++ }
++
++ return NULL;
++}
++
++void util_hash_table_set(struct util_hash_table *ht, void *key, void *value)
++{
++ unsigned key_hash;
++ struct util_hash_table_item *item;
++ struct util_hash_iter iter;
++
++ assert(ht);
++ if (!ht)
++ return;
++
++ key_hash = ht->make_hash(key);
++
++ item = util_hash_table_find_item(ht, key, key_hash);
++ if(item) {
++ /* TODO: key/value destruction?
*/ ++ item->value = value; ++ return; ++ } ++ ++ item = malloc(sizeof(struct util_hash_table_item)); ++ if(!item) ++ return; ++ ++ item->key = key; ++ item->value = value; ++ ++ iter = util_hash_insert(ht->head, key_hash, item); ++ if(util_hash_iter_is_null(iter)) { ++ free(item); ++ return; ++ } ++} ++ ++void *util_hash_table_get(struct util_hash_table *ht, void *key) ++{ ++ unsigned key_hash; ++ struct util_hash_table_item *item; ++ ++ assert(ht); ++ if (!ht) ++ return NULL; ++ ++ key_hash = ht->make_hash(key); ++ ++ item = util_hash_table_find_item(ht, key, key_hash); ++ if(!item) ++ return NULL; ++ ++ return item->value; ++} ++ ++void util_hash_table_remove(struct util_hash_table *ht, void *key) ++{ ++ unsigned key_hash; ++ struct util_hash_iter iter; ++ struct util_hash_table_item *item; ++ ++ assert(ht); ++ if (!ht) ++ return; ++ ++ key_hash = ht->make_hash(key); ++ ++ iter = util_hash_table_find_iter(ht, key, key_hash); ++ if(util_hash_iter_is_null(iter)) ++ return; ++ ++ item = util_hash_table_item(iter); ++ assert(item); ++ free(item); ++ ++ util_hash_erase(ht->head, iter); ++} ++ ++void util_hash_table_clear(struct util_hash_table *ht) ++{ ++ struct util_hash_iter iter; ++ struct util_hash_table_item *item; ++ ++ assert(ht); ++ if (!ht) ++ return; ++ ++ iter = util_hash_first_node(ht->head); ++ while (!util_hash_iter_is_null(iter)) { ++ item = (struct util_hash_table_item *)util_hash_take(ht->head, util_hash_iter_key(iter)); ++ free(item); ++ iter = util_hash_first_node(ht->head); ++ } ++} ++ ++void util_hash_table_foreach(struct util_hash_table *ht, ++ void (*callback)(void *key, void *value, void *data), ++ void *data) ++{ ++ struct util_hash_iter iter; ++ struct util_hash_table_item *item; ++ ++ assert(ht); ++ if (!ht) ++ return; ++ ++ iter = util_hash_first_node(ht->head); ++ while (!util_hash_iter_is_null(iter)) { ++ item = (struct util_hash_table_item *)util_hash_iter_data(iter); ++ callback(item->key, item->value, data); ++ iter = util_hash_iter_next(iter); ++ } ++} ++ ++void util_hash_table_destroy(struct util_hash_table *ht) ++{ ++ struct util_hash_iter iter; ++ struct util_hash_table_item *item; ++ ++ assert(ht); ++ if (!ht) ++ return; ++ ++ iter = util_hash_first_node(ht->head); ++ while (!util_hash_iter_is_null(iter)) { ++ item = (struct util_hash_table_item *)util_hash_iter_data(iter); ++ free(item); ++ iter = util_hash_iter_next(iter); ++ } ++ ++ util_hash_delete(ht->head); ++ free(ht); ++} +diff --git a/amdgpu/util_hash_table.h b/amdgpu/util_hash_table.h +new file mode 100644 +index 0000000..04fe704 +--- /dev/null ++++ b/amdgpu/util_hash_table.h +@@ -0,0 +1,65 @@ ++/************************************************************************** ++ * ++ * Copyright 2008 VMware, Inc. ++ * All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. 
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
++ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ **************************************************************************/
++
++/**
++ * General purpose hash table.
++ *
++ * @author José Fonseca <jfonseca@vmware.com>
++ */
++
++#ifndef U_HASH_TABLE_H_
++#define U_HASH_TABLE_H_
++
++/**
++ * Generic purpose hash table.
++ */
++struct util_hash_table;
++
++/**
++ * Create a hash table.
++ *
++ * @param hash hash function
++ * @param compare should return 0 for two equal keys.
++ */
++struct util_hash_table *util_hash_table_create(unsigned (*hash)(void *key),
++ int (*compare)(void *key1, void *key2));
++
++void util_hash_table_set(struct util_hash_table *ht, void *key, void *value);
++
++void *util_hash_table_get(struct util_hash_table *ht, void *key);
++
++void util_hash_table_remove(struct util_hash_table *ht, void *key);
++
++void util_hash_table_clear(struct util_hash_table *ht);
++
++void util_hash_table_foreach(struct util_hash_table *ht,
++ void (*callback)(void *key, void *value, void *data),
++ void *data);
++
++void util_hash_table_destroy(struct util_hash_table *ht);
++
++#endif /* U_HASH_TABLE_H_ */
+diff --git a/amdgpu/util_math.h b/amdgpu/util_math.h
+new file mode 100644
+index 0000000..b8de0f8
+--- /dev/null
++++ b/amdgpu/util_math.h
+@@ -0,0 +1,32 @@
++/*
++ * Copyright 2014 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++*/
++
++#ifndef _UTIL_MATH_H_
++#define _UTIL_MATH_H_
++
++#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) )
++#define MAX2( A, B ) ( (A)>(B) ?
(A) : (B) ) ++ ++#define ALIGN( value, alignment ) ( ((value) + (alignment) - 1) & ~((alignment) - 1) ) ++ ++#endif /*_UTIL_MATH_H_*/ +diff --git a/configure.ac b/configure.ac +index 155d577..509f2d4 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -36,6 +36,7 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) + + # Check for programs + AC_PROG_CC ++AC_PROG_CXX + + AC_USE_SYSTEM_EXTENSIONS + AC_SYS_LARGEFILE +@@ -74,6 +75,11 @@ AC_ARG_ENABLE(radeon, + [Enable support for radeon's KMS API (default: auto)]), + [RADEON=$enableval], [RADEON=auto]) + ++AC_ARG_ENABLE(amdgpu, ++ AS_HELP_STRING([--disable-amdgpu], ++ [Enable support for amdgpu's KMS API (default: auto)]), ++ [AMDGPU=$enableval], [AMDGPU=auto]) ++ + AC_ARG_ENABLE(nouveau, + AS_HELP_STRING([--disable-nouveau], + [Enable support for nouveau's KMS API (default: auto)]), +@@ -236,6 +242,9 @@ if test "x$drm_cv_atomic_primitives" = "xnone"; then + LIBDRM_ATOMICS_NOT_FOUND_MSG($RADEON, radeon, Radeon, radeon) + RADEON=no + ++ LIBDRM_ATOMICS_NOT_FOUND_MSG($AMDGPU, amdgpu, AMD, amdgpu) ++ AMDGPU=no ++ + LIBDRM_ATOMICS_NOT_FOUND_MSG($NOUVEAU, nouveau, NVIDIA, nouveau) + NOUVEAU=no + +@@ -257,6 +266,9 @@ else + if test "x$RADEON" = xauto; then + RADEON=yes + fi ++ if test "x$AMDGPU" = xauto; then ++ AMDGPU=yes ++ fi + if test "x$NOUVEAU" = xauto; then + NOUVEAU=yes + fi +@@ -336,6 +348,11 @@ if test "x$RADEON" = xyes; then + AC_DEFINE(HAVE_RADEON, 1, [Have radeon support]) + fi + ++AM_CONDITIONAL(HAVE_AMDGPU, [test "x$AMDGPU" = xyes]) ++if test "x$AMDGPU" = xyes; then ++ AC_DEFINE(HAVE_AMDGPU, 1, [Have amdgpu support]) ++fi ++ + AM_CONDITIONAL(HAVE_TEGRA, [test "x$TEGRA" = xyes]) + if test "x$TEGRA" = xyes; then + AC_DEFINE(HAVE_TEGRA, 1, [Have Tegra support]) +@@ -432,6 +449,8 @@ AC_CONFIG_FILES([ + intel/libdrm_intel.pc + radeon/Makefile + radeon/libdrm_radeon.pc ++ amdgpu/Makefile ++ amdgpu/libdrm_amdgpu.pc + nouveau/Makefile + nouveau/libdrm_nouveau.pc + omap/Makefile +@@ -463,6 +482,7 @@ echo " libkms $LIBKMS" + echo " Intel API $INTEL" + echo " vmwgfx API $VMWGFX" + echo " Radeon API $RADEON" ++echo " AMDGPU API $AMDGPU" + echo " Nouveau API $NOUVEAU" + echo " OMAP API $OMAP" + echo " EXYNOS API $EXYNOS" +diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h +new file mode 100644 +index 0000000..d248d77 +--- /dev/null ++++ b/include/drm/amdgpu_drm.h +@@ -0,0 +1,600 @@ ++/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*- ++ * ++ * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. ++ * Copyright 2000 VA Linux Systems, Inc., Fremont, California. ++ * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. ++ * Copyright 2014 Advanced Micro Devices, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: ++ * Kevin E. Martin <martin@valinux.com> ++ * Gareth Hughes <gareth@valinux.com> ++ * Keith Whitwell <keith@tungstengraphics.com> ++ */ ++ ++#ifndef __AMDGPU_DRM_H__ ++#define __AMDGPU_DRM_H__ ++ ++#include <drm.h> ++ ++#define DRM_AMDGPU_GEM_CREATE 0x00 ++#define DRM_AMDGPU_GEM_MMAP 0x01 ++#define DRM_AMDGPU_CTX 0x02 ++#define DRM_AMDGPU_BO_LIST 0x03 ++#define DRM_AMDGPU_CS 0x04 ++#define DRM_AMDGPU_INFO 0x05 ++#define DRM_AMDGPU_GEM_METADATA 0x06 ++#define DRM_AMDGPU_GEM_WAIT_IDLE 0x07 ++#define DRM_AMDGPU_GEM_VA 0x08 ++#define DRM_AMDGPU_WAIT_CS 0x09 ++#define DRM_AMDGPU_GEM_OP 0x10 ++#define DRM_AMDGPU_GEM_USERPTR 0x11 ++ ++#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) ++#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) ++#define DRM_IOCTL_AMDGPU_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx) ++#define DRM_IOCTL_AMDGPU_BO_LIST DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list) ++#define DRM_IOCTL_AMDGPU_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs) ++#define DRM_IOCTL_AMDGPU_INFO DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info) ++#define DRM_IOCTL_AMDGPU_GEM_METADATA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata) ++#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle) ++#define DRM_IOCTL_AMDGPU_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, union drm_amdgpu_gem_va) ++#define DRM_IOCTL_AMDGPU_WAIT_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs) ++#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) ++#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) ++ ++#define AMDGPU_GEM_DOMAIN_CPU 0x1 ++#define AMDGPU_GEM_DOMAIN_GTT 0x2 ++#define AMDGPU_GEM_DOMAIN_VRAM 0x4 ++#define AMDGPU_GEM_DOMAIN_GDS 0x8 ++#define AMDGPU_GEM_DOMAIN_GWS 0x10 ++#define AMDGPU_GEM_DOMAIN_OA 0x20 ++ ++#define AMDGPU_GEM_DOMAIN_MASK 0x3F ++ ++/* Flag that CPU access will be required for the case of VRAM domain */ ++#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) ++/* Flag that CPU access will not work, this VRAM domain is invisible */ ++#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1) ++/* Flag that un-cached attributes should be used for GTT */ ++#define AMDGPU_GEM_CREATE_CPU_GTT_UC (1 << 2) ++/* Flag that USWC attributes should be used for GTT */ ++#define AMDGPU_GEM_CREATE_CPU_GTT_WC (1 << 3) ++ ++/* Flag mask for GTT domain_flags */ ++#define AMDGPU_GEM_CREATE_CPU_GTT_MASK \ ++ (AMDGPU_GEM_CREATE_CPU_GTT_WC | \ ++ AMDGPU_GEM_CREATE_CPU_GTT_UC | \ ++ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | \ ++ AMDGPU_GEM_CREATE_NO_CPU_ACCESS) ++ ++struct drm_amdgpu_gem_create_in { ++ /** the requested memory size */ ++ uint64_t bo_size; ++ /** physical start_addr alignment in bytes for some HW requirements */ ++ uint64_t alignment; ++ /** the requested memory domains */ ++ uint64_t domains; ++ /** allocation flags */ ++ uint64_t domain_flags; ++}; ++ 
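
The input block above and the output block that follows are overlaid in union drm_amdgpu_gem_create: userspace fills the `in` half, issues the ioctl, and reads the `out` half back from the same memory. A minimal sketch of driving this from userspace via libdrm's drmCommandWriteRead follows; the helper name and the size/alignment values are illustrative assumptions, not part of the patch:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "amdgpu_drm.h"

/* Allocate a 1 MiB, page-aligned, write-combined GTT buffer and return
 * its GEM handle, or 0 on failure. 'fd' is assumed to be an open file
 * descriptor for an amdgpu DRM node. */
static uint32_t gem_create_example(int fd)
{
    union drm_amdgpu_gem_create args;

    memset(&args, 0, sizeof(args));
    args.in.bo_size = 1024 * 1024;
    args.in.alignment = 4096;
    args.in.domains = AMDGPU_GEM_DOMAIN_GTT;
    args.in.domain_flags = AMDGPU_GEM_CREATE_CPU_GTT_WC;

    if (drmCommandWriteRead(fd, DRM_AMDGPU_GEM_CREATE,
                            &args, sizeof(args)))
        return 0;
    return args.out.handle;
}
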
++struct drm_amdgpu_gem_create_out {
++ /** returned GEM object handle */
++ uint32_t handle;
++};
++
++union drm_amdgpu_gem_create {
++ struct drm_amdgpu_gem_create_in in;
++ struct drm_amdgpu_gem_create_out out;
++};
++
++/** Opcode to create new residency list. */
++#define AMDGPU_BO_LIST_OP_CREATE 0
++/** Opcode to destroy previously created residency list */
++#define AMDGPU_BO_LIST_OP_DESTROY 1
++/** Opcode to update resource information in the list */
++#define AMDGPU_BO_LIST_OP_UPDATE 2
++
++struct drm_amdgpu_bo_list_in {
++ /** Type of operation */
++ uint32_t operation;
++ /** Handle of list or 0 if we want to create one */
++ uint32_t list_handle;
++ /** Number of BOs in list */
++ uint32_t bo_number;
++ /** Size of each element describing BO */
++ uint32_t bo_info_size;
++ /** Pointer to array describing BOs */
++ uint64_t bo_info_ptr;
++};
++
++struct drm_amdgpu_bo_list_entry {
++ /** Handle of BO */
++ uint32_t bo_handle;
++ /** New (if specified) BO priority to be used during migration */
++ uint32_t bo_priority;
++};
++
++struct drm_amdgpu_bo_list_out {
++ /** Handle of resource list */
++ uint32_t list_handle;
++};
++
++union drm_amdgpu_bo_list {
++ struct drm_amdgpu_bo_list_in in;
++ struct drm_amdgpu_bo_list_out out;
++};
++
++/* context related */
++#define AMDGPU_CTX_OP_ALLOC_CTX 1
++#define AMDGPU_CTX_OP_FREE_CTX 2
++#define AMDGPU_CTX_OP_QUERY_STATE 3
++
++#define AMDGPU_CTX_OP_STATE_RUNNING 1
++
++struct drm_amdgpu_ctx_in {
++ uint32_t op;
++ uint32_t flags;
++ uint32_t ctx_id;
++ uint32_t pad;
++};
++
++union drm_amdgpu_ctx_out {
++ struct {
++ uint32_t ctx_id;
++ } alloc;
++
++ struct {
++ uint64_t flags;
++ uint64_t hangs;
++ } state;
++};
++
++union drm_amdgpu_ctx {
++ struct drm_amdgpu_ctx_in in;
++ union drm_amdgpu_ctx_out out;
++};
++
++/*
++ * This is not a reliable API, so you should expect it to fail for any
++ * number of reasons and have a fallback path that does not use userptr
++ * to perform any operation.
++ */
++#define AMDGPU_GEM_USERPTR_READONLY (1 << 0)
++#define AMDGPU_GEM_USERPTR_ANONONLY (1 << 1)
++#define AMDGPU_GEM_USERPTR_VALIDATE (1 << 2)
++#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3)
++
++struct drm_amdgpu_gem_userptr {
++ uint64_t addr;
++ uint64_t size;
++ uint32_t flags;
++ uint32_t handle;
++};
++
++#define AMDGPU_TILING_MACRO 0x1
++#define AMDGPU_TILING_MICRO 0x2
++#define AMDGPU_TILING_SWAP_16BIT 0x4
++#define AMDGPU_TILING_R600_NO_SCANOUT AMDGPU_TILING_SWAP_16BIT
++#define AMDGPU_TILING_SWAP_32BIT 0x8
++/* this object requires a surface when mapped - i.e. front buffer */
++#define AMDGPU_TILING_SURFACE 0x10
++#define AMDGPU_TILING_MICRO_SQUARE 0x20
++#define AMDGPU_TILING_EG_BANKW_SHIFT 8
++#define AMDGPU_TILING_EG_BANKW_MASK 0xf
++#define AMDGPU_TILING_EG_BANKH_SHIFT 12
++#define AMDGPU_TILING_EG_BANKH_MASK 0xf
++#define AMDGPU_TILING_EG_MACRO_TILE_ASPECT_SHIFT 16
++#define AMDGPU_TILING_EG_MACRO_TILE_ASPECT_MASK 0xf
++#define AMDGPU_TILING_EG_TILE_SPLIT_SHIFT 24
++#define AMDGPU_TILING_EG_TILE_SPLIT_MASK 0xf
++#define AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_SHIFT 28
++#define AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_MASK 0xf
++
++#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1
++#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2
++
++/** The same structure is shared for input/output */
++struct drm_amdgpu_gem_metadata {
++ uint32_t handle; /* GEM Object handle */
++ uint32_t op; /** Do we want to get or set metadata? */
++ struct {
++ uint64_t flags;
++ uint64_t tiling_info; /* family specific tiling info */
++ uint32_t data_size_bytes;
++ uint32_t data[64];
++ } data;
++};
++
++struct drm_amdgpu_gem_mmap_in {
++ uint32_t handle; /** the GEM object handle */
++};
++
++struct drm_amdgpu_gem_mmap_out {
++ uint64_t addr_ptr; /** mmap offset from the vma offset manager */
++};
++
++union drm_amdgpu_gem_mmap {
++ struct drm_amdgpu_gem_mmap_in in;
++ struct drm_amdgpu_gem_mmap_out out;
++};
++
++struct drm_amdgpu_gem_wait_idle_in {
++ uint32_t handle; /* GEM object handle */
++ uint32_t flags;
++ uint64_t timeout; /* Timeout to wait. If 0, return immediately with the status */
++};
++
++struct drm_amdgpu_gem_wait_idle_out {
++ uint32_t status; /* BO status: 0 - BO is idle, 1 - BO is busy */
++ uint32_t domain; /* Returned current memory domain */
++};
++
++union drm_amdgpu_gem_wait_idle {
++ struct drm_amdgpu_gem_wait_idle_in in;
++ struct drm_amdgpu_gem_wait_idle_out out;
++};
++
++struct drm_amdgpu_wait_cs_in {
++ uint64_t handle;
++ uint64_t timeout;
++ uint32_t ip_type;
++ uint32_t ip_instance;
++ uint32_t ring;
++};
++
++struct drm_amdgpu_wait_cs_out {
++ uint64_t status;
++};
++
++union drm_amdgpu_wait_cs {
++ struct drm_amdgpu_wait_cs_in in;
++ struct drm_amdgpu_wait_cs_out out;
++};
++
++/* Sets or returns a value associated with a buffer. */
++struct drm_amdgpu_gem_op {
++ uint32_t handle; /* buffer */
++ uint32_t op; /* AMDGPU_GEM_OP_* */
++ uint64_t value; /* input or return value */
++};
++
++#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0
++#define AMDGPU_GEM_OP_SET_INITIAL_DOMAIN 1
++
++#define AMDGPU_VA_OP_MAP 1
++#define AMDGPU_VA_OP_UNMAP 2
++
++#define AMDGPU_VA_RESULT_OK 0
++#define AMDGPU_VA_RESULT_ERROR 1
++#define AMDGPU_VA_RESULT_VA_INVALID_ALIGNMENT 2
++
++/* Mapping flags */
++/* readable mapping */
++#define AMDGPU_VM_PAGE_READABLE (1 << 1)
++/* writable mapping */
++#define AMDGPU_VM_PAGE_WRITEABLE (1 << 2)
++/* executable mapping, new for VI */
++#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3)
++
++struct drm_amdgpu_gem_va_in {
++ /* GEM object handle */
++ uint32_t handle;
++ uint32_t pad;
++ /* map or unmap */
++ uint32_t operation;
++ /* specify mapping flags */
++ uint32_t flags;
++ /* VA address to assign. Must be correctly aligned. */
++ uint64_t va_address;
++ /* Specify offset inside of BO to assign. Must be correctly aligned. */
++ uint64_t offset_in_bo;
++ /* Specify mapping size. If 0 and offset is 0 then map the whole BO. */
++ /* Must be correctly aligned. */
++ uint64_t map_size;
++};
++
++struct drm_amdgpu_gem_va_out {
++ uint32_t result;
++};
++
++union drm_amdgpu_gem_va {
++ struct drm_amdgpu_gem_va_in in;
++ struct drm_amdgpu_gem_va_out out;
++};
++
++#define AMDGPU_HW_IP_GFX 0
++#define AMDGPU_HW_IP_COMPUTE 1
++#define AMDGPU_HW_IP_DMA 2
++#define AMDGPU_HW_IP_UVD 3
++#define AMDGPU_HW_IP_VCE 4
++#define AMDGPU_HW_IP_NUM 5
++
++#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1
++
++#define AMDGPU_CHUNK_ID_IB 0x01
++#define AMDGPU_CHUNK_ID_FENCE 0x02
++struct drm_amdgpu_cs_chunk {
++ uint32_t chunk_id;
++ uint32_t length_dw;
++ uint64_t chunk_data;
++};
++
++struct drm_amdgpu_cs_in {
++ /** Rendering context id */
++ uint32_t ctx_id;
++ /** Handle of resource list associated with CS */
++ uint32_t bo_list_handle;
++ uint32_t num_chunks;
++ uint32_t pad;
++ /* this points to uint64_t * which points to cs chunks */
++ uint64_t chunks;
++};
++
++struct drm_amdgpu_cs_out {
++ uint64_t handle;
++};
++
++union drm_amdgpu_cs {
++ struct drm_amdgpu_cs_in in;
++ struct drm_amdgpu_cs_out out;
++};
++
++/* Specify flags to be used for IB */
++
++/* This IB should be submitted to CE */
++#define AMDGPU_IB_FLAG_CE (1<<0)
++
++/* GDS is used by this IB */
++#define AMDGPU_IB_FLAG_GDS (1<<1)
++
++struct drm_amdgpu_cs_chunk_ib {
++ /**
++ * Handle of GEM object to be used as IB or 0 if it is already in
++ * residency list.
++ */
++ uint32_t handle;
++ uint32_t flags; /* IB Flags */
++ uint64_t va_start; /* Virtual address to begin IB execution */
++ uint32_t ib_bytes; /* Size of submission */
++ uint32_t ip_type; /* HW IP to submit to */
++ uint32_t ip_instance; /* HW IP index of the same type to submit to */
++ uint32_t ring; /* Ring index to submit to */
++};
++
++struct drm_amdgpu_cs_chunk_fence {
++ uint32_t handle;
++ uint32_t offset;
++};
++
++struct drm_amdgpu_cs_chunk_data {
++ union {
++ struct drm_amdgpu_cs_chunk_ib ib_data;
++ struct drm_amdgpu_cs_chunk_fence fence_data;
++ };
++};
++
++/**
++ * Query h/w info: flag that this is an integrated (a.k.a. fusion) GPU
++ *
++ */
++#define AMDGPU_IDS_FLAGS_FUSION 0x1
++
++/* indicate whether acceleration is working */
++#define AMDGPU_INFO_ACCEL_WORKING 0x00
++/* get the crtc_id from the mode object id? */
++#define AMDGPU_INFO_CRTC_FROM_ID 0x01
++/* query hw IP info */
++#define AMDGPU_INFO_HW_IP_INFO 0x02
++/* query hw IP instance count for the specified type */
++#define AMDGPU_INFO_HW_IP_COUNT 0x03
++/* timestamp for GL_ARB_timer_query */
++#define AMDGPU_INFO_TIMESTAMP 0x05
++/* Query the firmware version */
++#define AMDGPU_INFO_FW_VERSION 0x0e
++ /* Subquery id: Query VCE firmware version */
++ #define AMDGPU_INFO_FW_VCE 0x1
++ /* Subquery id: Query UVD firmware version */
++ #define AMDGPU_INFO_FW_UVD 0x2
++ /* Subquery id: Query GMC firmware version */
++ #define AMDGPU_INFO_FW_GMC 0x03
++ /* Subquery id: Query GFX ME firmware version */
++ #define AMDGPU_INFO_FW_GFX_ME 0x04
++ /* Subquery id: Query GFX PFP firmware version */
++ #define AMDGPU_INFO_FW_GFX_PFP 0x05
++ /* Subquery id: Query GFX CE firmware version */
++ #define AMDGPU_INFO_FW_GFX_CE 0x06
++ /* Subquery id: Query GFX RLC firmware version */
++ #define AMDGPU_INFO_FW_GFX_RLC 0x07
++ /* Subquery id: Query GFX MEC firmware version */
++ #define AMDGPU_INFO_FW_GFX_MEC 0x08
++ /* Subquery id: Query SMC firmware version */
++ #define AMDGPU_INFO_FW_SMC 0x0a
++ /* Subquery id: Query SDMA firmware version */
++ #define AMDGPU_INFO_FW_SDMA 0x0b
++/* number of bytes moved for TTM migration */
++#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f
++/* the used VRAM size */
++#define AMDGPU_INFO_VRAM_USAGE 0x10
++/* the used GTT size */
++#define AMDGPU_INFO_GTT_USAGE 0x11
++/* Information about GDS, etc. resource configuration */
++#define AMDGPU_INFO_GDS_CONFIG 0x13
++/* Query information about VRAM and GTT domains */
++#define AMDGPU_INFO_VRAM_GTT 0x14
++/* Query information about registers in MMR address space */
++#define AMDGPU_INFO_READ_MMR_REG 0x15
++/* Query information about device: rev id, family, etc. */
++#define AMDGPU_INFO_DEV_INFO 0x16
++/* visible vram usage */
++#define AMDGPU_INFO_VIS_VRAM_USAGE 0x17
++
++#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
++#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
++#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT 8
++#define AMDGPU_INFO_MMR_SH_INDEX_MASK 0xff
++
++/* Input structure for the INFO ioctl */
++struct drm_amdgpu_info {
++ /* Where the return value will be stored */
++ uint64_t return_pointer;
++ /* The size of the return value. Just like "size" in "snprintf",
++ * it limits how many bytes the kernel can write. */
++ uint32_t return_size;
++ /* The query request id. */
++ uint32_t query;
++
++ union {
++ struct {
++ uint32_t id;
++ } mode_crtc;
++
++ struct {
++ /** AMDGPU_HW_IP_* */
++ uint32_t type;
++ /**
++ * Index of the IP if there are more IPs of the same type.
++ * Ignored by AMDGPU_INFO_HW_IP_COUNT.
++ */
++ uint32_t ip_instance;
++ } query_hw_ip;
++
++ struct {
++ uint32_t dword_offset;
++ uint32_t count; /* number of registers to read */
++ uint32_t instance;
++ uint32_t flags;
++ } read_mmr_reg;
++
++ struct {
++ /** AMDGPU_INFO_FW_* */
++ uint32_t fw_type;
++ /** Index of the IP if there are more IPs of the same type. */
++ uint32_t ip_instance;
++ /**
++ * Index of the engine. Whether this is used depends
++ * on the firmware type. (e.g. MEC, SDMA)
++ */
++ uint32_t index;
++ } query_fw;
++ };
++};
++
++struct drm_amdgpu_info_gds {
++ /** GDS GFX partition size */
++ uint32_t gds_gfx_partition_size;
++ /** GDS compute partition size */
++ uint32_t compute_partition_size;
++ /** total GDS memory size */
++ uint32_t gds_total_size;
++ /** GWS size per GFX partition */
++ uint32_t gws_per_gfx_partition;
++ /** GWS size per compute partition */
++ uint32_t gws_per_compute_partition;
++ /** OA size per GFX partition */
++ uint32_t oa_per_gfx_partition;
++ /** OA size per compute partition */
++ uint32_t oa_per_compute_partition;
++};
++
++struct drm_amdgpu_info_vram_gtt {
++ uint64_t vram_size;
++ uint64_t vram_cpu_accessible_size;
++ uint64_t gtt_size;
++};
++
++struct drm_amdgpu_info_firmware {
++ uint32_t ver;
++ uint32_t feature;
++};
++
++struct drm_amdgpu_info_device {
++ /** PCI Device ID */
++ uint32_t device_id;
++ /** Internal chip revision: A0, A1, etc. */
++ uint32_t chip_rev;
++ uint32_t external_rev;
++ /** Revision id in PCI Config space */
++ uint32_t pci_rev;
++ uint32_t family;
++ uint32_t num_shader_engines;
++ uint32_t num_shader_arrays_per_engine;
++ uint32_t gpu_counter_freq; /* in KHz */
++ uint64_t max_engine_clock; /* in KHz */
++ /* cu information */
++ uint32_t cu_active_number;
++ uint32_t cu_ao_mask;
++ uint32_t cu_bitmap[4][4];
++ /** Render backend pipe mask. One render backend is CB+DB. */
++ uint32_t enabled_rb_pipes_mask;
++ uint32_t num_rb_pipes;
++ uint32_t num_hw_gfx_contexts;
++ uint32_t _pad;
++ uint64_t ids_flags;
++ /** Starting virtual address for UMDs. */
++ uint64_t virtual_address_offset;
++ /** Required alignment of virtual addresses. */
++ uint32_t virtual_address_alignment;
++ /** Page table entry - fragment size */
++ uint32_t pte_fragment_size;
++ uint32_t gart_page_size;
++};
++
++struct drm_amdgpu_info_hw_ip {
++ /** Version of h/w IP */
++ uint32_t hw_ip_version_major;
++ uint32_t hw_ip_version_minor;
++ /** Capabilities */
++ uint64_t capabilities_flags;
++ /** Bitmask of available rings. Bit 0 means ring 0, etc. */
++ uint32_t available_rings;
++};
++
++/* Those correspond to the tile index to use; this is to explicitly state
++ * the API that is implicitly defined by the tile mode array.
++ */
++#define SI_TILE_MODE_COLOR_LINEAR_ALIGNED 8
++#define SI_TILE_MODE_COLOR_1D 13
++#define SI_TILE_MODE_COLOR_1D_SCANOUT 9
++#define SI_TILE_MODE_COLOR_2D_8BPP 14
++#define SI_TILE_MODE_COLOR_2D_16BPP 15
++#define SI_TILE_MODE_COLOR_2D_32BPP 16
++#define SI_TILE_MODE_COLOR_2D_64BPP 17
++#define SI_TILE_MODE_COLOR_2D_SCANOUT_16BPP 11
++#define SI_TILE_MODE_COLOR_2D_SCANOUT_32BPP 12
++#define SI_TILE_MODE_DEPTH_STENCIL_1D 4
++#define SI_TILE_MODE_DEPTH_STENCIL_2D 0
++#define SI_TILE_MODE_DEPTH_STENCIL_2D_2AA 3
++#define SI_TILE_MODE_DEPTH_STENCIL_2D_4AA 3
++#define SI_TILE_MODE_DEPTH_STENCIL_2D_8AA 2
++
++#define CIK_TILE_MODE_DEPTH_STENCIL_1D 5
++
++/*
++ * Supported GPU families
++ */
++#define AMDGPU_FAMILY_UNKNOWN 0
++#define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */
++#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */
++#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */
++#define AMDGPU_FAMILY_CZ 135 /* Carrizo */
++
++#endif
+--
+1.9.1
+
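
For context on how the pieces of this patch fit together: an address handed out by amdgpu_vamgr_find_va() only becomes visible to the GPU once the buffer is bound to it with the GEM_VA ioctl defined in the header above. A minimal, hypothetical sketch follows; the helper name, flag choice, and error handling are illustrative assumptions, not part of the patch:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "amdgpu_drm.h"

/* Bind an existing GEM buffer at a chosen GPU virtual address. 'va'
 * would normally come from amdgpu_vamgr_find_va(). */
static int gem_map_example(int fd, uint32_t handle, uint64_t va,
                           uint64_t size)
{
    union drm_amdgpu_gem_va args;

    memset(&args, 0, sizeof(args));
    args.in.handle = handle;
    args.in.operation = AMDGPU_VA_OP_MAP;
    args.in.flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
    args.in.va_address = va;
    args.in.offset_in_bo = 0;
    args.in.map_size = size; /* 0 with offset 0 would map the whole BO */

    if (drmCommandWriteRead(fd, DRM_AMDGPU_GEM_VA, &args, sizeof(args)))
        return -1;
    return args.out.result == AMDGPU_VA_RESULT_OK ? 0 : -1;
}

On success the kernel reports AMDGPU_VA_RESULT_OK in args.out.result; AMDGPU_VA_RESULT_VA_INVALID_ALIGNMENT indicates the address, offset, or size violated the alignment advertised in drm_amdgpu_info_device.virtual_address_alignment.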