Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux-6.6/linux-yocto-6.6/0001-ae4dma-Initial-ae4dma-controller-driver-with-multi-c.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux-6.6/linux-yocto-6.6/0001-ae4dma-Initial-ae4dma-controller-driver-with-multi-c.patch | 1587
1 file changed, 1587 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux-6.6/linux-yocto-6.6/0001-ae4dma-Initial-ae4dma-controller-driver-with-multi-c.patch b/meta-amd-bsp/recipes-kernel/linux-6.6/linux-yocto-6.6/0001-ae4dma-Initial-ae4dma-controller-driver-with-multi-c.patch
new file mode 100644
index 00000000..1d0a08ec
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux-6.6/linux-yocto-6.6/0001-ae4dma-Initial-ae4dma-controller-driver-with-multi-c.patch
@@ -0,0 +1,1587 @@
+From 3d6d0e4175d9d78c4bd2e338accedfe06275e898 Mon Sep 17 00:00:00 2001
+From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+Date: Fri, 9 Feb 2024 18:25:37 +0530
+Subject: [PATCH 1/5] ae4dma: Initial ae4dma controller driver with
+ multi-channel
+
+Add support for the AMD AE4DMA controller. It performs high-bandwidth
+memory-to-memory and IO copy operations. Device commands are managed
+via a circular queue of 'descriptors', each of which specifies source
+and destination addresses for copying a single buffer of data.
+
+Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+---
+ drivers/dma/Kconfig | 2 +
+ drivers/dma/Makefile | 1 +
+ drivers/dma/ae4dma/Kconfig | 13 +
+ drivers/dma/ae4dma/Makefile | 10 +
+ drivers/dma/ae4dma/ae4dma-dev.c | 387 ++++++++++++++++++++++++
+ drivers/dma/ae4dma/ae4dma-dmaengine.c | 417 ++++++++++++++++++++++++++
+ drivers/dma/ae4dma/ae4dma-pci.c | 251 ++++++++++++++++
+ drivers/dma/ae4dma/ae4dma.h | 416 +++++++++++++++++++++++++
+ 8 files changed, 1497 insertions(+)
+ create mode 100644 drivers/dma/ae4dma/Kconfig
+ create mode 100644 drivers/dma/ae4dma/Makefile
+ create mode 100644 drivers/dma/ae4dma/ae4dma-dev.c
+ create mode 100644 drivers/dma/ae4dma/ae4dma-dmaengine.c
+ create mode 100644 drivers/dma/ae4dma/ae4dma-pci.c
+ create mode 100644 drivers/dma/ae4dma/ae4dma.h
+
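+For reference only (an editor's sketch, not part of this patch): the controller is exposed through the generic dmaengine framework, so a consumer never touches the descriptor ring directly. The minimal client below shows how a memcpy channel registered by a driver like this one might be exercised, using only standard dmaengine calls (dma_request_channel(), dmaengine_prep_dma_memcpy(), dmaengine_submit(), dma_async_issue_pending(), dma_sync_wait()). The module name, buffer size and trimmed error handling are illustrative assumptions.
+
+// SPDX-License-Identifier: GPL-2.0-only
+/* ae4dma_memcpy_demo.c - illustrative dmaengine memcpy client (not part of the patch) */
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+static int __init ae4dma_memcpy_demo_init(void)
+{
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t src_dma, dst_dma;
+	struct dma_chan *chan;
+	dma_cap_mask_t mask;
+	dma_cookie_t cookie;
+	size_t len = 4096;
+	void *src, *dst;
+	int ret = 0;
+
+	/* Ask the framework for any memcpy-capable (possibly DMA_PRIVATE) channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+	chan = dma_request_channel(mask, NULL, NULL);
+	if (!chan)
+		return -ENODEV;
+
+	src = kmalloc(len, GFP_KERNEL);
+	dst = kzalloc(len, GFP_KERNEL);
+	if (!src || !dst) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	memset(src, 0xA5, len);
+
+	/* Streaming mappings against the DMA device backing the channel */
+	src_dma = dma_map_single(chan->device->dev, src, len, DMA_TO_DEVICE);
+	dst_dma = dma_map_single(chan->device->dev, dst, len, DMA_FROM_DEVICE);
+
+	tx = dmaengine_prep_dma_memcpy(chan, dst_dma, src_dma, len,
+				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!tx) {
+		ret = -EIO;
+		goto unmap;
+	}
+
+	cookie = dmaengine_submit(tx);		/* queue one descriptor */
+	dma_async_issue_pending(chan);		/* kick the channel */
+
+	if (dma_sync_wait(chan, cookie) != DMA_COMPLETE)
+		ret = -ETIMEDOUT;
+
+unmap:
+	dma_unmap_single(chan->device->dev, src_dma, len, DMA_TO_DEVICE);
+	dma_unmap_single(chan->device->dev, dst_dma, len, DMA_FROM_DEVICE);
+out:
+	kfree(src);
+	kfree(dst);
+	dma_release_channel(chan);
+	return ret;
+}
+module_init(ae4dma_memcpy_demo_init);
+
+static void __exit ae4dma_memcpy_demo_exit(void) { }
+module_exit(ae4dma_memcpy_demo_exit);
+
+MODULE_LICENSE("GPL");
+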
+diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
+index e928f2ca0f1e..34344fc2454c 100644
+--- a/drivers/dma/Kconfig
++++ b/drivers/dma/Kconfig
+@@ -772,6 +772,8 @@ source "drivers/dma/fsl-dpaa2-qdma/Kconfig"
+
+ source "drivers/dma/lgm/Kconfig"
+
++source "drivers/dma/ae4dma/Kconfig"
++
+ # clients
+ comment "DMA Clients"
+ depends on DMA_ENGINE
+diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
+index dfd40d14e408..9df21ec28966 100644
+--- a/drivers/dma/Makefile
++++ b/drivers/dma/Makefile
+@@ -83,6 +83,7 @@ obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
+ obj-$(CONFIG_ST_FDMA) += st_fdma.o
+ obj-$(CONFIG_FSL_DPAA2_QDMA) += fsl-dpaa2-qdma/
+ obj-$(CONFIG_INTEL_LDMA) += lgm/
++obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
+
+ obj-y += mediatek/
+ obj-y += qcom/
+diff --git a/drivers/dma/ae4dma/Kconfig b/drivers/dma/ae4dma/Kconfig
+new file mode 100644
+index 000000000000..50a69f1b984d
+--- /dev/null
++++ b/drivers/dma/ae4dma/Kconfig
+@@ -0,0 +1,13 @@
++# SPDX-License-Identifier: GPL-2.0-only
++config AMD_AE4DMA
++ tristate "AMD AE4DMA Engine"
++ depends on X86_64 && PCI
++ select DMA_ENGINE
++ select DMA_VIRTUAL_CHANNELS
++ help
++ Enable support for the AMD AE4DMA controller. This controller
++ provides DMA capabilities to perform high-bandwidth memory-to-memory
++ and IO copy operations. It performs DMA transfers through
++ queue-based descriptor management. This DMA controller is intended
++ to be used with AMD Non-Transparent Bridge devices and not for
++ general-purpose peripheral DMA.
+diff --git a/drivers/dma/ae4dma/Makefile b/drivers/dma/ae4dma/Makefile
+new file mode 100644
+index 000000000000..b1e431842d18
+--- /dev/null
++++ b/drivers/dma/ae4dma/Makefile
+@@ -0,0 +1,10 @@
++# SPDX-License-Identifier: GPL-2.0-only
++#
++# AMD AE4DMA driver
++#
++
++obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
++
++ae4dma-objs := ae4dma-dev.o ae4dma-dmaengine.o
++
++ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
+diff --git a/drivers/dma/ae4dma/ae4dma-dev.c b/drivers/dma/ae4dma/ae4dma-dev.c
+new file mode 100644
+index 000000000000..9163327a8fc4
+--- /dev/null
++++ b/drivers/dma/ae4dma/ae4dma-dev.c
+@@ -0,0 +1,387 @@
++// SPDX-License-Identifier: GPL-2.0-only
++
++/*
++ * AMD AE4DMA device driver
++ * -- Based on the PTDMA driver
++ *
++ * Copyright (C) 2024 Advanced Micro Devices, Inc.
++ *
++ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
++
++ */
++
++#include <linux/bitfield.h>
++#include <linux/dma-mapping.h>
++#include <linux/interrupt.h>
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/pci.h>
++#include <linux/delay.h>
++#include <linux/time.h>
++
++#include "ae4dma.h"
++#include "../dmaengine.h"
++#include "../virt-dma.h"
++
++static unsigned int max_hw_q = 2;
++module_param(max_hw_q, uint, 0444);
++MODULE_PARM_DESC(max_hw_q, "Max hw queues supported by engine (any non-zero value, default: 2)");
++
++static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan)
++{
++ return container_of(dma_chan, struct pt_dma_chan, vc.chan);
++}
++/* Human-readable error strings */
++static char *pt_error_codes[] = {
++ "",
++ "ERR 01: INVALID HEADER DW0",
++ "ERR 02: INVALID STATUS",
++ "ERR 03: INVALID LENGHT - 4 BYTE ALIGNMENT",
++ "ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
++ "ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
++ "ERR 06: INVALID ALIGNMENT",
++ "ERR 07: INVALID DESCRIPTOR",
++};
++
++static void pt_log_error(struct pt_device *d, int e)
++{
++ if (e <= 7)
++ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", pt_error_codes[e], e);
++ else if (e <= 63)
++ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
++ else if (e <= 255)
++ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
++}
++
++void pt_start_queue(struct pt_cmd_queue *cmd_q)
++{
++ u32 status = readl(cmd_q->reg_control);
++ status |= (cmd_q->qcontrol | CMD_Q_RUN);
++ /* Turn on the run bit */
++ writel(status, cmd_q->reg_control);
++}
++
++void pt_stop_queue(struct pt_cmd_queue *cmd_q)
++{
++ /* Turn off the run bit */
++ writel(cmd_q->qcontrol & ~CMD_Q_RUN, cmd_q->reg_control);
++}
++
++static u16 pt_check_status_error(struct pt_cmd_queue *cmd_q, int idx)
++{
++ struct pt_device *pt = cmd_q->pt;
++ struct device *dev = pt->dev;
++ struct ptdma_desc desc;
++ u8 status;
++
++ do {
++ /* Sync the descriptor for CPU access before reading it back */
++ dma_sync_single_for_cpu(dev, cmd_q->qbase_dma + (idx * sizeof(struct ptdma_desc)),
++ sizeof(struct ptdma_desc), DMA_FROM_DEVICE);
++ memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ptdma_desc));
++ dma_sync_single_for_device(dev, cmd_q->qbase_dma + (idx * sizeof(struct ptdma_desc)),
++ sizeof(struct ptdma_desc), DMA_FROM_DEVICE);
++ mb();
++
++ status = desc.dw1.status;
++ if (status) {
++ if (status != 0x3) {
++ /* On error, only save the first error value */
++ cmd_q->cmd_error = desc.dw1.err_code;
++ if (cmd_q->cmd_error) {
++ /*
++ * Log the error and flush the queue by
++ * moving the head pointer
++ */
++ pt_log_error(cmd_q->pt, cmd_q->cmd_error);
++ }
++ }
++ }
++ } while (status == 0);
++
++ return desc.dwouv.dws.timestamp;
++}
++
++static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd_q)
++{
++ struct pt_device *pt = cmd_q->pt;
++ struct device *dev = pt->dev;
++ unsigned long flags;
++
++ bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0);
++
++ if (soc) {
++ desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc->dwouv.dw0);
++ desc->dwouv.dw0 &= ~DWORD0_SOC;
++ }
++
++ mutex_lock(&cmd_q->q_mutex);
++ spin_lock_irqsave(&cmd_q->cmd_lock, flags);
++
++ desc->dwouv.dws.timestamp = cmd_q->desc_id_counter++;
++
++ volatile u32 tail_wi = atomic_read(&cmd_q->tail_wi);
++
++ dma_sync_single_for_device(dev, (cmd_q->qbase_dma + (tail_wi * sizeof(struct ptdma_desc))),
++ sizeof(struct ptdma_desc), DMA_TO_DEVICE);
++ memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct ptdma_desc));
++ dma_sync_single_for_device(dev, (cmd_q->qbase_dma + (tail_wi * sizeof(struct ptdma_desc))),
++ sizeof(struct ptdma_desc), DMA_TO_DEVICE);
++ cmd_q->qidx = (cmd_q->qidx + 1) % CMD_Q_LEN;
++
++ atomic64_inc(&cmd_q->q_cmd_count);
++ tail_wi = (tail_wi + 1) % CMD_Q_LEN;
++ atomic_set(&cmd_q->tail_wi, tail_wi);
++ mb();
++ writel(tail_wi, cmd_q->reg_control + 0x10);
++ mb();
++ spin_unlock_irqrestore(&cmd_q->cmd_lock, flags);
++ mutex_unlock(&cmd_q->q_mutex);
++
++ return 0;
++}
++
++int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
++ struct pt_passthru_engine *pt_engine)
++{
++ struct ptdma_desc desc;
++
++ cmd_q->cmd_error = 0;
++ cmd_q->total_pt_ops++;
++ memset(&desc, 0, sizeof(desc));
++ desc.dwouv.dws.byte0 = CMD_DESC_DW0_VAL;
++
++ desc.dw1.status = 0;
++ desc.dw1.err_code = 0;
++ desc.dw1.desc_id = 0;
++
++ desc.length = pt_engine->src_len;
++
++ desc.src_lo = upper_32_bits(pt_engine->src_dma);
++ desc.src_hi = lower_32_bits(pt_engine->src_dma);
++ desc.dst_lo = upper_32_bits(pt_engine->dst_dma);
++ desc.dst_hi = lower_32_bits(pt_engine->dst_dma);
++
++ return pt_core_execute_cmd(&desc, cmd_q);
++}
++
++static irqreturn_t pt_core_irq_handler(int irq, void *data)
++{
++ struct pt_cmd_queue *cmd_q = data;
++ struct pt_device *pt = cmd_q->pt;
++ u32 status = readl(cmd_q->reg_control + 0x4);
++ u8 q_intr_type = (status>>24) & 0xf;
++ unsigned long flags;
++ struct pt_cmd *cmd;
++
++ pt->total_interrupts++;
++
++ if (q_intr_type == 0x4)
++ dev_info(pt->dev, "AE4DMA INTR: %s (0x%x)\n", "queue desc error", q_intr_type);
++ else if (q_intr_type == 0x2)
++ dev_info(pt->dev, "AE4DMA INTR: %s (0x%x)\n", "queue stopped", q_intr_type);
++ else if (q_intr_type == 0x1)
++ dev_info(pt->dev, "AE4DMA INTR: %s (0x%x)\n", "queue empty", q_intr_type);
++ else if (q_intr_type != 0x3)
++ dev_info(pt->dev, "AE4DMA INTR: %s (0x%x)\n", "unknown error", q_intr_type);
++
++ spin_lock_irqsave(&cmd_q->cmd_lock, flags);
++ volatile u32 crdi = readl(cmd_q->reg_control + 0x0C);
++ volatile u32 dridx = atomic_read(&cmd_q->dridx);
++ while (dridx != crdi) {
++ if (list_empty(&cmd_q->cmd))
++ break;
++ cmd = list_first_entry(&cmd_q->cmd, struct pt_cmd, entry);
++ list_del(&cmd->entry);
++ pt_check_status_error(cmd_q, dridx);
++ cmd->pt_cmd_callback(cmd->data, cmd->ret);
++ atomic64_dec(&cmd_q->q_cmd_count);
++ dridx = (dridx + 1) % CMD_Q_LEN;
++ atomic_set(&cmd_q->dridx, dridx);
++ mb();
++ }
++ spin_unlock_irqrestore(&cmd_q->cmd_lock, flags);
++
++ status = readl(cmd_q->reg_control + 0x14);
++ if (status & 1) {
++ status = status & ~1;
++ writel(status, cmd_q->reg_control + 0x14);
++ }
++
++ return IRQ_HANDLED;
++}
++
++int pt_core_init(struct pt_device *pt)
++{
++ char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
++ struct pt_cmd_queue *cmd_q;
++ u32 dma_addr_lo, dma_addr_hi;
++ struct device *dev = pt->dev;
++ struct dma_pool *dma_pool;
++ unsigned int i;
++ int ret;
++ u32 q_per_eng = max_hw_q;
++
++ /* Update the device registers with queue information. */
++ writel(q_per_eng, pt->io_regs);
++
++ q_per_eng = readl(pt->io_regs);
++
++ for (i = 0; i < q_per_eng; i++) {
++
++ /* Allocate a dma pool for the queue */
++ snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d", dev_name(pt->dev), i);
++
++ dma_pool = dma_pool_create(dma_pool_name, dev,
++ PT_DMAPOOL_MAX_SIZE,
++ PT_DMAPOOL_ALIGN, 0);
++ if (!dma_pool)
++ return -ENOMEM;
++
++ /* ae4dma core initialisation */
++ cmd_q = &pt->cmd_q[i];
++ cmd_q->id = pt->cmd_q_count;
++ pt->cmd_q_count++;
++
++ cmd_q->pt = pt;
++ cmd_q->dma_pool = dma_pool;
++ mutex_init(&cmd_q->q_mutex);
++ spin_lock_init(&cmd_q->q_lock);
++
++ /* Preset some register values (each queue's register block is 32 bytes (0x20)) */
++ cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20);
++
++ /* Page alignment satisfies our needs for N <= 128 */
++ cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
++
++ cmd_q->qbase = kmalloc(cmd_q->qsize, GFP_KERNEL);
++ if (!cmd_q->qbase) {
++ ret = -ENOMEM;
++ goto e_destroy_pool;
++ }
++
++ cmd_q->qbase_dma = dma_map_single(dev, cmd_q->qbase, cmd_q->qsize, DMA_BIDIRECTIONAL);
++ if (dma_mapping_error(dev, cmd_q->qbase_dma)) {
++ dev_err(dev, "dma mapping error\n");
++ kfree(cmd_q->qbase);
++ ret = -ENOMEM;
++ goto e_destroy_pool;
++ }
++
++ cmd_q->qidx = 0;
++ atomic64_set(&cmd_q->q_cmd_count, 0);
++ atomic_set(&cmd_q->dridx, 0);
++ cmd_q->q_space_available = 0;
++
++ atomic_set(&cmd_q->tail_wi, readl(cmd_q->reg_control + 0x10));
++
++ init_waitqueue_head(&cmd_q->int_queue);
++ init_waitqueue_head(&cmd_q->q_space);
++
++ dev_dbg(dev, "queue #%u available\n", i);
++ }
++
++ if (pt->cmd_q_count == 0) {
++ dev_notice(dev, "no command queues available\n");
++ ret = -EIO;
++ goto e_free_dma;
++ }
++
++ dev_info(dev, "BB1.0011 AE4DMA\n");
++ for (i = 0; i < pt->cmd_q_count; i++) {
++ cmd_q = &pt->cmd_q[i];
++
++ cmd_q->qcontrol = 0; /* Start with nothing */
++
++ /* Request an irq */
++ ret = request_irq(pt->pt_irq[i], pt_core_irq_handler, 0, dev_name(pt->dev), cmd_q);
++ if (ret) {
++ dev_err(dev, "unable to allocate an IRQ\n");
++ goto e_free_dma;
++ }
++
++ /* Update the device registers with queue information. */
++ writel(CMD_Q_LEN, cmd_q->reg_control + 0x08); /* Max Index (cmd queue length) */
++
++ cmd_q->qdma_tail = cmd_q->qbase_dma;
++
++ dma_addr_lo = lower_32_bits(cmd_q->qdma_tail);
++ writel((u32)dma_addr_lo, cmd_q->reg_control + 0x18);
++
++ dma_addr_lo = readl(cmd_q->reg_control + 0x18);
++
++ dma_addr_hi = upper_32_bits(cmd_q->qdma_tail);
++ writel((u32)dma_addr_hi, cmd_q->reg_control + 0x1C);
++
++ dma_addr_hi = readl(cmd_q->reg_control + 0x1C);
++
++ pt_core_enable_queue_interrupts(pt, cmd_q);
++
++ INIT_LIST_HEAD(&cmd_q->cmd);
++ }
++
++ /* Register the DMA engine support */
++ ret = pt_dmaengine_register(pt);
++ if (ret)
++ goto e_free_irq;
++
++ return 0;
++
++e_free_irq:
++ for (i = 0; i < pt->cmd_q_count; i++)
++ free_irq(pt->pt_irq[i], &pt->cmd_q[i]);
++
++e_free_dma:
++ for (i = 0; i < pt->cmd_q_count; i++) {
++ cmd_q = &pt->cmd_q[i];
++ dma_unmap_single(dev, cmd_q->qbase_dma, cmd_q->qsize, DMA_BIDIRECTIONAL);
++ kfree(cmd_q->qbase);
++ }
++
++e_destroy_pool:
++ for (i = 0; i < pt->cmd_q_count; i++)
++ dma_pool_destroy(pt->cmd_q[i].dma_pool);
++
++ kmem_cache_destroy(pt->dma_desc_cache);
++ return ret;
++}
++
++void pt_core_destroy(struct pt_device *pt)
++{
++ struct device *dev = pt->dev;
++ struct pt_cmd_queue *cmd_q;
++ struct pt_cmd *cmd;
++ unsigned int i;
++
++ /* Unregister the DMA engine */
++ pt_dmaengine_unregister(pt);
++
++ for (i = 0; i < pt->cmd_q_count; i++) {
++ cmd_q = &pt->cmd_q[i];
++
++ wake_up_all(&cmd_q->q_space);
++ wake_up_all(&cmd_q->int_queue);
++
++ /* Disable and clear interrupts */
++ pt_core_disable_queue_interrupts(pt, cmd_q);
++
++ /* Turn off the run bit */
++ pt_stop_queue(cmd_q);
++
++ free_irq(pt->pt_irq[i], cmd_q);
++
++ dma_unmap_single(dev, cmd_q->qbase_dma, cmd_q->qsize, DMA_BIDIRECTIONAL);
++ kfree(cmd_q->qbase);
++ }
++
++ /* Flush the cmd queue */
++ while (!list_empty(&pt->cmd)) {
++ /* Invoke the callback directly with an error code */
++ cmd = list_first_entry(&pt->cmd, struct pt_cmd, entry);
++ list_del(&cmd->entry);
++ cmd->pt_cmd_callback(cmd->data, -ENODEV);
++ }
++
++ kmem_cache_destroy(pt->dma_desc_cache);
++}
+diff --git a/drivers/dma/ae4dma/ae4dma-dmaengine.c b/drivers/dma/ae4dma/ae4dma-dmaengine.c
+new file mode 100644
+index 000000000000..03f28eb10ad6
+--- /dev/null
++++ b/drivers/dma/ae4dma/ae4dma-dmaengine.c
+@@ -0,0 +1,417 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * AMD AE4DMA device driver
++ * -- Based on the PTDMA driver
++ *
++ * Copyright (C) 2024 Advanced Micro Devices, Inc.
++ *
++ */
++#include <linux/delay.h>
++#include "ae4dma.h"
++#include "../dmaengine.h"
++#include "../virt-dma.h"
++
++static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan)
++{
++ return container_of(dma_chan, struct pt_dma_chan, vc.chan);
++}
++
++static inline struct pt_dma_desc *to_pt_desc(struct virt_dma_desc *vd)
++{
++ return container_of(vd, struct pt_dma_desc, vd);
++}
++
++static void pt_free_chan_resources(struct dma_chan *dma_chan)
++{
++ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
++
++ vchan_free_chan_resources(&chan->vc);
++}
++
++static void pt_synchronize(struct dma_chan *dma_chan)
++{
++ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
++
++ vchan_synchronize(&chan->vc);
++}
++
++static void pt_do_cleanup(struct virt_dma_desc *vd)
++{
++ struct pt_dma_desc *desc = to_pt_desc(vd);
++ struct pt_device *pt = desc->pt;
++
++ kmem_cache_free(pt->dma_desc_cache, desc);
++}
++
++static int pt_dma_start_desc(struct pt_dma_desc *desc, struct pt_dma_chan *chan)
++{
++ struct pt_passthru_engine *pt_engine;
++ struct pt_device *pt;
++ struct pt_cmd *pt_cmd;
++ struct pt_cmd_queue *cmd_q;
++
++ desc->issued_to_hw = 1;
++ list_del(&desc->vd.node);
++
++ pt_cmd = &desc->pt_cmd;
++ pt = pt_cmd->pt;
++ cmd_q = chan->cmd_q;
++ pt_engine = &pt_cmd->passthru;
++
++ pt_cmd->qid = cmd_q->qidx;
++ cmd_q->tdata.cmd = pt_cmd;
++
++ /* Execute the command */
++ pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine);
++
++ return 0;
++}
++
++static struct pt_dma_desc *pt_next_dma_desc(struct pt_dma_chan *chan)
++{
++ struct virt_dma_desc *vd = vchan_next_desc(&chan->vc);
++
++ return vd ? to_pt_desc(vd) : NULL;
++}
++
++static void pt_cmd_callback_tasklet(void *data, int err)
++{
++ struct pt_dma_desc *desc = data;
++ struct dma_chan *dma_chan;
++ struct pt_dma_chan *chan;
++ struct dma_async_tx_descriptor *tx_desc;
++ struct virt_dma_desc *vd;
++ unsigned long flags;
++
++ dma_chan = desc->vd.tx.chan;
++ chan = to_pt_chan(dma_chan);
++
++ if (err == -EINPROGRESS)
++ return;
++
++ tx_desc = &desc->vd.tx;
++ vd = &desc->vd;
++
++ if (err)
++ desc->status = DMA_ERROR;
++
++ spin_lock_irqsave(&chan->vc.lock, flags);
++ if (desc) {
++ if (desc->status != DMA_COMPLETE) {
++ if (desc->status != DMA_ERROR)
++ desc->status = DMA_COMPLETE;
++
++ dma_cookie_complete(tx_desc);
++ dma_descriptor_unmap(tx_desc);
++ } else {
++ /* Don't handle it twice */
++ tx_desc = NULL;
++ }
++ }
++ spin_unlock_irqrestore(&chan->vc.lock, flags);
++
++ if (tx_desc) {
++ dmaengine_desc_get_callback_invoke(tx_desc, NULL);
++ dma_run_dependencies(tx_desc);
++ vchan_vdesc_fini(vd);
++ }
++}
++
++static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
++ struct pt_dma_desc *desc)
++{
++ struct dma_async_tx_descriptor *tx_desc;
++ struct virt_dma_desc *vd;
++ unsigned long flags;
++
++ /* Loop over descriptors until one is found with commands */
++ do {
++ if (desc) {
++ if (!desc->issued_to_hw) {
++ /* No errors, keep going */
++ if (desc->status != DMA_ERROR)
++ return desc;
++ }
++ tx_desc = &desc->vd.tx;
++ vd = &desc->vd;
++ } else {
++ tx_desc = NULL;
++ }
++ spin_lock_irqsave(&chan->vc.lock, flags);
++ desc = pt_next_dma_desc(chan);
++ spin_unlock_irqrestore(&chan->vc.lock, flags);
++ } while (desc);
++
++ return NULL;
++}
++
++static void pt_cmd_callback(void *data, int err)
++{
++ struct pt_dma_desc *desc = data;
++ struct dma_chan *dma_chan;
++ struct pt_dma_chan *chan;
++ struct pt_device *pt;
++ int ret;
++
++ if (err == -EINPROGRESS)
++ return;
++
++ dma_chan = desc->vd.tx.chan;
++ chan = to_pt_chan(dma_chan);
++ pt = chan->pt;
++
++ if (err)
++ desc->status = DMA_ERROR;
++
++ while (true) {
++ /* If the queue is full, don't submit to it; busy-wait instead */
++ if (atomic64_read(&chan->cmd_q->q_cmd_count) >= (CMD_Q_LEN - 1) ||
++ pt_core_queue_full(pt, chan->cmd_q)) {
++ cpu_relax();
++ continue;
++ }
++
++ /* Check for DMA descriptor completion */
++ desc = pt_handle_active_desc(chan, desc);
++
++ /* Don't submit cmd if no descriptor or DMA is paused */
++ if (!desc)
++ break;
++
++ ret = pt_dma_start_desc(desc, chan);
++ if (!ret)
++ break;
++
++ desc->status = DMA_ERROR;
++ }
++}
++
++static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
++ unsigned long flags)
++{
++ struct pt_dma_desc *desc;
++ struct pt_cmd_queue *cmd_q = chan->cmd_q;
++
++ desc = kmem_cache_zalloc(chan->pt->dma_desc_cache, GFP_NOWAIT);
++ if (!desc)
++ return NULL;
++
++ vchan_tx_prep(&chan->vc, &desc->vd, flags);
++
++ desc->pt = chan->pt;
++ cmd_q->int_en = !!(flags & DMA_PREP_INTERRUPT);
++ desc->issued_to_hw = 0;
++ desc->status = DMA_IN_PROGRESS;
++
++ return desc;
++}
++
++static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
++ dma_addr_t dst,
++ dma_addr_t src,
++ unsigned int len,
++ unsigned long flags)
++{
++ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
++ struct pt_cmd_queue *cmd_q = chan->cmd_q;
++ struct pt_passthru_engine *pt_engine;
++ struct pt_dma_desc *desc;
++ struct pt_cmd *pt_cmd;
++
++ desc = pt_alloc_dma_desc(chan, flags);
++ if (!desc)
++ return NULL;
++
++ pt_cmd = &desc->pt_cmd;
++ pt_cmd->pt = chan->pt;
++ pt_engine = &pt_cmd->passthru;
++ pt_cmd->engine = PT_ENGINE_PASSTHRU;
++ pt_engine->src_dma = src;
++ pt_engine->dst_dma = dst;
++ pt_engine->src_len = len;
++ pt_cmd->pt_cmd_callback = pt_cmd_callback_tasklet;
++ pt_cmd->data = desc;
++
++ desc->len = len;
++
++ spin_lock_irqsave(&cmd_q->cmd_lock, flags);
++ list_add_tail(&pt_cmd->entry, &cmd_q->cmd);
++ spin_unlock_irqrestore(&cmd_q->cmd_lock, flags);
++
++ return desc;
++}
++
++static struct dma_async_tx_descriptor *
++pt_prep_dma_memcpy(struct dma_chan *dma_chan, dma_addr_t dst,
++ dma_addr_t src, size_t len, unsigned long flags)
++{
++ struct pt_dma_desc *desc;
++
++ desc = pt_create_desc(dma_chan, dst, src, len, flags);
++ if (!desc)
++ return NULL;
++
++ return &desc->vd.tx;
++}
++
++static struct dma_async_tx_descriptor *
++pt_prep_dma_interrupt(struct dma_chan *dma_chan, unsigned long flags)
++{
++ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
++ struct pt_dma_desc *desc;
++
++ desc = pt_alloc_dma_desc(chan, flags);
++ if (!desc)
++ return NULL;
++
++ return &desc->vd.tx;
++}
++
++static void pt_issue_pending(struct dma_chan *dma_chan)
++{
++ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
++ struct pt_dma_desc *desc;
++ unsigned long flags;
++
++ spin_lock_irqsave(&chan->vc.lock, flags);
++ vchan_issue_pending(&chan->vc);
++ desc = pt_next_dma_desc(chan);
++ spin_unlock_irqrestore(&chan->vc.lock, flags);
++
++ pt_cmd_callback(desc, 0);
++}
++
++static int pt_pause(struct dma_chan *dma_chan)
++{
++ return 0;
++}
++
++static int pt_resume(struct dma_chan *dma_chan)
++{
++ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
++ struct pt_dma_desc *desc = NULL;
++ unsigned long flags;
++
++ spin_lock_irqsave(&chan->vc.lock, flags);
++ pt_start_queue(chan->cmd_q);
++ desc = pt_next_dma_desc(chan);
++ spin_unlock_irqrestore(&chan->vc.lock, flags);
++
++ /* If there was something active, re-start */
++ if (desc)
++ pt_cmd_callback(desc, 0);
++
++ return 0;
++}
++
++static int pt_terminate_all(struct dma_chan *dma_chan)
++{
++ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
++ unsigned long flags;
++ LIST_HEAD(head);
++
++ spin_lock_irqsave(&chan->vc.lock, flags);
++ vchan_get_all_descriptors(&chan->vc, &head);
++ spin_unlock_irqrestore(&chan->vc.lock, flags);
++
++ vchan_dma_desc_free_list(&chan->vc, &head);
++ vchan_free_chan_resources(&chan->vc);
++
++ return 0;
++}
++
++int pt_dmaengine_register(struct pt_device *pt)
++{
++ struct pt_dma_chan *chan;
++ struct pt_cmd_queue *cmd_q;
++ struct dma_device *dma_dev = &pt->dma_dev;
++ char *cmd_cache_name;
++ char *desc_cache_name;
++ unsigned int i;
++ int ret;
++
++ pt->pt_dma_chan = devm_kcalloc(pt->dev, pt->cmd_q_count, sizeof(*pt->pt_dma_chan),
++ GFP_KERNEL);
++ if (!pt->pt_dma_chan)
++ return -ENOMEM;
++
++ cmd_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
++ "%s-dmaengine-cmd-cache",
++ dev_name(pt->dev));
++ if (!cmd_cache_name)
++ return -ENOMEM;
++
++ desc_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
++ "%s-dmaengine-desc-cache",
++ dev_name(pt->dev));
++ if (!desc_cache_name) {
++ ret = -ENOMEM;
++ goto err_cache;
++ }
++
++ pt->dma_desc_cache = kmem_cache_create(desc_cache_name,
++ sizeof(struct pt_dma_desc), 0,
++ SLAB_HWCACHE_ALIGN, NULL);
++ if (!pt->dma_desc_cache) {
++ ret = -ENOMEM;
++ goto err_cache;
++ }
++
++ dma_dev->dev = pt->dev;
++ dma_dev->src_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
++ dma_dev->dst_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
++ dma_dev->directions = DMA_MEM_TO_MEM;
++ dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
++ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
++ dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
++
++ /*
++ * PTDMA is intended to be used with the AMD NTB devices, hence
++ * marking it as DMA_PRIVATE.
++ */
++ dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
++
++ /* Set base and prep routines */
++ dma_dev->device_free_chan_resources = pt_free_chan_resources;
++ dma_dev->device_prep_dma_memcpy = pt_prep_dma_memcpy;
++ dma_dev->device_prep_dma_interrupt = pt_prep_dma_interrupt;
++ dma_dev->device_issue_pending = pt_issue_pending;
++ dma_dev->device_tx_status = dma_cookie_status;
++ dma_dev->device_pause = pt_pause;
++ dma_dev->device_resume = pt_resume;
++ dma_dev->device_terminate_all = pt_terminate_all;
++ dma_dev->device_synchronize = pt_synchronize;
++
++ INIT_LIST_HEAD(&dma_dev->channels);
++ for (i = 0; i < pt->cmd_q_count; i++) {
++ chan = pt->pt_dma_chan + i;
++ cmd_q = &pt->cmd_q[i];
++ chan->cmd_q = cmd_q;
++ chan->id = cmd_q->id;
++ chan->pt = pt;
++ chan->vc.desc_free = pt_do_cleanup;
++ vchan_init(&chan->vc, dma_dev);
++ }
++
++ ret = dma_async_device_register(dma_dev);
++ if (ret)
++ goto err_reg;
++
++ return 0;
++
++err_reg:
++ kmem_cache_destroy(pt->dma_desc_cache);
++
++err_cache:
++ kmem_cache_destroy(pt->dma_cmd_cache);
++
++ return ret;
++}
++
++void pt_dmaengine_unregister(struct pt_device *pt)
++{
++ struct dma_device *dma_dev = &pt->dma_dev;
++
++ dma_async_device_unregister(dma_dev);
++ kmem_cache_destroy(pt->dma_cmd_cache);
++}
+diff --git a/drivers/dma/ae4dma/ae4dma-pci.c b/drivers/dma/ae4dma/ae4dma-pci.c
+new file mode 100644
+index 000000000000..bd1170d05081
+--- /dev/null
++++ b/drivers/dma/ae4dma/ae4dma-pci.c
+@@ -0,0 +1,251 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * AMD AE4DMA device driver
++ * -- Based on the PTDMA driver
++ *
++ * Copyright (C) 2024 Advanced Micro Devices, Inc.
++ *
++ */
++
++#include <linux/device.h>
++#include <linux/dma-mapping.h>
++#include <linux/delay.h>
++#include <linux/interrupt.h>
++#include <linux/kernel.h>
++#include <linux/kthread.h>
++#include <linux/module.h>
++#include <linux/pci_ids.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++
++#include "ae4dma.h"
++
++static char test_device[32];
++module_param_string(device, test_device, sizeof(test_device), 0644);
++MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");
++
++struct pt_msix {
++ int msix_count;
++ struct msix_entry msix_entry[MAX_HW_QUEUES];
++};
++
++/*
++ * pt_alloc_struct - allocate and initialize the pt_device struct
++ *
++ * @dev: device struct of the PTDMA
++ */
++static struct pt_device *pt_alloc_struct(struct device *dev)
++{
++ struct pt_device *pt;
++
++ pt = devm_kzalloc(dev, sizeof(*pt), GFP_KERNEL);
++
++ if (!pt)
++ return NULL;
++ pt->dev = dev;
++
++ INIT_LIST_HEAD(&pt->cmd);
++
++ return pt;
++}
++
++static int pt_get_msix_irqs(struct pt_device *pt)
++{
++ struct pt_msix *pt_msix = pt->pt_msix;
++ struct device *dev = pt->dev;
++ struct pci_dev *pdev = to_pci_dev(dev);
++ int v, i, ret;
++
++ for (v = 0; v < ARRAY_SIZE(pt_msix->msix_entry); v++)
++ pt_msix->msix_entry[v].entry = v;
++
++ ret = pci_enable_msix_range(pdev, pt_msix->msix_entry, 1, v);
++ if (ret < 0)
++ return ret;
++
++ pt_msix->msix_count = ret;
++
++ for (i = 0; i < MAX_HW_QUEUES; i++)
++ pt->pt_irq[i] = pt_msix->msix_entry[i].vector;
++
++ return 0;
++}
++
++static int pt_get_msi_irq(struct pt_device *pt)
++{
++ struct device *dev = pt->dev;
++ struct pci_dev *pdev = to_pci_dev(dev);
++ int ret, i;
++
++ ret = pci_enable_msi(pdev);
++ if (ret)
++ return ret;
++
++ for (i = 0; i < MAX_HW_QUEUES; i++)
++ pt->pt_irq[i] = pdev->irq;
++
++ return 0;
++}
++
++static int pt_get_irqs(struct pt_device *pt)
++{
++ struct device *dev = pt->dev;
++ int ret;
++
++ ret = pt_get_msix_irqs(pt);
++ if (!ret)
++ return 0;
++
++ /* Couldn't get MSI-X vectors, try MSI */
++ dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
++ ret = pt_get_msi_irq(pt);
++ if (!ret)
++ return 0;
++
++ /* Couldn't get MSI interrupt */
++ dev_err(dev, "could not enable MSI (%d)\n", ret);
++
++ return ret;
++}
++
++static void pt_free_irqs(struct pt_device *pt)
++{
++ struct pt_msix *pt_msix = pt->pt_msix;
++ struct device *dev = pt->dev;
++ struct pci_dev *pdev = to_pci_dev(dev);
++ unsigned int i;
++
++ if (pt_msix->msix_count)
++ pci_disable_msix(pdev);
++ else if (pt->pt_irq[0])
++ pci_disable_msi(pdev);
++
++ for (i = 0; i < MAX_HW_QUEUES; i++)
++ pt->pt_irq[i] = 0;
++}
++
++static int pt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
++{
++ struct pt_device *pt;
++ struct pt_msix *pt_msix;
++ struct device *dev = &pdev->dev;
++ void __iomem * const *iomap_table;
++ int bar_mask;
++ int ret = -ENOMEM;
++
++ pt = pt_alloc_struct(dev);
++ if (!pt)
++ goto e_err;
++
++ pt_msix = devm_kzalloc(dev, sizeof(*pt_msix), GFP_KERNEL);
++ if (!pt_msix)
++ goto e_err;
++
++ pt->pt_msix = pt_msix;
++ pt->dev_vdata = (struct pt_dev_vdata *)id->driver_data;
++ if (!pt->dev_vdata) {
++ ret = -ENODEV;
++ dev_err(dev, "missing driver data\n");
++ goto e_err;
++ }
++
++ ret = pcim_enable_device(pdev);
++ if (ret) {
++ dev_err(dev, "pcim_enable_device failed (%d)\n", ret);
++ goto e_err;
++ }
++
++ bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
++ ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
++ if (ret) {
++ dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret);
++ goto e_err;
++ }
++
++ iomap_table = pcim_iomap_table(pdev);
++ if (!iomap_table) {
++ dev_err(dev, "pcim_iomap_table failed\n");
++ ret = -ENOMEM;
++ goto e_err;
++ }
++
++ pt->io_regs = iomap_table[pt->dev_vdata->bar];
++ if (!pt->io_regs) {
++ dev_err(dev, "ioremap failed\n");
++ ret = -ENOMEM;
++ goto e_err;
++ }
++
++ ret = pt_get_irqs(pt);
++ if (ret)
++ goto e_err;
++
++ pci_set_master(pdev);
++
++ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
++ if (ret) {
++ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
++ if (ret) {
++ dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
++ ret);
++ goto e_err;
++ }
++ }
++
++ dev_set_drvdata(dev, pt);
++
++ if (pt->dev_vdata)
++ ret = pt_core_init(pt);
++
++ if (ret)
++ goto e_err;
++
++ return 0;
++
++e_err:
++ dev_err(dev, "initialization failed ret = %d\n", ret);
++
++ return ret;
++}
++
++static void pt_pci_remove(struct pci_dev *pdev)
++{
++ struct device *dev = &pdev->dev;
++ struct pt_device *pt = dev_get_drvdata(dev);
++
++ if (!pt)
++ return;
++
++ if (pt->dev_vdata)
++ pt_core_destroy(pt);
++
++ pt_free_irqs(pt);
++}
++
++static const struct pt_dev_vdata dev_vdata[] = {
++ {
++ .bar = 0,
++ },
++};
++
++static const struct pci_device_id pt_pci_table[] = {
++ { PCI_VDEVICE(AMD, 0x14C8), (kernel_ulong_t)&dev_vdata[0] },
++ { PCI_VDEVICE(AMD, 0x14DC), (kernel_ulong_t)&dev_vdata[0] },
++ { PCI_VDEVICE(AMD, 0x149B), (kernel_ulong_t)&dev_vdata[0] },
++ /* Last entry must be zero */
++ { 0, }
++};
++MODULE_DEVICE_TABLE(pci, pt_pci_table);
++
++static struct pci_driver pt_pci_driver = {
++ .name = "ae4dma",
++ .id_table = pt_pci_table,
++ .probe = pt_pci_probe,
++ .remove = pt_pci_remove,
++};
++
++module_pci_driver(pt_pci_driver);
++
++MODULE_AUTHOR("amd");
++MODULE_LICENSE("GPL");
++MODULE_DESCRIPTION("AMD AE4DMA driver");
+diff --git a/drivers/dma/ae4dma/ae4dma.h b/drivers/dma/ae4dma/ae4dma.h
+new file mode 100644
+index 000000000000..30ce1c1ee29c
+--- /dev/null
++++ b/drivers/dma/ae4dma/ae4dma.h
+@@ -0,0 +1,416 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * AMD AE4DMA device driver
++ *
++ * Copyright (C) 2024 Advanced Micro Devices, Inc.
++ *
++ */
++
++#ifndef __PT_DEV_H__
++#define __PT_DEV_H__
++
++#include <linux/device.h>
++#include <linux/dmaengine.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <linux/mutex.h>
++#include <linux/list.h>
++#include <linux/wait.h>
++#include <linux/dmapool.h>
++
++#include "../virt-dma.h"
++
++#define MAX_PT_NAME_LEN 16
++#define MAX_DMAPOOL_NAME_LEN 32
++
++#define MAX_HW_QUEUES 16
++#define MAX_CMD_QLEN 32
++
++#define PT_ENGINE_PASSTHRU 5
++
++/* Register Mappings */
++#define IRQ_MASK_REG 0x040
++#define IRQ_STATUS_REG 0x200
++
++#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f)
++
++#define CMD_QUEUE_PRIO_OFFSET 0x00
++#define CMD_REQID_CONFIG_OFFSET 0x04
++#define CMD_TIMEOUT_OFFSET 0x08
++#define CMD_PT_VERSION 0x10
++
++#define CMD_Q_CONTROL_BASE 0x0000
++#define CMD_Q_TAIL_LO_BASE 0x0004
++#define CMD_Q_HEAD_LO_BASE 0x0008
++#define CMD_Q_INT_ENABLE_BASE 0x000C
++#define CMD_Q_INTERRUPT_STATUS_BASE 0x0010
++
++#define CMD_Q_STATUS_BASE 0x0100
++#define CMD_Q_INT_STATUS_BASE 0x0104
++#define CMD_Q_DMA_STATUS_BASE 0x0108
++#define CMD_Q_DMA_READ_STATUS_BASE 0x010C
++#define CMD_Q_DMA_WRITE_STATUS_BASE 0x0110
++#define CMD_Q_ABORT_BASE 0x0114
++#define CMD_Q_AX_CACHE_BASE 0x0118
++
++#define CMD_CONFIG_OFFSET 0x1120
++#define CMD_CLK_GATE_CTL_OFFSET 0x6004
++
++#define CMD_DESC_DW0_VAL 0x000002
++
++/* Address offset for virtual queue registers */
++#define CMD_Q_STATUS_INCR 0x1000
++
++/* Bit masks */
++#define CMD_CONFIG_REQID 0
++#define CMD_TIMEOUT_DISABLE 0
++#define CMD_CLK_DYN_GATING_DIS 0
++#define CMD_CLK_SW_GATE_MODE 0
++#define CMD_CLK_GATE_CTL 0
++#define CMD_QUEUE_PRIO GENMASK(2, 1)
++#define CMD_CONFIG_VHB_EN BIT(0)
++#define CMD_CLK_DYN_GATING_EN BIT(0)
++#define CMD_CLK_HW_GATE_MODE BIT(0)
++#define CMD_CLK_GATE_ON_DELAY BIT(12)
++#define CMD_CLK_GATE_OFF_DELAY BIT(12)
++
++#define CMD_CLK_GATE_CONFIG (CMD_CLK_GATE_CTL | \
++ CMD_CLK_HW_GATE_MODE | \
++ CMD_CLK_GATE_ON_DELAY | \
++ CMD_CLK_DYN_GATING_EN | \
++ CMD_CLK_GATE_OFF_DELAY)
++
++#define CMD_Q_LEN 32
++#define CMD_Q_RUN BIT(0)
++#define CMD_Q_HALT BIT(1)
++#define CMD_Q_MEM_LOCATION BIT(2)
++#define CMD_Q_SIZE_MASK GENMASK(4, 0)
++#define CMD_Q_SIZE GENMASK(7, 3)
++#define CMD_Q_SHIFT GENMASK(1, 0)
++#define QUEUE_SIZE_VAL ((ffs(CMD_Q_LEN) - 2) & \
++ CMD_Q_SIZE_MASK)
++#define Q_PTR_MASK (2 << (QUEUE_SIZE_VAL + 5) - 1)
++#define Q_DESC_SIZE sizeof(struct ptdma_desc)
++#define Q_SIZE(n) (CMD_Q_LEN * (n))
++
++#define INT_DESC_VALIDATED BIT(1)
++#define INT_DESC_PROCESSED BIT(2)
++#define INT_COMPLETION BIT(3)
++#define INT_ERROR BIT(4)
++
++#define SUPPORTED_INTERRUPTS (INT_COMPLETION | INT_ERROR)
++
++/****** Local Storage Block ******/
++#define LSB_START 0
++#define LSB_END 127
++#define LSB_COUNT (LSB_END - LSB_START + 1)
++
++#define PT_DMAPOOL_MAX_SIZE 64
++#define PT_DMAPOOL_ALIGN BIT(5)
++
++#define PT_PASSTHRU_BLOCKSIZE 512
++
++struct pt_device;
++
++struct pt_tasklet_data {
++ struct completion completion;
++ struct pt_cmd *cmd;
++};
++
++/*
++ * struct pt_passthru_engine - pass-through operation
++ * without performing DMA mapping
++ * @mask: mask to be applied to data
++ * @mask_len: length in bytes of mask
++ * @src_dma: data to be used for this operation
++ * @dst_dma: data produced by this operation
++ * @src_len: length in bytes of data used for this operation
++ *
++ * Variables required to be set when calling pt_enqueue_cmd():
++ * - bit_mod, byte_swap, src, dst, src_len
++ * - mask, mask_len if bit_mod is not PT_PASSTHRU_BITWISE_NOOP
++ */
++struct pt_passthru_engine {
++ dma_addr_t mask;
++ u32 mask_len; /* In bytes */
++
++ dma_addr_t src_dma, dst_dma;
++ u64 src_len; /* In bytes */
++};
++
++/*
++ * struct pt_cmd - PTDMA operation request
++ * @entry: list element
++ * @work: work element used for callbacks
++ * @pt: PT device to be run on
++ * @ret: operation return code
++ * @flags: cmd processing flags
++ * @engine: PTDMA operation to perform (passthru)
++ * @engine_error: PT engine return code
++ * @passthru: engine specific structures, refer to specific engine struct below
++ * @callback: operation completion callback function
++ * @data: parameter value to be supplied to the callback function
++ *
++ * Variables required to be set when calling pt_enqueue_cmd():
++ * - engine, callback
++ * - See the operation structures below for what is required for each
++ * operation.
++ */
++struct pt_cmd {
++ struct list_head entry;
++ struct work_struct work;
++ struct pt_device *pt;
++ int ret;
++ u32 engine;
++ u32 engine_error;
++ struct pt_passthru_engine passthru;
++ /* Completion callback support */
++ void (*pt_cmd_callback)(void *data, int err);
++ void *data;
++ u8 qid;
++};
++
++struct pt_dma_desc {
++ struct virt_dma_desc vd;
++ struct pt_device *pt;
++ enum dma_status status;
++ size_t len;
++ bool issued_to_hw;
++ struct pt_cmd pt_cmd;
++};
++
++struct pt_dma_chan {
++ struct virt_dma_chan vc;
++ struct pt_device *pt;
++ struct pt_cmd_queue *cmd_q;
++ u32 id;
++};
++
++struct pt_cmd_queue {
++ struct pt_device *pt;
++
++ /* Queue identifier */
++ u32 id;
++
++ /* Queue dma pool */
++ struct dma_pool *dma_pool;
++
++ /* Queue base address (not necessarily aligned) */
++ struct ptdma_desc *qbase;
++
++ /* Aligned queue start address (per requirement) */
++ struct mutex q_mutex ____cacheline_aligned;
++ spinlock_t q_lock ____cacheline_aligned;
++ volatile unsigned long qidx;
++ volatile unsigned long ridx;
++
++
++ unsigned int qsize;
++ dma_addr_t qbase_dma;
++ dma_addr_t qdma_tail;
++
++ unsigned int active;
++ unsigned int suspended;
++
++ /* Interrupt flag */
++ bool int_en;
++
++ /* Register addresses for queue */
++ void __iomem *reg_control;
++ u32 qcontrol; /* Cached control register */
++
++ /* Status values from job */
++ u32 int_status;
++ u32 q_status;
++ u32 q_int_status;
++ u32 cmd_error;
++ atomic_t dridx;
++ /* Interrupt wait queue */
++ wait_queue_head_t int_queue;
++ unsigned int int_rcvd;
++
++ wait_queue_head_t q_space;
++ unsigned int q_space_available;
++
++ /* Queue Statistics */
++ unsigned long total_pt_ops;
++ atomic64_t q_cmd_count;
++ atomic_t tail_wi;
++ volatile unsigned long desc_id_counter;
++ struct pt_tasklet_data tdata;
++
++ struct list_head cmd;
++ spinlock_t cmd_lock ____cacheline_aligned;
++ spinlock_t cmd_control ____cacheline_aligned;
++ struct mutex cmd_mutex;
++} ____cacheline_aligned;
++
++struct pt_device {
++ struct list_head entry;
++
++ unsigned int ord;
++ char name[MAX_PT_NAME_LEN];
++
++ struct device *dev;
++
++ /* Bus specific device information */
++ struct pt_msix *pt_msix;
++
++ struct pt_dev_vdata *dev_vdata;
++
++ unsigned int pt_irq[MAX_HW_QUEUES];
++
++ /* I/O area used for device communication */
++ void __iomem *io_regs;
++
++ spinlock_t cmd_lock ____cacheline_aligned;
++ unsigned int cmd_count;
++ struct list_head cmd;
++
++ /*
++ * The command queue. This represent the queue available on the
++ * PTDMA that are available for processing cmds
++ */
++ struct pt_cmd_queue cmd_q[MAX_HW_QUEUES];
++ unsigned int cmd_q_count;
++
++ /* Support for the DMA Engine capabilities */
++ struct dma_device dma_dev;
++ struct pt_dma_chan *pt_dma_chan;
++ struct kmem_cache *dma_cmd_cache;
++ struct kmem_cache *dma_desc_cache;
++
++ wait_queue_head_t lsb_queue;
++
++ /* Device Statistics */
++ volatile unsigned long current_interrupts;
++ volatile unsigned long total_interrupts;
++
++};
++
++/*
++ * descriptor for PTDMA commands
++ * 8 32-bit words:
++ * word 0: function; engine; control bits
++ * word 1: length of source data
++ * word 2: low 32 bits of source pointer
++ * word 3: upper 16 bits of source pointer; source memory type
++ * word 4: low 32 bits of destination pointer
++ * word 5: upper 16 bits of destination pointer; destination memory type
++ * word 6: reserved 32 bits
++ * word 7: reserved 32 bits
++ */
++
++#define DWORD0_SOC BIT(0)
++#define DWORD0_IOC BIT(1)
++#define DWORD0_SOM BIT(3)
++#define DWORD0_EOM BIT(4)
++#define DWORD0_DMT GENMASK(5, 4)
++#define DWORD0_SMT GENMASK(7, 6)
++
++#define DWORD0_DMT_MEM 0x0
++#define DWORD0_DMT_IO 1<<4
++#define DWORD0_SMT_MEM 0x0
++#define DWORD0_SMT_IO 1<<6
++
++union dwou {
++ u32 dw0;
++ struct dword0 {
++ u8 byte0;
++ u8 byte1;
++ u16 timestamp;
++ } dws;
++};
++
++struct dword1 {
++ u8 status;
++ u8 err_code;
++ u16 desc_id;
++};
++
++struct ptdma_desc {
++ union dwou dwouv;
++ struct dword1 dw1;
++ u32 length;
++ struct dword1 uu;
++ u32 src_hi;
++ u32 src_lo;
++ u32 dst_hi;
++ u32 dst_lo;
++};
++
++struct desc_work {
++ struct ptdma_desc *desc;
++ struct pt_cmd_queue *cmd_q;
++ struct list_head entry;
++ bool submitted;
++ bool processed;
++ bool init;
++ struct work_struct work;
++};
++
++/* Structure to hold PT device data */
++struct pt_dev_vdata {
++ const unsigned int bar;
++};
++
++int pt_dmaengine_register(struct pt_device *pt);
++void pt_dmaengine_unregister(struct pt_device *pt);
++
++int pt_core_init(struct pt_device *pt);
++void pt_core_destroy(struct pt_device *pt);
++
++int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
++ struct pt_passthru_engine *pt_engine);
++
++void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue *cmd_q);
++void pt_start_queue(struct pt_cmd_queue *cmd_q);
++void pt_stop_queue(struct pt_cmd_queue *cmd_q);
++
++static inline void pt_core_disable_queue_interrupts(struct pt_device *pt, struct pt_cmd_queue *cmd_q)
++{
++ u32 status = ioread32(cmd_q->reg_control);
++
++ status &= ~0x7;
++ iowrite32(status, cmd_q->reg_control);
++}
++
++static inline void pt_core_enable_queue_interrupts(struct pt_device *pt, struct pt_cmd_queue *cmd_q)
++{
++ u32 status = ioread32(cmd_q->reg_control);
++
++ status |= 0x7;
++ iowrite32(status, cmd_q->reg_control);
++}
++
++static inline bool pt_core_queue_full(struct pt_device *pt, struct pt_cmd_queue *cmd_q)
++{
++ u32 q_sts = ioread32(cmd_q->reg_control + 0x4) & 0x06;
++
++ u32 rear_ri = ioread32(cmd_q->reg_control + 0x0C);
++ u32 front_wi = ioread32(cmd_q->reg_control + 0x10);
++
++ q_sts >>= 1;
++
++ if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1))
++ return true;
++
++ return false;
++}
++
++static inline bool pt_core_queue_empty(struct pt_device *pt, struct pt_cmd_queue *cmd_q)
++{
++ u32 rear_ri = ioread32(cmd_q->reg_control + 0x0C);
++ u32 front_wi = ioread32(cmd_q->reg_control + 0x10);
++
++ if (front_wi == rear_ri)
++ return true;
++
++ return false;
++}
++#endif
+--
+2.34.1
+
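Editor's note, for readers following the queue handling in the patch above: pt_core_execute_cmd() advances the write index modulo CMD_Q_LEN, and pt_core_queue_full()/pt_core_queue_empty() in ae4dma.h derive occupancy from the hardware write and read indices. The host-side sketch below (illustrative only, not part of the patch; QLEN, wi and ri are stand-ins for MAX_CMD_QLEN and the registers read at offsets 0x10 and 0x0C) works through that ring arithmetic, including the convention of leaving one slot unused so that full and empty are distinguishable.

/* ring_math_demo.c - standalone illustration of the ae4dma ring-index rule */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define QLEN 32u	/* mirrors MAX_CMD_QLEN in ae4dma.h */

static unsigned int ring_occupancy(uint32_t wi, uint32_t ri)
{
	/* Entries submitted (write index) but not yet consumed (read index) */
	return (QLEN + wi - ri) % QLEN;
}

static bool ring_full(uint32_t wi, uint32_t ri)
{
	/* One slot stays unused, as in pt_core_queue_full() */
	return ring_occupancy(wi, ri) >= QLEN - 1;
}

static bool ring_empty(uint32_t wi, uint32_t ri)
{
	return wi == ri;	/* as in pt_core_queue_empty() */
}

int main(void)
{
	assert(ring_empty(5, 5));
	assert(ring_occupancy(5, 2) == 3);	/* no wrap-around */
	assert(ring_occupancy(1, 30) == 3);	/* write index has wrapped */
	assert(ring_full(31, 0));		/* 31 of 32 slots in flight */
	return 0;
}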