Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux-6.6/linux-yocto-6.6/0001-ae4dma-Initial-ae4dma-controller-driver-with-multi-c.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux-6.6/linux-yocto-6.6/0001-ae4dma-Initial-ae4dma-controller-driver-with-multi-c.patch | 1587
1 file changed, 1587 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux-6.6/linux-yocto-6.6/0001-ae4dma-Initial-ae4dma-controller-driver-with-multi-c.patch b/meta-amd-bsp/recipes-kernel/linux-6.6/linux-yocto-6.6/0001-ae4dma-Initial-ae4dma-controller-driver-with-multi-c.patch new file mode 100644 index 00000000..1d0a08ec --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux-6.6/linux-yocto-6.6/0001-ae4dma-Initial-ae4dma-controller-driver-with-multi-c.patch @@ -0,0 +1,1587 @@ +From 3d6d0e4175d9d78c4bd2e338accedfe06275e898 Mon Sep 17 00:00:00 2001 +From: Basavaraj Natikar <Basavaraj.Natikar@amd.com> +Date: Fri, 9 Feb 2024 18:25:37 +0530 +Subject: [PATCH 1/5] ae4dma: Initial ae4dma controller driver with + multi-channel + +Add support for AMD AE4DMA controller. It performs high-bandwidth +memory to memory and IO copy operation. Device commands are managed +via a circular queue of 'descriptors', each of which specifies source +and destination addresses for copying a single buffer of data. + +Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com> +--- + drivers/dma/Kconfig | 2 + + drivers/dma/Makefile | 1 + + drivers/dma/ae4dma/Kconfig | 13 + + drivers/dma/ae4dma/Makefile | 10 + + drivers/dma/ae4dma/ae4dma-dev.c | 387 ++++++++++++++++++++++++ + drivers/dma/ae4dma/ae4dma-dmaengine.c | 417 ++++++++++++++++++++++++++ + drivers/dma/ae4dma/ae4dma-pci.c | 251 ++++++++++++++++ + drivers/dma/ae4dma/ae4dma.h | 416 +++++++++++++++++++++++++ + 8 files changed, 1497 insertions(+) + create mode 100644 drivers/dma/ae4dma/Kconfig + create mode 100644 drivers/dma/ae4dma/Makefile + create mode 100644 drivers/dma/ae4dma/ae4dma-dev.c + create mode 100644 drivers/dma/ae4dma/ae4dma-dmaengine.c + create mode 100644 drivers/dma/ae4dma/ae4dma-pci.c + create mode 100644 drivers/dma/ae4dma/ae4dma.h + +diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig +index e928f2ca0f1e..34344fc2454c 100644 +--- a/drivers/dma/Kconfig ++++ b/drivers/dma/Kconfig +@@ -772,6 +772,8 @@ source "drivers/dma/fsl-dpaa2-qdma/Kconfig" + + source "drivers/dma/lgm/Kconfig" + ++source "drivers/dma/ae4dma/Kconfig" ++ + # clients + comment "DMA Clients" + depends on DMA_ENGINE +diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile +index dfd40d14e408..9df21ec28966 100644 +--- a/drivers/dma/Makefile ++++ b/drivers/dma/Makefile +@@ -83,6 +83,7 @@ obj-$(CONFIG_XGENE_DMA) += xgene-dma.o + obj-$(CONFIG_ST_FDMA) += st_fdma.o + obj-$(CONFIG_FSL_DPAA2_QDMA) += fsl-dpaa2-qdma/ + obj-$(CONFIG_INTEL_LDMA) += lgm/ ++obj-$(CONFIG_AMD_AE4DMA) += ae4dma/ + + obj-y += mediatek/ + obj-y += qcom/ +diff --git a/drivers/dma/ae4dma/Kconfig b/drivers/dma/ae4dma/Kconfig +new file mode 100644 +index 000000000000..50a69f1b984d +--- /dev/null ++++ b/drivers/dma/ae4dma/Kconfig +@@ -0,0 +1,13 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++config AMD_AE4DMA ++ tristate "AMD AE4DMA Engine" ++ depends on X86_64 && PCI ++ select DMA_ENGINE ++ select DMA_VIRTUAL_CHANNELS ++ help ++ Enable support for the AMD AE4DMA controller. This controller ++ provides DMA capabilities to perform high bandwidth memory to ++ memory and IO copy operations. It performs DMA transfer through ++ queue-based descriptor management. This DMA controller is intended ++ to be used with AMD Non-Transparent Bridge devices and not for ++ general purpose peripheral DMA. 
+diff --git a/drivers/dma/ae4dma/Makefile b/drivers/dma/ae4dma/Makefile +new file mode 100644 +index 000000000000..b1e431842d18 +--- /dev/null ++++ b/drivers/dma/ae4dma/Makefile +@@ -0,0 +1,10 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++# ++# AMD AE4DMA driver ++# ++ ++obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o ++ ++ae4dma-objs := ae4dma-dev.o ae4dma-dmaengine.o ++ ++ae4dma-$(CONFIG_PCI) += ae4dma-pci.o +diff --git a/drivers/dma/ae4dma/ae4dma-dev.c b/drivers/dma/ae4dma/ae4dma-dev.c +new file mode 100644 +index 000000000000..9163327a8fc4 +--- /dev/null ++++ b/drivers/dma/ae4dma/ae4dma-dev.c +@@ -0,0 +1,387 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++ ++/* ++ * AMD AE4DMA device driver ++ * -- Based on the PTDMA driver ++ * ++ * Copyright (C) 2024 Advanced Micro Devices, Inc. ++ * ++ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com> ++ ++ */ ++ ++#include <linux/bitfield.h> ++#include <linux/dma-mapping.h> ++#include <linux/interrupt.h> ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/pci.h> ++#include <linux/delay.h> ++#include <linux/time.h> ++ ++#include "ae4dma.h" ++#include "../dmaengine.h" ++#include "../virt-dma.h" ++ ++static unsigned int max_hw_q = 2; ++module_param(max_hw_q, uint, 0444); ++MODULE_PARM_DESC(max_hw_q, "Max hw queues supported by engine (any non-zero value, default: 1)"); ++ ++static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan) ++{ ++ return container_of(dma_chan, struct pt_dma_chan, vc.chan); ++} ++/* Human-readable error strings */ ++static char *pt_error_codes[] = { ++ "", ++ "ERR 01: INVALID HEADER DW0", ++ "ERR 02: INVALID STATUS", ++ "ERR 03: INVALID LENGHT - 4 BYTE ALIGNMENT", ++ "ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT", ++ "ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT", ++ "ERR 06: INVALID ALIGNMENT", ++ "ERR 07: INVALID DESCRIPTOR", ++}; ++ ++static void pt_log_error(struct pt_device *d, int e) ++{ ++ if (e<=7) ++ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", pt_error_codes[e], e); ++ if((e>7) && (e<=15)) ++ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e); ++ else if((e>15) && (e<=31)) ++ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e); ++ else if((e>31) && (e<=63)) ++ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e); ++ else if((e>63) && (e<=127)) ++ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e); ++ else if((e>127) && (e<=255)) ++ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e); ++ ++} ++ ++void pt_start_queue(struct pt_cmd_queue *cmd_q) ++{ ++ u32 status = readl(cmd_q->reg_control); ++ status |= (cmd_q->qcontrol | CMD_Q_RUN); ++ /* Turn on the run bit */ ++ writel(status, cmd_q->reg_control); ++} ++ ++void pt_stop_queue(struct pt_cmd_queue *cmd_q) ++{ ++ /* Turn off the run bit */ ++ writel(cmd_q->qcontrol & ~CMD_Q_RUN, cmd_q->reg_control); ++} ++ ++static u16 pt_check_status_error(struct pt_cmd_queue *cmd_q, int idx) ++{ ++ struct pt_device *pt = cmd_q->pt; ++ struct device *dev = pt->dev; ++ struct ptdma_desc desc; ++ u8 status; ++ ++ do { ++ dma_sync_single_for_device(dev, ((cmd_q->qbase_dma) + (idx * sizeof(struct ptdma_desc))), sizeof(struct ptdma_desc), DMA_FROM_DEVICE); ++ memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ptdma_desc)); ++ dma_sync_single_for_device(dev, ((cmd_q->qbase_dma) + (idx * sizeof(struct ptdma_desc))), sizeof(struct ptdma_desc), DMA_FROM_DEVICE); ++ mb(); ++ ++ status = desc.dw1.status; ++ if (status) { ++ if (status != 0x3) { ++ /* On error, only save the first error value */ 
++ cmd_q->cmd_error = desc.dw1.err_code; ++ if (cmd_q->cmd_error) { ++ /* ++ * Log the error and flush the queue by ++ * moving the head pointer ++ */ ++ pt_log_error(cmd_q->pt, cmd_q->cmd_error); ++ } ++ } ++ } ++ } while (status == 0); ++ ++ return desc.dwouv.dws.timestamp; ++} ++ ++static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd_q) ++{ ++ struct pt_device *pt = cmd_q->pt; ++ struct device *dev = pt->dev; ++ unsigned long flags; ++ ++ bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0); ++ ++ if (soc) { ++ desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc->dwouv.dw0); ++ desc->dwouv.dw0 &= ~DWORD0_SOC; ++ } ++ ++ mutex_lock(&cmd_q->q_mutex); ++ spin_lock_irqsave(&cmd_q->cmd_lock, flags); ++ ++ desc->dwouv.dws.timestamp = cmd_q->desc_id_counter++; ++ ++ volatile u32 tail_wi = atomic_read(&cmd_q->tail_wi); ++ ++ dma_sync_single_for_device(dev, (cmd_q->qbase_dma + (tail_wi * sizeof(struct ptdma_desc))), ++ sizeof(struct ptdma_desc), DMA_TO_DEVICE); ++ memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct ptdma_desc)); ++ dma_sync_single_for_device(dev, (cmd_q->qbase_dma + (tail_wi * sizeof(struct ptdma_desc))), ++ sizeof(struct ptdma_desc), DMA_TO_DEVICE); ++ cmd_q->qidx = (cmd_q->qidx + 1) % CMD_Q_LEN; ++ ++ atomic64_inc(&cmd_q->q_cmd_count); ++ tail_wi = (tail_wi + 1) % CMD_Q_LEN; ++ atomic_set(&cmd_q->tail_wi, tail_wi); ++ mb(); ++ writel(tail_wi , cmd_q->reg_control + 0x10); ++ mb(); ++ spin_unlock_irqrestore(&cmd_q->cmd_lock, flags); ++ mutex_unlock(&cmd_q->q_mutex); ++ ++ return 0; ++} ++ ++int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, ++ struct pt_passthru_engine *pt_engine) ++{ ++ struct ptdma_desc desc; ++ ++ cmd_q->cmd_error = 0; ++ cmd_q->total_pt_ops++; ++ memset(&desc, 0, sizeof(desc)); ++ desc.dwouv.dws.byte0 = CMD_DESC_DW0_VAL; ++ ++ desc.dw1.status = 0; ++ desc.dw1.err_code = 0; ++ desc.dw1.desc_id = 0; ++ ++ desc.length = pt_engine->src_len; ++ ++ desc.src_lo = upper_32_bits(pt_engine->src_dma); ++ desc.src_hi = lower_32_bits(pt_engine->src_dma); ++ desc.dst_lo = upper_32_bits(pt_engine->dst_dma); ++ desc.dst_hi = lower_32_bits(pt_engine->dst_dma); ++ ++ return pt_core_execute_cmd(&desc, cmd_q); ++} ++ ++static irqreturn_t pt_core_irq_handler(int irq, void *data) ++{ ++ struct pt_cmd_queue *cmd_q = data; ++ struct pt_device *pt = cmd_q->pt; ++ u32 status = readl(cmd_q->reg_control + 0x4); ++ u8 q_intr_type = (status>>24) & 0xf; ++ unsigned long flags; ++ struct pt_cmd *cmd; ++ ++ pt->total_interrupts++; ++ ++ if (q_intr_type == 0x4) ++ dev_info(pt->dev, "AE4DMA INTR: %s (0x%x)\n", "queue desc error", q_intr_type); ++ else if (q_intr_type == 0x2) ++ dev_info(pt->dev, "AE4DMA INTR: %s (0x%x)\n", "queue stopped", q_intr_type); ++ else if (q_intr_type == 0x1) ++ dev_info(pt->dev, "AE4DMA INTR: %s (0x%x)\n", "queue empty", q_intr_type); ++ else if (q_intr_type != 0x3) ++ dev_info(pt->dev, "AE4DMA INTR: %s (0x%x)\n", "unknown error", q_intr_type); ++ ++ spin_lock_irqsave(&cmd_q->cmd_lock, flags); ++ volatile u32 crdi = readl(cmd_q->reg_control + 0x0C); ++ volatile u32 dridx = atomic_read(&cmd_q->dridx); ++ while (dridx != crdi) { ++ ++ if(list_empty(&cmd_q->cmd)) { ++ ++ break; ++ } ++ cmd = list_first_entry(&cmd_q->cmd, struct pt_cmd, entry); ++ list_del(&cmd->entry); ++ pt_check_status_error(cmd_q, dridx); ++ cmd->pt_cmd_callback(cmd->data, cmd->ret); ++ atomic64_dec(&cmd_q->q_cmd_count); ++ dridx = (dridx + 1) % CMD_Q_LEN; ++ atomic_set(&cmd_q->dridx, dridx); ++ mb(); ++ } ++ spin_unlock_irqrestore(&cmd_q->cmd_lock, flags); ++ ++ status = 
readl(cmd_q->reg_control + 0x14); ++ if (status & 1) { ++ status = status & ~1; ++ writel(status , cmd_q->reg_control + 0x14); ++ } ++ ++ return IRQ_HANDLED; ++} ++ ++int pt_core_init(struct pt_device *pt) ++{ ++ char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; ++ struct pt_cmd_queue *cmd_q; ++ u32 dma_addr_lo, dma_addr_hi; ++ struct device *dev = pt->dev; ++ struct dma_pool *dma_pool; ++ unsigned int i; ++ int ret; ++ u32 q_per_eng = max_hw_q; ++ ++ /* Update the device registers with queue information. */ ++ writel(q_per_eng, pt->io_regs); ++ ++ q_per_eng = readl(pt->io_regs); ++ ++ for (i = 0; i < q_per_eng; i++) { ++ ++ /* Allocate a dma pool for the queue */ ++ snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d", dev_name(pt->dev), i); ++ ++ dma_pool = dma_pool_create(dma_pool_name, dev, ++ PT_DMAPOOL_MAX_SIZE, ++ PT_DMAPOOL_ALIGN, 0); ++ if (!dma_pool) ++ return -ENOMEM; ++ ++ /* ae4dma core initialisation */ ++ cmd_q = &pt->cmd_q[i]; ++ cmd_q->id = pt->cmd_q_count; ++ pt->cmd_q_count++; ++ ++ cmd_q->pt = pt; ++ cmd_q->dma_pool = dma_pool; ++ mutex_init(&cmd_q->q_mutex); ++ spin_lock_init(&cmd_q->q_lock); ++ ++ /* Preset some register values (Q size is 32byte (0x20)) */ ++ cmd_q->reg_control = pt->io_regs + ((i + 1) * 0x20); ++ ++ /* Page alignment satisfies our needs for N <= 128 */ ++ cmd_q->qsize = Q_SIZE(Q_DESC_SIZE); ++ ++ cmd_q->qbase = kmalloc(cmd_q->qsize, GFP_KERNEL); ++ cmd_q->qbase_dma = dma_map_single(dev, cmd_q->qbase, cmd_q->qsize, DMA_BIDIRECTIONAL); ++ ++ if (dma_mapping_error(dev, cmd_q->qbase_dma)) { ++ dev_err(dev, "dma mapping error \n"); ++ goto e_destroy_pool; ++ } ++ ++ cmd_q->qidx = 0; ++ atomic64_set(&cmd_q->q_cmd_count, 0); ++ atomic_set(&cmd_q->dridx ,0); ++ cmd_q->q_space_available = 0; ++ ++ atomic_set(&cmd_q->tail_wi ,readl(cmd_q->reg_control + 0x10)); ++ ++ init_waitqueue_head(&cmd_q->int_queue); ++ init_waitqueue_head(&cmd_q->q_space); ++ ++ dev_dbg(dev, "queue #%u available\n", i); ++ } ++ ++ if (pt->cmd_q_count == 0) { ++ dev_notice(dev, "no command queues available\n"); ++ ret = -EIO; ++ goto e_free_dma; ++ } ++ ++ dev_info(dev, "BB1.0011 AE4DMA\n"); ++ for (i = 0; i < pt->cmd_q_count; i++) { ++ cmd_q = &pt->cmd_q[i]; ++ ++ cmd_q->qcontrol = 0; /* Start with nothing */ ++ ++ /* Request an irq */ ++ ret = request_irq(pt->pt_irq[i], pt_core_irq_handler, 0, dev_name(pt->dev), cmd_q); ++ if (ret) { ++ dev_err(dev, "unable to allocate an IRQ\n"); ++ goto e_free_dma; ++ } ++ ++ /* Update the device registers with queue information. 
*/ ++ writel(CMD_Q_LEN, cmd_q->reg_control + 0x08); // Max Index (cmd queue lenght) ++ ++ cmd_q->qdma_tail = cmd_q->qbase_dma; ++ ++ dma_addr_lo = lower_32_bits(cmd_q->qdma_tail); ++ writel((u32)dma_addr_lo, cmd_q->reg_control + 0x18); ++ ++ dma_addr_lo = readl(cmd_q->reg_control + 0x18); ++ ++ dma_addr_hi = upper_32_bits(cmd_q->qdma_tail); ++ writel((u32)dma_addr_hi, cmd_q->reg_control + 0x1C); ++ ++ dma_addr_hi = readl(cmd_q->reg_control + 0x1C); ++ ++ pt_core_enable_queue_interrupts(pt, cmd_q); ++ ++ INIT_LIST_HEAD(&cmd_q->cmd); ++ } ++ ++ /* Register the DMA engine support */ ++ ret = pt_dmaengine_register(pt); ++ if (ret) ++ goto e_free_irq; ++ ++ return 0; ++ ++e_free_irq: ++ for (i = 0; i < pt->cmd_q_count; i++) ++ free_irq(pt->pt_irq[i], pt); ++ ++e_free_dma: ++ for (i = 0; i < pt->cmd_q_count; i++) { ++ cmd_q = &pt->cmd_q[i]; ++ dma_unmap_single(dev, cmd_q->qbase_dma, cmd_q->qsize, DMA_BIDIRECTIONAL); ++ kfree(cmd_q->qbase); ++ } ++ ++e_destroy_pool: ++ for (i = 0; i < pt->cmd_q_count; i++) ++ dma_pool_destroy(pt->cmd_q[i].dma_pool); ++ ++ kmem_cache_destroy(pt->dma_desc_cache); ++ return ret; ++} ++ ++void pt_core_destroy(struct pt_device *pt) ++{ ++ struct device *dev = pt->dev; ++ struct pt_cmd_queue *cmd_q; ++ struct pt_cmd *cmd; ++ unsigned int i; ++ ++ /* Unregister the DMA engine */ ++ pt_dmaengine_unregister(pt); ++ ++ for (i = 0; i < pt->cmd_q_count; i++) { ++ cmd_q = &pt->cmd_q[i]; ++ ++ wake_up_all(&cmd_q->q_space); ++ wake_up_all(&cmd_q->int_queue); ++ ++ /* Disable and clear interrupts */ ++ pt_core_disable_queue_interrupts(pt, cmd_q); ++ ++ /* Turn off the run bit */ ++ pt_stop_queue(cmd_q); ++ ++ free_irq(pt->pt_irq[i], cmd_q); ++ ++ dma_unmap_single(dev, cmd_q->qbase_dma, cmd_q->qsize, DMA_BIDIRECTIONAL); ++ kfree(cmd_q->qbase); ++ } ++ ++ /* Flush the cmd queue */ ++ while (!list_empty(&pt->cmd)) { ++ /* Invoke the callback directly with an error code */ ++ cmd = list_first_entry(&pt->cmd, struct pt_cmd, entry); ++ list_del(&cmd->entry); ++ cmd->pt_cmd_callback(cmd->data, -ENODEV); ++ } ++ ++ kmem_cache_destroy(pt->dma_desc_cache); ++} +diff --git a/drivers/dma/ae4dma/ae4dma-dmaengine.c b/drivers/dma/ae4dma/ae4dma-dmaengine.c +new file mode 100644 +index 000000000000..03f28eb10ad6 +--- /dev/null ++++ b/drivers/dma/ae4dma/ae4dma-dmaengine.c +@@ -0,0 +1,417 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * AMD AE4DMA device driver ++ * -- Based on the PTDMA driver ++ * ++ * Copyright (C) 2024 Advanced Micro Devices, Inc. 
++ * ++ */ ++#include <linux/delay.h> ++#include "ae4dma.h" ++#include "../dmaengine.h" ++#include "../virt-dma.h" ++ ++static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan) ++{ ++ return container_of(dma_chan, struct pt_dma_chan, vc.chan); ++} ++ ++static inline struct pt_dma_desc *to_pt_desc(struct virt_dma_desc *vd) ++{ ++ return container_of(vd, struct pt_dma_desc, vd); ++} ++ ++static void pt_free_chan_resources(struct dma_chan *dma_chan) ++{ ++ struct pt_dma_chan *chan = to_pt_chan(dma_chan); ++ ++ vchan_free_chan_resources(&chan->vc); ++} ++ ++static void pt_synchronize(struct dma_chan *dma_chan) ++{ ++ struct pt_dma_chan *chan = to_pt_chan(dma_chan); ++ ++ vchan_synchronize(&chan->vc); ++} ++ ++static void pt_do_cleanup(struct virt_dma_desc *vd) ++{ ++ struct pt_dma_desc *desc = to_pt_desc(vd); ++ struct pt_device *pt = desc->pt; ++ ++ kmem_cache_free(pt->dma_desc_cache, desc); ++} ++ ++static int pt_dma_start_desc(struct pt_dma_desc *desc, struct pt_dma_chan *chan) ++{ ++ struct pt_passthru_engine *pt_engine; ++ struct pt_device *pt; ++ struct pt_cmd *pt_cmd; ++ struct pt_cmd_queue *cmd_q; ++ ++ desc->issued_to_hw = 1; ++ list_del(&desc->vd.node); ++ ++ pt_cmd = &desc->pt_cmd; ++ pt = pt_cmd->pt; ++ cmd_q = chan->cmd_q; ++ pt_engine = &pt_cmd->passthru; ++ ++ pt_cmd->qid = cmd_q->qidx; ++ cmd_q->tdata.cmd = pt_cmd; ++ ++ /* Execute the command */ ++ pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine); ++ ++ return 0; ++} ++ ++static struct pt_dma_desc *pt_next_dma_desc(struct pt_dma_chan *chan) ++{ ++ struct virt_dma_desc *vd = vchan_next_desc(&chan->vc); ++ ++ return vd ? to_pt_desc(vd) : NULL; ++} ++ ++static void pt_cmd_callback_tasklet(void *data, int err) ++{ ++ struct pt_dma_desc *desc = data; ++ struct dma_chan *dma_chan; ++ struct pt_dma_chan *chan; ++ struct dma_async_tx_descriptor *tx_desc; ++ struct virt_dma_desc *vd; ++ unsigned long flags; ++ ++ dma_chan = desc->vd.tx.chan; ++ chan = to_pt_chan(dma_chan); ++ ++ if (err == -EINPROGRESS) ++ return; ++ ++ tx_desc = &desc->vd.tx; ++ vd = &desc->vd; ++ ++ if (err) ++ desc->status = DMA_ERROR; ++ ++ ++ spin_lock_irqsave(&chan->vc.lock, flags); ++ if (desc) { ++ if (desc->status != DMA_COMPLETE) { ++ if (desc->status != DMA_ERROR) ++ desc->status = DMA_COMPLETE; ++ ++ dma_cookie_complete(tx_desc); ++ dma_descriptor_unmap(tx_desc); ++ } else { ++ /* Don't handle it twice */ ++ tx_desc = NULL; ++ } ++ } ++ spin_unlock_irqrestore(&chan->vc.lock, flags); ++ ++ if (tx_desc) { ++ dmaengine_desc_get_callback_invoke(tx_desc, NULL); ++ dma_run_dependencies(tx_desc); ++ vchan_vdesc_fini(vd); ++ } ++} ++ ++static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan, ++ struct pt_dma_desc *desc) ++{ ++ struct dma_async_tx_descriptor *tx_desc; ++ struct virt_dma_desc *vd; ++ unsigned long flags; ++ ++ /* Loop over descriptors until one is found with commands */ ++ do { ++ if (desc) { ++ if (!desc->issued_to_hw) { ++ /* No errors, keep going */ ++ if (desc->status != DMA_ERROR) ++ return desc; ++ } ++ tx_desc = &desc->vd.tx; ++ vd = &desc->vd; ++ } else { ++ tx_desc = NULL; ++ } ++ spin_lock_irqsave(&chan->vc.lock, flags); ++ desc = pt_next_dma_desc(chan); ++ spin_unlock_irqrestore(&chan->vc.lock, flags); ++ } while (desc); ++ ++ return NULL; ++} ++ ++static void pt_cmd_callback(void *data, int err) ++{ ++ struct pt_dma_desc *desc = data; ++ struct dma_chan *dma_chan; ++ struct pt_dma_chan *chan; ++ struct pt_device *pt; ++ int ret; ++ ++ if (err == -EINPROGRESS) ++ return; ++ ++ dma_chan = 
desc->vd.tx.chan; ++ chan = to_pt_chan(dma_chan); ++ pt = chan->pt; ++ ++ if (err) ++ desc->status = DMA_ERROR; ++ ++ while (true) { ++ /* if queue is full dont submit to queue */ ++ if((atomic64_read(&chan->cmd_q->q_cmd_count) >= (CMD_Q_LEN - 1)) || pt_core_queue_full(pt, chan->cmd_q)) { ++ cpu_relax(); ++ continue; ++ } ++ ++ /* Check for DMA descriptor completion */ ++ desc = pt_handle_active_desc(chan, desc); ++ ++ /* Don't submit cmd if no descriptor or DMA is paused */ ++ if (!desc) ++ break; ++ ++ ret = pt_dma_start_desc(desc, chan); ++ if (!ret) ++ break; ++ ++ desc->status = DMA_ERROR; ++ } ++} ++ ++static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan, ++ unsigned long flags) ++{ ++ struct pt_dma_desc *desc; ++ struct pt_cmd_queue *cmd_q = chan->cmd_q; ++ ++ desc = kmem_cache_zalloc(chan->pt->dma_desc_cache, GFP_NOWAIT); ++ if (!desc) ++ return NULL; ++ ++ vchan_tx_prep(&chan->vc, &desc->vd, flags); ++ ++ desc->pt = chan->pt; ++ cmd_q->int_en = !!(flags & DMA_PREP_INTERRUPT); ++ desc->issued_to_hw = 0; ++ desc->status = DMA_IN_PROGRESS; ++ ++ return desc; ++} ++ ++static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan, ++ dma_addr_t dst, ++ dma_addr_t src, ++ unsigned int len, ++ unsigned long flags) ++{ ++ struct pt_dma_chan *chan = to_pt_chan(dma_chan); ++ struct pt_cmd_queue *cmd_q = chan->cmd_q; ++ struct pt_passthru_engine *pt_engine; ++ struct pt_dma_desc *desc; ++ struct pt_cmd *pt_cmd; ++ ++ desc = pt_alloc_dma_desc(chan, flags); ++ if (!desc) ++ return NULL; ++ ++ pt_cmd = &desc->pt_cmd; ++ pt_cmd->pt = chan->pt; ++ pt_engine = &pt_cmd->passthru; ++ pt_cmd->engine = PT_ENGINE_PASSTHRU; ++ pt_engine->src_dma = src; ++ pt_engine->dst_dma = dst; ++ pt_engine->src_len = len; ++ pt_cmd->pt_cmd_callback = pt_cmd_callback_tasklet; ++ pt_cmd->data = desc; ++ ++ desc->len = len; ++ ++ spin_lock_irqsave(&cmd_q->cmd_lock, flags); ++ list_add_tail(&pt_cmd->entry, &cmd_q->cmd); ++ spin_unlock_irqrestore(&cmd_q->cmd_lock, flags); ++ ++ return desc; ++} ++ ++static struct dma_async_tx_descriptor * ++pt_prep_dma_memcpy(struct dma_chan *dma_chan, dma_addr_t dst, ++ dma_addr_t src, size_t len, unsigned long flags) ++{ ++ struct pt_dma_desc *desc; ++ ++ desc = pt_create_desc(dma_chan, dst, src, len, flags); ++ if (!desc) ++ return NULL; ++ ++ return &desc->vd.tx; ++} ++ ++static struct dma_async_tx_descriptor * ++pt_prep_dma_interrupt(struct dma_chan *dma_chan, unsigned long flags) ++{ ++ struct pt_dma_chan *chan = to_pt_chan(dma_chan); ++ struct pt_dma_desc *desc; ++ ++ desc = pt_alloc_dma_desc(chan, flags); ++ if (!desc) ++ return NULL; ++ ++ return &desc->vd.tx; ++} ++ ++static void pt_issue_pending(struct dma_chan *dma_chan) ++{ ++ struct pt_dma_chan *chan = to_pt_chan(dma_chan); ++ struct pt_dma_desc *desc; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&chan->vc.lock, flags); ++ vchan_issue_pending(&chan->vc); ++ desc = pt_next_dma_desc(chan); ++ spin_unlock_irqrestore(&chan->vc.lock, flags); ++ ++ pt_cmd_callback(desc, 0); ++} ++ ++static int pt_pause(struct dma_chan *dma_chan) ++{ ++ return 0; ++} ++ ++static int pt_resume(struct dma_chan *dma_chan) ++{ ++ struct pt_dma_chan *chan = to_pt_chan(dma_chan); ++ struct pt_dma_desc *desc = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&chan->vc.lock, flags); ++ pt_start_queue(chan->cmd_q); ++ desc = pt_next_dma_desc(chan); ++ spin_unlock_irqrestore(&chan->vc.lock, flags); ++ ++ /* If there was something active, re-start */ ++ if (desc) ++ pt_cmd_callback(desc, 0); ++ ++ return 0; ++} ++ ++static int 
pt_terminate_all(struct dma_chan *dma_chan) ++{ ++ struct pt_dma_chan *chan = to_pt_chan(dma_chan); ++ unsigned long flags; ++ LIST_HEAD(head); ++ ++ spin_lock_irqsave(&chan->vc.lock, flags); ++ vchan_get_all_descriptors(&chan->vc, &head); ++ spin_unlock_irqrestore(&chan->vc.lock, flags); ++ ++ vchan_dma_desc_free_list(&chan->vc, &head); ++ vchan_free_chan_resources(&chan->vc); ++ ++ return 0; ++} ++ ++int pt_dmaengine_register(struct pt_device *pt) ++{ ++ struct pt_dma_chan *chan; ++ struct pt_cmd_queue *cmd_q; ++ struct dma_device *dma_dev = &pt->dma_dev; ++ char *cmd_cache_name; ++ char *desc_cache_name; ++ unsigned int i; ++ int ret; ++ ++ pt->pt_dma_chan = devm_kcalloc(pt->dev, pt->cmd_q_count, sizeof(*pt->pt_dma_chan), ++ GFP_KERNEL); ++ if (!pt->pt_dma_chan) ++ return -ENOMEM; ++ ++ cmd_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL, ++ "%s-dmaengine-cmd-cache", ++ dev_name(pt->dev)); ++ if (!cmd_cache_name) ++ return -ENOMEM; ++ ++ desc_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL, ++ "%s-dmaengine-desc-cache", ++ dev_name(pt->dev)); ++ if (!desc_cache_name) { ++ ret = -ENOMEM; ++ goto err_cache; ++ } ++ ++ pt->dma_desc_cache = kmem_cache_create(desc_cache_name, ++ sizeof(struct pt_dma_desc), 0, ++ SLAB_HWCACHE_ALIGN, NULL); ++ if (!pt->dma_desc_cache) { ++ ret = -ENOMEM; ++ goto err_cache; ++ } ++ ++ dma_dev->dev = pt->dev; ++ dma_dev->src_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES; ++ dma_dev->dst_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES; ++ dma_dev->directions = DMA_MEM_TO_MEM; ++ dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; ++ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); ++ dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); ++ ++ /* ++ * PTDMA is intended to be used with the AMD NTB devices, hence ++ * marking it as DMA_PRIVATE. ++ */ ++ dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); ++ ++ /* Set base and prep routines */ ++ dma_dev->device_free_chan_resources = pt_free_chan_resources; ++ dma_dev->device_prep_dma_memcpy = pt_prep_dma_memcpy; ++ dma_dev->device_prep_dma_interrupt = pt_prep_dma_interrupt; ++ dma_dev->device_issue_pending = pt_issue_pending; ++ dma_dev->device_tx_status = dma_cookie_status; ++ dma_dev->device_pause = pt_pause; ++ dma_dev->device_resume = pt_resume; ++ dma_dev->device_terminate_all = pt_terminate_all; ++ dma_dev->device_synchronize = pt_synchronize; ++ ++ INIT_LIST_HEAD(&dma_dev->channels); ++ for (i = 0; i < pt->cmd_q_count; i++) { ++ chan = pt->pt_dma_chan + i; ++ cmd_q = &pt->cmd_q[i]; ++ chan->cmd_q = cmd_q; ++ chan->id = cmd_q->id; ++ chan->pt = pt; ++ chan->vc.desc_free = pt_do_cleanup; ++ vchan_init(&chan->vc, dma_dev); ++ } ++ ++ ret = dma_async_device_register(dma_dev); ++ if (ret) ++ goto err_reg; ++ ++ return 0; ++ ++err_reg: ++ kmem_cache_destroy(pt->dma_desc_cache); ++ ++err_cache: ++ kmem_cache_destroy(pt->dma_cmd_cache); ++ ++ return ret; ++} ++ ++void pt_dmaengine_unregister(struct pt_device *pt) ++{ ++ struct dma_device *dma_dev = &pt->dma_dev; ++ ++ dma_async_device_unregister(dma_dev); ++ kmem_cache_destroy(pt->dma_cmd_cache); ++} +diff --git a/drivers/dma/ae4dma/ae4dma-pci.c b/drivers/dma/ae4dma/ae4dma-pci.c +new file mode 100644 +index 000000000000..bd1170d05081 +--- /dev/null ++++ b/drivers/dma/ae4dma/ae4dma-pci.c +@@ -0,0 +1,251 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * AMD AE4DMA device driver ++ * -- Based on the PTDMA driver ++ * ++ * Copyright (C) 2024 Advanced Micro Devices, Inc. 
++ * ++ */ ++ ++#include <linux/device.h> ++#include <linux/dma-mapping.h> ++#include <linux/delay.h> ++#include <linux/interrupt.h> ++#include <linux/kernel.h> ++#include <linux/kthread.h> ++#include <linux/module.h> ++#include <linux/pci_ids.h> ++#include <linux/pci.h> ++#include <linux/spinlock.h> ++ ++#include "ae4dma.h" ++ ++static char test_device[32]; ++module_param_string(device, test_device, sizeof(test_device), 0644); ++MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)"); ++ ++struct pt_msix { ++ int msix_count; ++ struct msix_entry msix_entry[MAX_HW_QUEUES]; ++}; ++ ++/* ++ * pt_alloc_struct - allocate and initialize the pt_device struct ++ * ++ * @dev: device struct of the PTDMA ++ */ ++static struct pt_device *pt_alloc_struct(struct device *dev) ++{ ++ struct pt_device *pt; ++ ++ pt = devm_kzalloc(dev, sizeof(*pt), GFP_KERNEL); ++ ++ if (!pt) ++ return NULL; ++ pt->dev = dev; ++ ++ INIT_LIST_HEAD(&pt->cmd); ++ ++ return pt; ++} ++ ++static int pt_get_msix_irqs(struct pt_device *pt) ++{ ++ struct pt_msix *pt_msix = pt->pt_msix; ++ struct device *dev = pt->dev; ++ struct pci_dev *pdev = to_pci_dev(dev); ++ int v, i, ret; ++ ++ for (v = 0; v < ARRAY_SIZE(pt_msix->msix_entry); v++) ++ pt_msix->msix_entry[v].entry = v; ++ ++ ret = pci_enable_msix_range(pdev, pt_msix->msix_entry, 1, v); ++ if (ret < 0) ++ return ret; ++ ++ pt_msix->msix_count = ret; ++ ++ for(i=0; i<MAX_HW_QUEUES; i++ ) ++ pt->pt_irq[i] = pt_msix->msix_entry[i].vector; ++ ++ return 0; ++} ++ ++static int pt_get_msi_irq(struct pt_device *pt) ++{ ++ struct device *dev = pt->dev; ++ struct pci_dev *pdev = to_pci_dev(dev); ++ int ret, i; ++ ++ ret = pci_enable_msi(pdev); ++ if (ret) ++ return ret; ++ ++ for(i=0; i<MAX_HW_QUEUES; i++ ) ++ pt->pt_irq[i] = pdev->irq; ++ ++ return 0; ++} ++ ++static int pt_get_irqs(struct pt_device *pt) ++{ ++ struct device *dev = pt->dev; ++ int ret; ++ ++ ret = pt_get_msix_irqs(pt); ++ if (!ret) ++ return 0; ++ ++ /* Couldn't get MSI-X vectors, try MSI */ ++ dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret); ++ ret = pt_get_msi_irq(pt); ++ if (!ret) ++ return 0; ++ ++ /* Couldn't get MSI interrupt */ ++ dev_err(dev, "could not enable MSI (%d)\n", ret); ++ ++ return ret; ++} ++ ++static void pt_free_irqs(struct pt_device *pt) ++{ ++ struct pt_msix *pt_msix = pt->pt_msix; ++ struct device *dev = pt->dev; ++ struct pci_dev *pdev = to_pci_dev(dev); ++ unsigned int i; ++ ++ if (pt_msix->msix_count) ++ pci_disable_msix(pdev); ++ else if (pt->pt_irq) ++ pci_disable_msi(pdev); ++ ++ for(i=0; i<MAX_HW_QUEUES; i++ ) ++ pt->pt_irq[i] = 0; ++} ++ ++static int pt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ++{ ++ struct pt_device *pt; ++ struct pt_msix *pt_msix; ++ struct device *dev = &pdev->dev; ++ void __iomem * const *iomap_table; ++ int bar_mask; ++ int ret = -ENOMEM; ++ ++ pt = pt_alloc_struct(dev); ++ if (!pt) ++ goto e_err; ++ ++ pt_msix = devm_kzalloc(dev, sizeof(*pt_msix), GFP_KERNEL); ++ if (!pt_msix) ++ goto e_err; ++ ++ pt->pt_msix = pt_msix; ++ pt->dev_vdata = (struct pt_dev_vdata *)id->driver_data; ++ if (!pt->dev_vdata) { ++ ret = -ENODEV; ++ dev_err(dev, "missing driver data\n"); ++ goto e_err; ++ } ++ ++ ret = pcim_enable_device(pdev); ++ if (ret) { ++ dev_err(dev, "pcim_enable_device failed (%d)\n", ret); ++ goto e_err; ++ } ++ ++ bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); ++ ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma"); ++ if (ret) { ++ dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret); ++ goto e_err; ++ } ++ 
++ iomap_table = pcim_iomap_table(pdev); ++ if (!iomap_table) { ++ dev_err(dev, "pcim_iomap_table failed\n"); ++ ret = -ENOMEM; ++ goto e_err; ++ } ++ ++ pt->io_regs = iomap_table[pt->dev_vdata->bar]; ++ if (!pt->io_regs) { ++ dev_err(dev, "ioremap failed\n"); ++ ret = -ENOMEM; ++ goto e_err; ++ } ++ ++ ret = pt_get_irqs(pt); ++ if (ret) ++ goto e_err; ++ ++ pci_set_master(pdev); ++ ++ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); ++ if (ret) { ++ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); ++ if (ret) { ++ dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ++ ret); ++ goto e_err; ++ } ++ } ++ ++ dev_set_drvdata(dev, pt); ++ ++ if (pt->dev_vdata) ++ ret = pt_core_init(pt); ++ ++ if (ret) ++ goto e_err; ++ ++ return 0; ++ ++e_err: ++ dev_err(dev, "initialization failed ret = %d\n", ret); ++ ++ return ret; ++} ++ ++static void pt_pci_remove(struct pci_dev *pdev) ++{ ++ struct device *dev = &pdev->dev; ++ struct pt_device *pt = dev_get_drvdata(dev); ++ ++ if (!pt) ++ return; ++ ++ if (pt->dev_vdata) ++ pt_core_destroy(pt); ++ ++ pt_free_irqs(pt); ++} ++ ++static const struct pt_dev_vdata dev_vdata[] = { ++ { ++ .bar = 0, ++ }, ++}; ++ ++static const struct pci_device_id pt_pci_table[] = { ++ { PCI_VDEVICE(AMD, 0x14C8), (kernel_ulong_t)&dev_vdata[0] }, ++ { PCI_VDEVICE(AMD, 0x14DC), (kernel_ulong_t)&dev_vdata[0] }, ++ { PCI_VDEVICE(AMD, 0x149B), (kernel_ulong_t)&dev_vdata[0] }, ++ /* Last entry must be zero */ ++ { 0, } ++}; ++MODULE_DEVICE_TABLE(pci, pt_pci_table); ++ ++static struct pci_driver pt_pci_driver = { ++ .name = "ae4dma", ++ .id_table = pt_pci_table, ++ .probe = pt_pci_probe, ++ .remove = pt_pci_remove, ++}; ++ ++module_pci_driver(pt_pci_driver); ++ ++MODULE_AUTHOR("amd"); ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("AMD AE4DMA driver"); +diff --git a/drivers/dma/ae4dma/ae4dma.h b/drivers/dma/ae4dma/ae4dma.h +new file mode 100644 +index 000000000000..30ce1c1ee29c +--- /dev/null ++++ b/drivers/dma/ae4dma/ae4dma.h +@@ -0,0 +1,416 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * AMD AE4DMA device driver ++ * ++ * Copyright (C) 2024 Advanced Micro Devices, Inc. 
++ * ++ */ ++ ++#ifndef __PT_DEV_H__ ++#define __PT_DEV_H__ ++ ++#include <linux/device.h> ++#include <linux/dmaengine.h> ++#include <linux/pci.h> ++#include <linux/spinlock.h> ++#include <linux/mutex.h> ++#include <linux/list.h> ++#include <linux/wait.h> ++#include <linux/dmapool.h> ++ ++#include "../virt-dma.h" ++ ++#define MAX_PT_NAME_LEN 16 ++#define MAX_DMAPOOL_NAME_LEN 32 ++ ++#define MAX_HW_QUEUES 16 ++#define MAX_CMD_QLEN 32 ++ ++#define PT_ENGINE_PASSTHRU 5 ++ ++/* Register Mappings */ ++#define IRQ_MASK_REG 0x040 ++#define IRQ_STATUS_REG 0x200 ++ ++#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f) ++ ++#define CMD_QUEUE_PRIO_OFFSET 0x00 ++#define CMD_REQID_CONFIG_OFFSET 0x04 ++#define CMD_TIMEOUT_OFFSET 0x08 ++#define CMD_PT_VERSION 0x10 ++ ++#define CMD_Q_CONTROL_BASE 0x0000 ++#define CMD_Q_TAIL_LO_BASE 0x0004 ++#define CMD_Q_HEAD_LO_BASE 0x0008 ++#define CMD_Q_INT_ENABLE_BASE 0x000C ++#define CMD_Q_INTERRUPT_STATUS_BASE 0x0010 ++ ++#define CMD_Q_STATUS_BASE 0x0100 ++#define CMD_Q_INT_STATUS_BASE 0x0104 ++#define CMD_Q_DMA_STATUS_BASE 0x0108 ++#define CMD_Q_DMA_READ_STATUS_BASE 0x010C ++#define CMD_Q_DMA_WRITE_STATUS_BASE 0x0110 ++#define CMD_Q_ABORT_BASE 0x0114 ++#define CMD_Q_AX_CACHE_BASE 0x0118 ++ ++#define CMD_CONFIG_OFFSET 0x1120 ++#define CMD_CLK_GATE_CTL_OFFSET 0x6004 ++ ++#define CMD_DESC_DW0_VAL 0x000002 ++ ++/* Address offset for virtual queue registers */ ++#define CMD_Q_STATUS_INCR 0x1000 ++ ++/* Bit masks */ ++#define CMD_CONFIG_REQID 0 ++#define CMD_TIMEOUT_DISABLE 0 ++#define CMD_CLK_DYN_GATING_DIS 0 ++#define CMD_CLK_SW_GATE_MODE 0 ++#define CMD_CLK_GATE_CTL 0 ++#define CMD_QUEUE_PRIO GENMASK(2, 1) ++#define CMD_CONFIG_VHB_EN BIT(0) ++#define CMD_CLK_DYN_GATING_EN BIT(0) ++#define CMD_CLK_HW_GATE_MODE BIT(0) ++#define CMD_CLK_GATE_ON_DELAY BIT(12) ++#define CMD_CLK_GATE_OFF_DELAY BIT(12) ++ ++#define CMD_CLK_GATE_CONFIG (CMD_CLK_GATE_CTL | \ ++ CMD_CLK_HW_GATE_MODE | \ ++ CMD_CLK_GATE_ON_DELAY | \ ++ CMD_CLK_DYN_GATING_EN | \ ++ CMD_CLK_GATE_OFF_DELAY) ++ ++#define CMD_Q_LEN 32 ++#define CMD_Q_RUN BIT(0) ++#define CMD_Q_HALT BIT(1) ++#define CMD_Q_MEM_LOCATION BIT(2) ++#define CMD_Q_SIZE_MASK GENMASK(4, 0) ++#define CMD_Q_SIZE GENMASK(7, 3) ++#define CMD_Q_SHIFT GENMASK(1, 0) ++#define QUEUE_SIZE_VAL ((ffs(CMD_Q_LEN) - 2) & \ ++ CMD_Q_SIZE_MASK) ++#define Q_PTR_MASK (2 << (QUEUE_SIZE_VAL + 5) - 1) ++#define Q_DESC_SIZE sizeof(struct ptdma_desc) ++#define Q_SIZE(n) (CMD_Q_LEN * (n)) ++ ++#define INT_DESC_VALIDATED BIT(1) ++#define INT_DESC_PROCESSED BIT(2) ++#define INT_COMPLETION BIT(3) ++#define INT_ERROR BIT(4) ++ ++#define SUPPORTED_INTERRUPTS (INT_COMPLETION | INT_ERROR) ++ ++/****** Local Storage Block ******/ ++#define LSB_START 0 ++#define LSB_END 127 ++#define LSB_COUNT (LSB_END - LSB_START + 1) ++ ++#define PT_DMAPOOL_MAX_SIZE 64 ++#define PT_DMAPOOL_ALIGN BIT(5) ++ ++#define PT_PASSTHRU_BLOCKSIZE 512 ++ ++struct pt_device; ++ ++struct pt_tasklet_data { ++ struct completion completion; ++ struct pt_cmd *cmd; ++}; ++ ++/* ++ * struct pt_passthru_engine - pass-through operation ++ * without performing DMA mapping ++ * @mask: mask to be applied to data ++ * @mask_len: length in bytes of mask ++ * @src_dma: data to be used for this operation ++ * @dst_dma: data produced by this operation ++ * @src_len: length in bytes of data used for this operation ++ * ++ * Variables required to be set when calling pt_enqueue_cmd(): ++ * - bit_mod, byte_swap, src, dst, src_len ++ * - mask, mask_len if bit_mod is not PT_PASSTHRU_BITWISE_NOOP ++ */ ++struct pt_passthru_engine 
{ ++ dma_addr_t mask; ++ u32 mask_len; /* In bytes */ ++ ++ dma_addr_t src_dma, dst_dma; ++ u64 src_len; /* In bytes */ ++}; ++ ++/* ++ * struct pt_cmd - PTDMA operation request ++ * @entry: list element ++ * @work: work element used for callbacks ++ * @pt: PT device to be run on ++ * @ret: operation return code ++ * @flags: cmd processing flags ++ * @engine: PTDMA operation to perform (passthru) ++ * @engine_error: PT engine return code ++ * @passthru: engine specific structures, refer to specific engine struct below ++ * @callback: operation completion callback function ++ * @data: parameter value to be supplied to the callback function ++ * ++ * Variables required to be set when calling pt_enqueue_cmd(): ++ * - engine, callback ++ * - See the operation structures below for what is required for each ++ * operation. ++ */ ++struct pt_cmd { ++ struct list_head entry; ++ struct work_struct work; ++ struct pt_device *pt; ++ int ret; ++ u32 engine; ++ u32 engine_error; ++ struct pt_passthru_engine passthru; ++ /* Completion callback support */ ++ void (*pt_cmd_callback)(void *data, int err); ++ void *data; ++ u8 qid; ++}; ++ ++struct pt_dma_desc { ++ struct virt_dma_desc vd; ++ struct pt_device *pt; ++ enum dma_status status; ++ size_t len; ++ bool issued_to_hw; ++ struct pt_cmd pt_cmd; ++}; ++ ++struct pt_dma_chan { ++ struct virt_dma_chan vc; ++ struct pt_device *pt; ++ struct pt_cmd_queue *cmd_q; ++ u32 id; ++}; ++ ++struct pt_cmd_queue { ++ struct pt_device *pt; ++ ++ /* Queue identifier */ ++ u32 id; ++ ++ /* Queue dma pool */ ++ struct dma_pool *dma_pool; ++ ++ /* Queue base address (not neccessarily aligned)*/ ++ struct ptdma_desc *qbase; ++ ++ /* Aligned queue start address (per requirement) */ ++ struct mutex q_mutex ____cacheline_aligned; ++ spinlock_t q_lock ____cacheline_aligned; ++ volatile unsigned long qidx; ++ volatile unsigned long ridx; ++ ++ ++ unsigned int qsize; ++ dma_addr_t qbase_dma; ++ dma_addr_t qdma_tail; ++ ++ unsigned int active; ++ unsigned int suspended; ++ ++ /* Interrupt flag */ ++ bool int_en; ++ ++ /* Register addresses for queue */ ++ void __iomem *reg_control; ++ u32 qcontrol; /* Cached control register */ ++ ++ /* Status values from job */ ++ u32 int_status; ++ u32 q_status; ++ u32 q_int_status; ++ u32 cmd_error; ++ atomic_t dridx; ++ /* Interrupt wait queue */ ++ wait_queue_head_t int_queue; ++ unsigned int int_rcvd; ++ ++ wait_queue_head_t q_space; ++ unsigned int q_space_available; ++ ++ /* Queue Statistics */ ++ unsigned long total_pt_ops; ++ atomic64_t q_cmd_count; ++ atomic_t tail_wi; ++ volatile unsigned long desc_id_counter; ++ struct pt_tasklet_data tdata; ++ ++ struct list_head cmd; ++ spinlock_t cmd_lock ____cacheline_aligned; ++ spinlock_t cmd_control ____cacheline_aligned; ++ struct mutex cmd_mutex; ++} ____cacheline_aligned; ++ ++struct pt_device { ++ struct list_head entry; ++ ++ unsigned int ord; ++ char name[MAX_PT_NAME_LEN]; ++ ++ struct device *dev; ++ ++ /* Bus specific device information */ ++ struct pt_msix *pt_msix; ++ ++ struct pt_dev_vdata *dev_vdata; ++ ++ unsigned int pt_irq[MAX_HW_QUEUES]; ++ ++ /* I/O area used for device communication */ ++ void __iomem *io_regs; ++ ++ spinlock_t cmd_lock ____cacheline_aligned; ++ unsigned int cmd_count; ++ struct list_head cmd; ++ ++ /* ++ * The command queue. 
This represent the queue available on the ++ * PTDMA that are available for processing cmds ++ */ ++ struct pt_cmd_queue cmd_q[MAX_HW_QUEUES]; ++ unsigned int cmd_q_count; ++ ++ /* Support for the DMA Engine capabilities */ ++ struct dma_device dma_dev; ++ struct pt_dma_chan *pt_dma_chan; ++ struct kmem_cache *dma_cmd_cache; ++ struct kmem_cache *dma_desc_cache; ++ ++ wait_queue_head_t lsb_queue; ++ ++ /* Device Statistics */ ++ volatile unsigned long current_interrupts; ++ volatile unsigned long total_interrupts; ++ ++}; ++ ++/* ++ * descriptor for PTDMA commands ++ * 8 32-bit words: ++ * word 0: function; engine; control bits ++ * word 1: length of source data ++ * word 2: low 32 bits of source pointer ++ * word 3: upper 16 bits of source pointer; source memory type ++ * word 4: low 32 bits of destination pointer ++ * word 5: upper 16 bits of destination pointer; destination memory type ++ * word 6: reserved 32 bits ++ * word 7: reserved 32 bits ++ */ ++ ++#define DWORD0_SOC BIT(0) ++#define DWORD0_IOC BIT(1) ++#define DWORD0_SOM BIT(3) ++#define DWORD0_EOM BIT(4) ++#define DWORD0_DMT GENMASK(5, 4) ++#define DWORD0_SMT GENMASK(7, 6) ++ ++#define DWORD0_DMT_MEM 0x0 ++#define DWORD0_DMT_IO 1<<4 ++#define DWORD0_SMT_MEM 0x0 ++#define DWORD0_SMT_IO 1<<6 ++ ++union dwou { ++ u32 dw0; ++ struct dword0 { ++ u8 byte0; ++ u8 byte1; ++ u16 timestamp; ++ } dws; ++}; ++ ++struct dword1 { ++ u8 status; ++ u8 err_code; ++ u16 desc_id; ++}; ++ ++struct ptdma_desc { ++ union dwou dwouv; ++ struct dword1 dw1; ++ u32 length; ++ struct dword1 uu; ++ u32 src_hi; ++ u32 src_lo; ++ u32 dst_hi; ++ u32 dst_lo; ++}; ++ ++struct desc_work { ++ struct ptdma_desc *desc; ++ struct pt_cmd_queue *cmd_q; ++ struct list_head entry; ++ bool submitted; ++ bool processed; ++ bool init; ++ struct work_struct work; ++}; ++ ++/* Structure to hold PT device data */ ++struct pt_dev_vdata { ++ const unsigned int bar; ++}; ++ ++int pt_dmaengine_register(struct pt_device *pt); ++void pt_dmaengine_unregister(struct pt_device *pt); ++ ++int pt_core_init(struct pt_device *pt); ++void pt_core_destroy(struct pt_device *pt); ++ ++int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, ++ struct pt_passthru_engine *pt_engine); ++ ++void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue *cmd_q); ++void pt_start_queue(struct pt_cmd_queue *cmd_q); ++void pt_stop_queue(struct pt_cmd_queue *cmd_q); ++ ++static inline void pt_core_disable_queue_interrupts(struct pt_device *pt, struct pt_cmd_queue *cmd_q) ++ ++{ ++ ++ u32 status; ++ status = ioread32(cmd_q->reg_control); ++ status &= ~0x7; ++ iowrite32(status, cmd_q->reg_control); ++} ++ ++static inline void pt_core_enable_queue_interrupts(struct pt_device *pt, struct pt_cmd_queue *cmd_q) ++ ++{ ++ ++ u32 status; ++ status = ioread32(cmd_q->reg_control); ++ status |= 0x7; ++ iowrite32(status, cmd_q->reg_control); ++} ++ ++static inline bool pt_core_queue_full(struct pt_device *pt, struct pt_cmd_queue *cmd_q) ++{ ++ u32 q_sts = ioread32(cmd_q->reg_control + 0x4) & 0x06; ++ ++ u32 rear_ri = ioread32(cmd_q->reg_control + 0x0C); ++ u32 front_wi = ioread32(cmd_q->reg_control + 0x10); ++ ++ q_sts >>= 1; ++ ++ if ( ((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN-1) ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++static inline bool pt_core_queue_empty(struct pt_device *pt, struct pt_cmd_queue *cmd_q) ++{ ++ u32 rear_ri = ioread32(cmd_q->reg_control + 0x0C); ++ u32 front_wi = ioread32(cmd_q->reg_control + 0x10); ++ ++ if (front_wi == rear_ri) ++ return true; ++ 
++ return false; ++} ++#endif +-- +2.34.1 + |
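
Usage illustration (not part of the patch above): the commit message describes a memory-to-memory copy engine driven through a circular descriptor queue and exposed through the kernel dmaengine framework. The sketch below shows how a generic dmaengine client could exercise a DMA_MEMCPY channel such as the one this driver registers; it uses only the standard dmaengine client API, and the function name, parameters, and flags are illustrative assumptions, not part of the driver.

#include <linux/dmaengine.h>

/*
 * Illustrative sketch of a dmaengine memcpy client. Assumes the caller
 * already holds DMA-mapped source and destination addresses; the AE4DMA
 * hardware specifics stay hidden behind the generic dmaengine API.
 */
static int example_dma_copy(dma_addr_t dst, dma_addr_t src, size_t len)
{
	struct dma_async_tx_descriptor *tx;
	struct dma_chan *chan;
	dma_cap_mask_t mask;
	dma_cookie_t cookie;
	int ret = 0;

	/* Ask the dmaengine core for any channel advertising memcpy. */
	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);
	chan = dma_request_channel(mask, NULL, NULL);
	if (!chan)
		return -ENODEV;

	/* Build one descriptor: copy 'len' bytes from src to dst. */
	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len, DMA_PREP_INTERRUPT);
	if (!tx) {
		ret = -EINVAL;
		goto out;
	}

	/* Queue the descriptor, then kick the hardware queue. */
	cookie = dmaengine_submit(tx);
	ret = dma_submit_error(cookie);
	if (ret)
		goto out;
	dma_async_issue_pending(chan);

	/* Poll for completion; a real client would use a callback instead. */
	if (dma_sync_wait(chan, cookie) != DMA_COMPLETE)
		ret = -EIO;
out:
	dma_release_channel(chan);
	return ret;
}

Because pt_dmaengine_register() sets DMA_PRIVATE on the dma_device, the channels are reserved for explicit requests of this kind (the patch notes the controller is intended for AMD NTB use) rather than being added to the general-purpose memcpy pool.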