Diffstat (limited to 'drivers/vdpa')
-rw-r--r--  drivers/vdpa/Kconfig                  |   30
-rw-r--r--  drivers/vdpa/Makefile                 |    1
-rw-r--r--  drivers/vdpa/ifcvf/ifcvf_base.c       |   32
-rw-r--r--  drivers/vdpa/ifcvf/ifcvf_base.h       |   10
-rw-r--r--  drivers/vdpa/ifcvf/ifcvf_main.c       |  164
-rw-r--r--  drivers/vdpa/mlx5/Makefile            |    2
-rw-r--r--  drivers/vdpa/mlx5/core/mlx5_vdpa.h    |    6
-rw-r--r--  drivers/vdpa/mlx5/core/mr.c           |   47
-rw-r--r--  drivers/vdpa/mlx5/core/resources.c    |    3
-rw-r--r--  drivers/vdpa/mlx5/net/debug.c         |  152
-rw-r--r--  drivers/vdpa/mlx5/net/mlx5_vnet.c     |  600
-rw-r--r--  drivers/vdpa/mlx5/net/mlx5_vnet.h     |   94
-rw-r--r--  drivers/vdpa/solidrun/Makefile        |    7
-rw-r--r--  drivers/vdpa/solidrun/snet_ctrl.c     |  330
-rw-r--r--  drivers/vdpa/solidrun/snet_hwmon.c    |  188
-rw-r--r--  drivers/vdpa/solidrun/snet_main.c     | 1117
-rw-r--r--  drivers/vdpa/solidrun/snet_vdpa.h     |  208
-rw-r--r--  drivers/vdpa/vdpa.c                   |  121
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim.c      |  391
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim.h      |   21
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim_blk.c  |   96
-rw-r--r--  drivers/vdpa/vdpa_sim/vdpa_sim_net.c  |  265
-rw-r--r--  drivers/vdpa/vdpa_user/iova_domain.c  |    2
-rw-r--r--  drivers/vdpa/vdpa_user/iova_domain.h  |    1
-rw-r--r--  drivers/vdpa/vdpa_user/vduse_dev.c    |  424
-rw-r--r--  drivers/vdpa/virtio_pci/vp_vdpa.c     |    4
26 files changed, 3624 insertions(+), 692 deletions(-)
diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig
index 50f45d037611..cd6ad92f3f05 100644
--- a/drivers/vdpa/Kconfig
+++ b/drivers/vdpa/Kconfig
@@ -71,6 +71,18 @@ config MLX5_VDPA_NET
be executed by the hardware. It also supports a variety of stateless
offloads depending on the actual device used and firmware version.
+config MLX5_VDPA_STEERING_DEBUG
+ bool "expose steering counters on debugfs"
+ select MLX5_VDPA
+ help
+ Expose RX steering counters in debugfs to aid in debugging. For each VLAN
+ or non-VLAN interface, two hardware counters are added to the RX flow
+ table: one for unicast and one for multicast.
+ The counters count the number of packets and bytes and expose them in
+ debugfs. One can read the counters using, e.g.:
+ cat /sys/kernel/debug/mlx5/mlx5_core.sf.1/vdpa-0/rx/untagged/ucast/packets
+ cat /sys/kernel/debug/mlx5/mlx5_core.sf.1/vdpa-0/rx/untagged/mcast/bytes
+
config VP_VDPA
tristate "Virtio PCI bridge vDPA driver"
select VIRTIO_PCI_LIB
@@ -86,4 +98,22 @@ config ALIBABA_ENI_VDPA
VDPA driver for Alibaba ENI (Elastic Network Interface) which is built upon
virtio 0.9.5 specification.
+config SNET_VDPA
+ tristate "SolidRun's vDPA driver for SolidNET"
+ depends on PCI_MSI && PCI_IOV && (HWMON || HWMON=n)
+
+ # This driver MAY create a HWMON device.
+ # Depending on (HWMON || HWMON=n) ensures that:
+ # If HWMON=n the driver can be compiled either as a module or built-in.
+ # If HWMON=y the driver can be compiled either as a module or built-in.
+ # If HWMON=m the driver is forced to be compiled as a module.
+ # By doing so, IS_ENABLED can be used instead of IS_REACHABLE
+
+ help
+ vDPA driver for SolidNET DPU.
+ With this driver, the VirtIO dataplane can be
+ offloaded to a SolidNET DPU.
+ This driver includes a HW monitor device that
+ reads health values from the DPU.
+
endif # VDPA
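
The (HWMON || HWMON=n) dependency in the SNET_VDPA entry above is what makes a plain IS_ENABLED(CONFIG_HWMON) check safe: HWMON=m forces the driver itself to be built as a module, so built-in code can never see hwmon as reachable only from modules. A minimal sketch of the resulting pattern, assuming a hypothetical helper that is not part of this patch:

#include <linux/kconfig.h>
#include <linux/pci.h>

/* Hypothetical helper, for illustration only. */
static void snet_try_create_hwmon(struct pci_dev *pdev)
{
	if (!IS_ENABLED(CONFIG_HWMON))
		return;	/* hwmon compiled out: skip the monitor device */

	/* ... create and register the HW monitor device here ... */
}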
diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile
index 15665563a7f4..59396ff2a318 100644
--- a/drivers/vdpa/Makefile
+++ b/drivers/vdpa/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_IFCVF) += ifcvf/
obj-$(CONFIG_MLX5_VDPA) += mlx5/
obj-$(CONFIG_VP_VDPA) += virtio_pci/
obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/
+obj-$(CONFIG_SNET_VDPA) += solidrun/
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index 3e4486bfa0b7..5563b3a773c7 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.c
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -10,11 +10,6 @@
#include "ifcvf_base.h"
-struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw)
-{
- return container_of(hw, struct ifcvf_adapter, vf);
-}
-
u16 ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector)
{
struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
@@ -37,8 +32,6 @@ u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector)
static void __iomem *get_cap_addr(struct ifcvf_hw *hw,
struct virtio_pci_cap *cap)
{
- struct ifcvf_adapter *ifcvf;
- struct pci_dev *pdev;
u32 length, offset;
u8 bar;
@@ -46,17 +39,14 @@ static void __iomem *get_cap_addr(struct ifcvf_hw *hw,
offset = le32_to_cpu(cap->offset);
bar = cap->bar;
- ifcvf = vf_to_adapter(hw);
- pdev = ifcvf->pdev;
-
if (bar >= IFCVF_PCI_MAX_RESOURCE) {
- IFCVF_DBG(pdev,
+ IFCVF_DBG(hw->pdev,
"Invalid bar number %u to get capabilities\n", bar);
return NULL;
}
- if (offset + length > pci_resource_len(pdev, bar)) {
- IFCVF_DBG(pdev,
+ if (offset + length > pci_resource_len(hw->pdev, bar)) {
+ IFCVF_DBG(hw->pdev,
"offset(%u) + len(%u) overflows bar%u's capability\n",
offset, length, bar);
return NULL;
@@ -92,6 +82,7 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
IFCVF_ERR(pdev, "Failed to read PCI capability list\n");
return -EIO;
}
+ hw->pdev = pdev;
while (pos) {
ret = ifcvf_read_config_range(pdev, (u32 *)&cap,
@@ -215,15 +206,13 @@ u64 ifcvf_get_hw_features(struct ifcvf_hw *hw)
u64 ifcvf_get_features(struct ifcvf_hw *hw)
{
- return hw->hw_features;
+ return hw->dev_features;
}
int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
{
- struct ifcvf_adapter *ifcvf = vf_to_adapter(hw);
-
if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) {
- IFCVF_ERR(ifcvf->pdev, "VIRTIO_F_ACCESS_PLATFORM is not negotiated\n");
+ IFCVF_ERR(hw->pdev, "VIRTIO_F_ACCESS_PLATFORM is not negotiated\n");
return -EINVAL;
}
@@ -232,13 +221,11 @@ int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
u32 ifcvf_get_config_size(struct ifcvf_hw *hw)
{
- struct ifcvf_adapter *adapter;
u32 net_config_size = sizeof(struct virtio_net_config);
u32 blk_config_size = sizeof(struct virtio_blk_config);
u32 cap_size = hw->cap_dev_config_size;
u32 config_size;
- adapter = vf_to_adapter(hw);
/* If the onboard device config space size is greater than
* the size of struct virtio_net/blk_config, only the spec
* implementing contents size is returned, this is very
@@ -253,7 +240,7 @@ u32 ifcvf_get_config_size(struct ifcvf_hw *hw)
break;
default:
config_size = 0;
- IFCVF_ERR(adapter->pdev, "VIRTIO ID %u not supported\n", hw->dev_type);
+ IFCVF_ERR(hw->pdev, "VIRTIO ID %u not supported\n", hw->dev_type);
}
return config_size;
@@ -301,14 +288,11 @@ static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features)
static int ifcvf_config_features(struct ifcvf_hw *hw)
{
- struct ifcvf_adapter *ifcvf;
-
- ifcvf = vf_to_adapter(hw);
ifcvf_set_features(hw, hw->req_features);
ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK);
if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) {
- IFCVF_ERR(ifcvf->pdev, "Failed to set FEATURES_OK status\n");
+ IFCVF_ERR(hw->pdev, "Failed to set FEATURES_OK status\n");
return -EIO;
}
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index f5563f665cc6..c20d1c40214e 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -19,6 +19,7 @@
#include <uapi/linux/virtio_blk.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_pci.h>
+#include <uapi/linux/vdpa.h>
#define N3000_DEVICE_ID 0x1041
#define N3000_SUBSYS_DEVICE_ID 0x001A
@@ -38,9 +39,6 @@
#define IFCVF_DBG(pdev, fmt, ...) dev_dbg(&pdev->dev, fmt, ##__VA_ARGS__)
#define IFCVF_INFO(pdev, fmt, ...) dev_info(&pdev->dev, fmt, ##__VA_ARGS__)
-#define ifcvf_private_to_vf(adapter) \
- (&((struct ifcvf_adapter *)adapter)->vf)
-
/* all vqs and config interrupt has its own vector */
#define MSIX_VECTOR_PER_VQ_AND_CONFIG 1
/* all vqs share a vector, and config interrupt has a separate vector */
@@ -78,6 +76,8 @@ struct ifcvf_hw {
u32 dev_type;
u64 req_features;
u64 hw_features;
+ /* provisioned device features */
+ u64 dev_features;
struct virtio_pci_common_cfg __iomem *common_cfg;
void __iomem *dev_cfg;
struct vring_info vring[IFCVF_MAX_QUEUES];
@@ -89,12 +89,13 @@ struct ifcvf_hw {
u16 nr_vring;
/* VIRTIO_PCI_CAP_DEVICE_CFG size */
u32 cap_dev_config_size;
+ struct pci_dev *pdev;
};
struct ifcvf_adapter {
struct vdpa_device vdpa;
struct pci_dev *pdev;
- struct ifcvf_hw vf;
+ struct ifcvf_hw *vf;
};
struct ifcvf_vring_lm_cfg {
@@ -109,6 +110,7 @@ struct ifcvf_lm_cfg {
struct ifcvf_vdpa_mgmt_dev {
struct vdpa_mgmt_dev mdev;
+ struct ifcvf_hw vf;
struct ifcvf_adapter *adapter;
struct pci_dev *pdev;
};
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index f9c0044c6442..7f78c47e40d6 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -69,10 +69,9 @@ static void ifcvf_free_irq_vectors(void *data)
pci_free_irq_vectors(data);
}
-static void ifcvf_free_per_vq_irq(struct ifcvf_adapter *adapter)
+static void ifcvf_free_per_vq_irq(struct ifcvf_hw *vf)
{
- struct pci_dev *pdev = adapter->pdev;
- struct ifcvf_hw *vf = &adapter->vf;
+ struct pci_dev *pdev = vf->pdev;
int i;
for (i = 0; i < vf->nr_vring; i++) {
@@ -83,10 +82,9 @@ static void ifcvf_free_per_vq_irq(struct ifcvf_adapter *adapter)
}
}
-static void ifcvf_free_vqs_reused_irq(struct ifcvf_adapter *adapter)
+static void ifcvf_free_vqs_reused_irq(struct ifcvf_hw *vf)
{
- struct pci_dev *pdev = adapter->pdev;
- struct ifcvf_hw *vf = &adapter->vf;
+ struct pci_dev *pdev = vf->pdev;
if (vf->vqs_reused_irq != -EINVAL) {
devm_free_irq(&pdev->dev, vf->vqs_reused_irq, vf);
@@ -95,20 +93,17 @@ static void ifcvf_free_vqs_reused_irq(struct ifcvf_adapter *adapter)
}
-static void ifcvf_free_vq_irq(struct ifcvf_adapter *adapter)
+static void ifcvf_free_vq_irq(struct ifcvf_hw *vf)
{
- struct ifcvf_hw *vf = &adapter->vf;
-
if (vf->msix_vector_status == MSIX_VECTOR_PER_VQ_AND_CONFIG)
- ifcvf_free_per_vq_irq(adapter);
+ ifcvf_free_per_vq_irq(vf);
else
- ifcvf_free_vqs_reused_irq(adapter);
+ ifcvf_free_vqs_reused_irq(vf);
}
-static void ifcvf_free_config_irq(struct ifcvf_adapter *adapter)
+static void ifcvf_free_config_irq(struct ifcvf_hw *vf)
{
- struct pci_dev *pdev = adapter->pdev;
- struct ifcvf_hw *vf = &adapter->vf;
+ struct pci_dev *pdev = vf->pdev;
if (vf->config_irq == -EINVAL)
return;
@@ -123,12 +118,12 @@ static void ifcvf_free_config_irq(struct ifcvf_adapter *adapter)
}
}
-static void ifcvf_free_irq(struct ifcvf_adapter *adapter)
+static void ifcvf_free_irq(struct ifcvf_hw *vf)
{
- struct pci_dev *pdev = adapter->pdev;
+ struct pci_dev *pdev = vf->pdev;
- ifcvf_free_vq_irq(adapter);
- ifcvf_free_config_irq(adapter);
+ ifcvf_free_vq_irq(vf);
+ ifcvf_free_config_irq(vf);
ifcvf_free_irq_vectors(pdev);
}
@@ -137,10 +132,9 @@ static void ifcvf_free_irq(struct ifcvf_adapter *adapter)
* It returns the number of allocated vectors, negative
* return value when fails.
*/
-static int ifcvf_alloc_vectors(struct ifcvf_adapter *adapter)
+static int ifcvf_alloc_vectors(struct ifcvf_hw *vf)
{
- struct pci_dev *pdev = adapter->pdev;
- struct ifcvf_hw *vf = &adapter->vf;
+ struct pci_dev *pdev = vf->pdev;
int max_intr, ret;
/* all queues and config interrupt */
@@ -160,10 +154,9 @@ static int ifcvf_alloc_vectors(struct ifcvf_adapter *adapter)
return ret;
}
-static int ifcvf_request_per_vq_irq(struct ifcvf_adapter *adapter)
+static int ifcvf_request_per_vq_irq(struct ifcvf_hw *vf)
{
- struct pci_dev *pdev = adapter->pdev;
- struct ifcvf_hw *vf = &adapter->vf;
+ struct pci_dev *pdev = vf->pdev;
int i, vector, ret, irq;
vf->vqs_reused_irq = -EINVAL;
@@ -190,15 +183,14 @@ static int ifcvf_request_per_vq_irq(struct ifcvf_adapter *adapter)
return 0;
err:
- ifcvf_free_irq(adapter);
+ ifcvf_free_irq(vf);
return -EFAULT;
}
-static int ifcvf_request_vqs_reused_irq(struct ifcvf_adapter *adapter)
+static int ifcvf_request_vqs_reused_irq(struct ifcvf_hw *vf)
{
- struct pci_dev *pdev = adapter->pdev;
- struct ifcvf_hw *vf = &adapter->vf;
+ struct pci_dev *pdev = vf->pdev;
int i, vector, ret, irq;
vector = 0;
@@ -224,15 +216,14 @@ static int ifcvf_request_vqs_reused_irq(struct ifcvf_adapter *adapter)
return 0;
err:
- ifcvf_free_irq(adapter);
+ ifcvf_free_irq(vf);
return -EFAULT;
}
-static int ifcvf_request_dev_irq(struct ifcvf_adapter *adapter)
+static int ifcvf_request_dev_irq(struct ifcvf_hw *vf)
{
- struct pci_dev *pdev = adapter->pdev;
- struct ifcvf_hw *vf = &adapter->vf;
+ struct pci_dev *pdev = vf->pdev;
int i, vector, ret, irq;
vector = 0;
@@ -265,29 +256,27 @@ static int ifcvf_request_dev_irq(struct ifcvf_adapter *adapter)
return 0;
err:
- ifcvf_free_irq(adapter);
+ ifcvf_free_irq(vf);
return -EFAULT;
}
-static int ifcvf_request_vq_irq(struct ifcvf_adapter *adapter)
+static int ifcvf_request_vq_irq(struct ifcvf_hw *vf)
{
- struct ifcvf_hw *vf = &adapter->vf;
int ret;
if (vf->msix_vector_status == MSIX_VECTOR_PER_VQ_AND_CONFIG)
- ret = ifcvf_request_per_vq_irq(adapter);
+ ret = ifcvf_request_per_vq_irq(vf);
else
- ret = ifcvf_request_vqs_reused_irq(adapter);
+ ret = ifcvf_request_vqs_reused_irq(vf);
return ret;
}
-static int ifcvf_request_config_irq(struct ifcvf_adapter *adapter)
+static int ifcvf_request_config_irq(struct ifcvf_hw *vf)
{
- struct pci_dev *pdev = adapter->pdev;
- struct ifcvf_hw *vf = &adapter->vf;
+ struct pci_dev *pdev = vf->pdev;
int config_vector, ret;
if (vf->msix_vector_status == MSIX_VECTOR_PER_VQ_AND_CONFIG)
@@ -320,17 +309,16 @@ static int ifcvf_request_config_irq(struct ifcvf_adapter *adapter)
return 0;
err:
- ifcvf_free_irq(adapter);
+ ifcvf_free_irq(vf);
return -EFAULT;
}
-static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
+static int ifcvf_request_irq(struct ifcvf_hw *vf)
{
- struct ifcvf_hw *vf = &adapter->vf;
int nvectors, ret, max_intr;
- nvectors = ifcvf_alloc_vectors(adapter);
+ nvectors = ifcvf_alloc_vectors(vf);
if (nvectors <= 0)
return -EFAULT;
@@ -341,16 +329,16 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
if (nvectors == 1) {
vf->msix_vector_status = MSIX_VECTOR_DEV_SHARED;
- ret = ifcvf_request_dev_irq(adapter);
+ ret = ifcvf_request_dev_irq(vf);
return ret;
}
- ret = ifcvf_request_vq_irq(adapter);
+ ret = ifcvf_request_vq_irq(vf);
if (ret)
return ret;
- ret = ifcvf_request_config_irq(adapter);
+ ret = ifcvf_request_config_irq(vf);
if (ret)
return ret;
@@ -358,9 +346,9 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
return 0;
}
-static int ifcvf_start_datapath(void *private)
+static int ifcvf_start_datapath(struct ifcvf_adapter *adapter)
{
- struct ifcvf_hw *vf = ifcvf_private_to_vf(private);
+ struct ifcvf_hw *vf = adapter->vf;
u8 status;
int ret;
@@ -374,9 +362,9 @@ static int ifcvf_start_datapath(void *private)
return ret;
}
-static int ifcvf_stop_datapath(void *private)
+static int ifcvf_stop_datapath(struct ifcvf_adapter *adapter)
{
- struct ifcvf_hw *vf = ifcvf_private_to_vf(private);
+ struct ifcvf_hw *vf = adapter->vf;
int i;
for (i = 0; i < vf->nr_vring; i++)
@@ -389,7 +377,7 @@ static int ifcvf_stop_datapath(void *private)
static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
{
- struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter);
+ struct ifcvf_hw *vf = adapter->vf;
int i;
for (i = 0; i < vf->nr_vring; i++) {
@@ -414,7 +402,7 @@ static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev)
{
struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
- return &adapter->vf;
+ return adapter->vf;
}
static u64 ifcvf_vdpa_get_device_features(struct vdpa_device *vdpa_dev)
@@ -479,7 +467,7 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
!(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) {
- ret = ifcvf_request_irq(adapter);
+ ret = ifcvf_request_irq(vf);
if (ret) {
status = ifcvf_get_status(vf);
status |= VIRTIO_CONFIG_S_FAILED;
@@ -511,7 +499,7 @@ static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev)
if (status_old & VIRTIO_CONFIG_S_DRIVER_OK) {
ifcvf_stop_datapath(adapter);
- ifcvf_free_irq(adapter);
+ ifcvf_free_irq(vf);
}
ifcvf_reset_vring(adapter);
@@ -755,17 +743,37 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
struct vdpa_device *vdpa_dev;
struct pci_dev *pdev;
struct ifcvf_hw *vf;
+ u64 device_features;
int ret;
ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
- if (!ifcvf_mgmt_dev->adapter)
- return -EOPNOTSUPP;
+ vf = &ifcvf_mgmt_dev->vf;
+ pdev = vf->pdev;
+ adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
+ &pdev->dev, &ifc_vdpa_ops, 1, 1, NULL, false);
+ if (IS_ERR(adapter)) {
+ IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
+ return PTR_ERR(adapter);
+ }
- adapter = ifcvf_mgmt_dev->adapter;
- vf = &adapter->vf;
- pdev = adapter->pdev;
+ ifcvf_mgmt_dev->adapter = adapter;
+ adapter->pdev = pdev;
+ adapter->vdpa.dma_dev = &pdev->dev;
+ adapter->vdpa.mdev = mdev;
+ adapter->vf = vf;
vdpa_dev = &adapter->vdpa;
+ device_features = vf->hw_features;
+ if (config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
+ if (config->device_features & ~device_features) {
+ IFCVF_ERR(pdev, "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
+ config->device_features, device_features);
+ return -EINVAL;
+ }
+ device_features &= config->device_features;
+ }
+ vf->dev_features = device_features;
+
if (name)
ret = dev_set_name(&vdpa_dev->dev, "%s", name);
else
@@ -781,7 +789,6 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
return 0;
}
-
static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
@@ -800,7 +807,6 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
struct device *dev = &pdev->dev;
- struct ifcvf_adapter *adapter;
struct ifcvf_hw *vf;
u32 dev_type;
int ret, i;
@@ -831,25 +837,21 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
pci_set_master(pdev);
-
- adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
- dev, &ifc_vdpa_ops, 1, 1, NULL, false);
- if (IS_ERR(adapter)) {
- IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
- return PTR_ERR(adapter);
+ ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
+ if (!ifcvf_mgmt_dev) {
+ IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
+ return -ENOMEM;
}
- vf = &adapter->vf;
+ vf = &ifcvf_mgmt_dev->vf;
vf->dev_type = get_dev_type(pdev);
vf->base = pcim_iomap_table(pdev);
-
- adapter->pdev = pdev;
- adapter->vdpa.dma_dev = &pdev->dev;
+ vf->pdev = pdev;
ret = ifcvf_init_hw(vf, pdev);
if (ret) {
IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
- return ret;
+ goto err;
}
for (i = 0; i < vf->nr_vring; i++)
@@ -858,16 +860,6 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
vf->hw_features = ifcvf_get_hw_features(vf);
vf->config_size = ifcvf_get_config_size(vf);
- ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
- if (!ifcvf_mgmt_dev) {
- IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
- return -ENOMEM;
- }
-
- ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
- ifcvf_mgmt_dev->mdev.device = dev;
- ifcvf_mgmt_dev->adapter = adapter;
-
dev_type = get_dev_type(pdev);
switch (dev_type) {
case VIRTIO_ID_NET:
@@ -882,11 +874,11 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err;
}
+ ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
+ ifcvf_mgmt_dev->mdev.device = dev;
ifcvf_mgmt_dev->mdev.max_supported_vqs = vf->nr_vring;
ifcvf_mgmt_dev->mdev.supported_features = vf->hw_features;
-
- adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
-
+ ifcvf_mgmt_dev->mdev.config_attr_mask = (1 << VDPA_ATTR_DEV_FEATURES);
ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev);
if (ret) {
diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile
index f717978c83bf..e791394c33e3 100644
--- a/drivers/vdpa/mlx5/Makefile
+++ b/drivers/vdpa/mlx5/Makefile
@@ -1,4 +1,4 @@
subdir-ccflags-y += -I$(srctree)/drivers/vdpa/mlx5/core
obj-$(CONFIG_MLX5_VDPA_NET) += mlx5_vdpa.o
-mlx5_vdpa-$(CONFIG_MLX5_VDPA_NET) += net/mlx5_vnet.o core/resources.o core/mr.o
+mlx5_vdpa-$(CONFIG_MLX5_VDPA_NET) += net/mlx5_vnet.o core/resources.o core/mr.o net/debug.o
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 6af9fdbb86b7..25fc4120b618 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -96,6 +96,7 @@ struct mlx5_vdpa_dev {
struct mlx5_control_vq cvq;
struct workqueue_struct *wq;
unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
+ bool suspended;
};
int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
@@ -116,8 +117,9 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
int inlen);
int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey);
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
- bool *change_map);
-int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb);
+ bool *change_map, unsigned int asid);
+int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
+ unsigned int asid);
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
#define mlx5_vdpa_warn(__dev, format, ...) \
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index a639b9208d41..03e543229791 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -311,7 +311,6 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8
u64 st;
u64 sz;
int err;
- int i = 0;
st = start;
while (size) {
@@ -336,7 +335,6 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8
mr->num_directs++;
mr->num_klms++;
st += sz;
- i++;
}
list_splice_tail(&tmp, &mr->head);
return 0;
@@ -505,13 +503,13 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
else
destroy_dma_mr(mvdev, mr);
- memset(mr, 0, sizeof(*mr));
mr->initialized = false;
out:
mutex_unlock(&mr->mkey_mtx);
}
-static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb, unsigned int asid)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
int err;
@@ -519,42 +517,49 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
if (mr->initialized)
return 0;
- if (iotlb)
- err = create_user_mr(mvdev, iotlb);
- else
- err = create_dma_mr(mvdev, mr);
+ if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
+ if (iotlb)
+ err = create_user_mr(mvdev, iotlb);
+ else
+ err = create_dma_mr(mvdev, mr);
- if (err)
- return err;
+ if (err)
+ return err;
+ }
- err = dup_iotlb(mvdev, iotlb);
- if (err)
- goto out_err;
+ if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
+ err = dup_iotlb(mvdev, iotlb);
+ if (err)
+ goto out_err;
+ }
mr->initialized = true;
return 0;
out_err:
- if (iotlb)
- destroy_user_mr(mvdev, mr);
- else
- destroy_dma_mr(mvdev, mr);
+ if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
+ if (iotlb)
+ destroy_user_mr(mvdev, mr);
+ else
+ destroy_dma_mr(mvdev, mr);
+ }
return err;
}
-int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
+ unsigned int asid)
{
int err;
mutex_lock(&mvdev->mr.mkey_mtx);
- err = _mlx5_vdpa_create_mr(mvdev, iotlb);
+ err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid);
mutex_unlock(&mvdev->mr.mkey_mtx);
return err;
}
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
- bool *change_map)
+ bool *change_map, unsigned int asid)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
int err = 0;
@@ -566,7 +571,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io
*change_map = true;
}
if (!*change_map)
- err = _mlx5_vdpa_create_mr(mvdev, iotlb);
+ err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid);
mutex_unlock(&mr->mkey_mtx);
return err;
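
The net effect of the mr.c changes is that memory registration becomes asid-aware: each virtqueue group is bound to an address space ID through group2asid[], and an incoming iotlb update only rebuilds the resources of the groups bound to that asid. A simplified sketch of the dispatch, with a hypothetical function name:

static void sketch_set_map(struct mlx5_vdpa_dev *mvdev,
			   struct vhost_iotlb *iotlb, unsigned int asid)
{
	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
		/* rebuild the data-path mkey from this iotlb */
	}
	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
		/* duplicate the mappings for the software control VQ */
	}
}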
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index 9800f9bec225..d5a59c9035fb 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -213,7 +213,7 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
return err;
mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
- *mkey |= mlx5_idx_to_mkey(mkey_index);
+ *mkey = mlx5_idx_to_mkey(mkey_index);
return 0;
}
@@ -233,6 +233,7 @@ static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
if (!mvdev->cvq.iotlb)
return -ENOMEM;
+ spin_lock_init(&mvdev->cvq.iommu_lock);
vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
return 0;
diff --git a/drivers/vdpa/mlx5/net/debug.c b/drivers/vdpa/mlx5/net/debug.c
new file mode 100644
index 000000000000..60d6ac68cdc4
--- /dev/null
+++ b/drivers/vdpa/mlx5/net/debug.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/debugfs.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_vnet.h"
+
+static int tirn_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_vdpa_net *ndev = file->private;
+
+ seq_printf(file, "0x%x\n", ndev->res.tirn);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(tirn);
+
+void mlx5_vdpa_remove_tirn(struct mlx5_vdpa_net *ndev)
+{
+ if (ndev->debugfs)
+ debugfs_remove(ndev->res.tirn_dent);
+}
+
+void mlx5_vdpa_add_tirn(struct mlx5_vdpa_net *ndev)
+{
+ ndev->res.tirn_dent = debugfs_create_file("tirn", 0444, ndev->rx_dent,
+ ndev, &tirn_fops);
+}
+
+static int rx_flow_table_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_vdpa_net *ndev = file->private;
+
+ seq_printf(file, "0x%x\n", mlx5_flow_table_id(ndev->rxft));
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(rx_flow_table);
+
+void mlx5_vdpa_remove_rx_flow_table(struct mlx5_vdpa_net *ndev)
+{
+ if (ndev->debugfs)
+ debugfs_remove(ndev->rx_table_dent);
+}
+
+void mlx5_vdpa_add_rx_flow_table(struct mlx5_vdpa_net *ndev)
+{
+ ndev->rx_table_dent = debugfs_create_file("table_id", 0444, ndev->rx_dent,
+ ndev, &rx_flow_table_fops);
+}
+
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+static int packets_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_vdpa_counter *counter = file->private;
+ u64 packets;
+ u64 bytes;
+ int err;
+
+ err = mlx5_fc_query(counter->mdev, counter->counter, &packets, &bytes);
+ if (err)
+ return err;
+
+ seq_printf(file, "0x%llx\n", packets);
+ return 0;
+}
+
+static int bytes_show(struct seq_file *file, void *priv)
+{
+ struct mlx5_vdpa_counter *counter = file->private;
+ u64 packets;
+ u64 bytes;
+ int err;
+
+ err = mlx5_fc_query(counter->mdev, counter->counter, &packets, &bytes);
+ if (err)
+ return err;
+
+ seq_printf(file, "0x%llx\n", bytes);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(packets);
+DEFINE_SHOW_ATTRIBUTE(bytes);
+
+static void add_counter_node(struct mlx5_vdpa_counter *counter,
+ struct dentry *parent)
+{
+ debugfs_create_file("packets", 0444, parent, counter,
+ &packets_fops);
+ debugfs_create_file("bytes", 0444, parent, counter,
+ &bytes_fops);
+}
+
+void mlx5_vdpa_add_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node)
+{
+ static const char *ut = "untagged";
+ char vidstr[9];
+ u16 vid;
+
+ node->ucast_counter.mdev = ndev->mvdev.mdev;
+ node->mcast_counter.mdev = ndev->mvdev.mdev;
+ if (node->tagged) {
+ vid = key2vid(node->macvlan);
+ snprintf(vidstr, sizeof(vidstr), "0x%x", vid);
+ } else {
+ strcpy(vidstr, ut);
+ }
+
+ node->dent = debugfs_create_dir(vidstr, ndev->rx_dent);
+ if (IS_ERR(node->dent)) {
+ node->dent = NULL;
+ return;
+ }
+
+ node->ucast_counter.dent = debugfs_create_dir("ucast", node->dent);
+ if (IS_ERR(node->ucast_counter.dent))
+ return;
+
+ add_counter_node(&node->ucast_counter, node->ucast_counter.dent);
+
+ node->mcast_counter.dent = debugfs_create_dir("mcast", node->dent);
+ if (IS_ERR(node->mcast_counter.dent))
+ return;
+
+ add_counter_node(&node->mcast_counter, node->mcast_counter.dent);
+}
+
+void mlx5_vdpa_remove_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node)
+{
+ if (node->dent && ndev->debugfs)
+ debugfs_remove_recursive(node->dent);
+}
+#endif
+
+void mlx5_vdpa_add_debugfs(struct mlx5_vdpa_net *ndev)
+{
+ struct mlx5_core_dev *mdev;
+
+ mdev = ndev->mvdev.mdev;
+ ndev->debugfs = debugfs_create_dir(dev_name(&ndev->mvdev.vdev.dev),
+ mlx5_debugfs_get_dev_root(mdev));
+ if (!IS_ERR(ndev->debugfs))
+ ndev->rx_dent = debugfs_create_dir("rx", ndev->debugfs);
+}
+
+void mlx5_vdpa_remove_debugfs(struct dentry *dbg)
+{
+ debugfs_remove_recursive(dbg);
+}
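
DEFINE_SHOW_ATTRIBUTE(), used for every node in debug.c above, is the stock seq_file helper for read-only single-record debugfs files. For the "tirn" node it expands to roughly the following (simplified from include/linux/seq_file.h):

static int tirn_open(struct inode *inode, struct file *file)
{
	return single_open(file, tirn_show, inode->i_private);
}

static const struct file_operations tirn_fops = {
	.owner   = THIS_MODULE,
	.open    = tirn_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};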
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 90913365def4..279ac6a558d2 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -18,15 +18,12 @@
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
+#include "mlx5_vnet.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");
-#define to_mlx5_vdpa_ndev(__mvdev) \
- container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
-#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
-
#define VALID_FEATURES_MASK \
(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
@@ -50,14 +47,6 @@ MODULE_LICENSE("Dual BSD/GPL");
#define MLX5V_UNTAGGED 0x1000
-struct mlx5_vdpa_net_resources {
- u32 tisn;
- u32 tdn;
- u32 tirn;
- u32 rqtn;
- bool valid;
-};
-
struct mlx5_vdpa_cq_buf {
struct mlx5_frag_buf_ctrl fbc;
struct mlx5_frag_buf frag_buf;
@@ -146,38 +135,6 @@ static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
return idx <= mvdev->max_idx;
}
-#define MLX5V_MACVLAN_SIZE 256
-
-struct mlx5_vdpa_net {
- struct mlx5_vdpa_dev mvdev;
- struct mlx5_vdpa_net_resources res;
- struct virtio_net_config config;
- struct mlx5_vdpa_virtqueue *vqs;
- struct vdpa_callback *event_cbs;
-
- /* Serialize vq resources creation and destruction. This is required
- * since memory map might change and we need to destroy and create
- * resources while driver in operational.
- */
- struct rw_semaphore reslock;
- struct mlx5_flow_table *rxft;
- bool setup;
- u32 cur_num_vqs;
- u32 rqt_size;
- bool nb_registered;
- struct notifier_block nb;
- struct vdpa_callback config_cb;
- struct mlx5_vdpa_wq_ent cvq_ent;
- struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
-};
-
-struct macvlan_node {
- struct hlist_node hlist;
- struct mlx5_flow_handle *ucast_rule;
- struct mlx5_flow_handle *mcast_rule;
- u64 macvlan;
-};
-
static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
@@ -821,12 +778,28 @@ static bool vq_is_tx(u16 idx)
return idx % 2;
}
-static u16 get_features_12_3(u64 features)
+enum {
+ MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
+ MLX5_VIRTIO_NET_F_HOST_ECN = 4,
+ MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
+ MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
+ MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
+ MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
+ MLX5_VIRTIO_NET_F_CSUM = 10,
+ MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
+ MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
+};
+
+static u16 get_features(u64 features)
{
- return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
- (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
+ return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
+ (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
}
static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
@@ -840,6 +813,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
void *obj_context;
+ u16 mlx_features;
void *cmd_hdr;
void *vq_ctx;
void *in;
@@ -855,6 +829,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
goto err_alloc;
}
+ mlx_features = get_features(ndev->mvdev.actual_features);
cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
@@ -865,7 +840,9 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
- get_features_12_3(ndev->mvdev.actual_features));
+ mlx_features >> 3);
+ MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
+ mlx_features & 7);
vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
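
Worked example of the split above: get_features() packs the virtio-net bits into the mlx5 queue-feature layout (bits 0..12), which the object context then carries as two fields, bits 12..3 and bits 2..0. Illustrative values:

	u16 mlx = get_features(BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
			       BIT_ULL(VIRTIO_NET_F_HOST_TSO4));
	/* mlx == BIT(MLX5_VIRTIO_NET_F_MRG_RXBUF) | BIT(MLX5_VIRTIO_NET_F_HOST_TSO4)
	 *     == BIT(2) | BIT(12) == 0x1004
	 */
	u16 mask_12_3 = mlx >> 3;	/* 0x200: HOST_TSO4 lands in queue_feature_bit_mask_12_3 */
	u16 mask_2_0  = mlx & 7;	/* 0x004: MRG_RXBUF lands in queue_feature_bit_mask_2_0 */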
@@ -1431,36 +1408,85 @@ static int create_tir(struct mlx5_vdpa_net *ndev)
err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
kfree(in);
+ if (err)
+ return err;
+
+ mlx5_vdpa_add_tirn(ndev);
return err;
}
static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
+ mlx5_vdpa_remove_tirn(ndev);
mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}
#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ #define NUM_DESTS 2
+#else
+ #define NUM_DESTS 1
+#endif
+
+static int add_steering_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dests)
+{
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ int err;
+
+ node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
+ if (IS_ERR(node->ucast_counter.counter))
+ return PTR_ERR(node->ucast_counter.counter);
+
+ node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
+ if (IS_ERR(node->mcast_counter.counter)) {
+ err = PTR_ERR(node->mcast_counter.counter);
+ goto err_mcast_counter;
+ }
+
+ dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ return 0;
+
+err_mcast_counter:
+ mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
+ return err;
+#else
+ return 0;
+#endif
+}
+
+static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node)
+{
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
+ mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
+#endif
+}
+
static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
- u16 vid, bool tagged,
- struct mlx5_flow_handle **ucast,
- struct mlx5_flow_handle **mcast)
+ struct macvlan_node *node)
{
- struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_destination dests[NUM_DESTS] = {};
struct mlx5_flow_act flow_act = {};
- struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
void *headers_c;
void *headers_v;
u8 *dmac_c;
u8 *dmac_v;
int err;
+ u16 vid;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
return -ENOMEM;
+ vid = key2vid(node->macvlan);
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
@@ -1468,46 +1494,62 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
eth_broadcast_addr(dmac_c);
ether_addr_copy(dmac_v, mac);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
- if (tagged) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
+ if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid);
+ }
+ if (node->tagged) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dest.tir_num = ndev->res.tirn;
- rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
- if (IS_ERR(rule))
- return PTR_ERR(rule);
+ dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dests[0].tir_num = ndev->res.tirn;
+ err = add_steering_counters(ndev, node, &flow_act, dests);
+ if (err)
+ goto out_free;
+
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
+#endif
+ node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
+ if (IS_ERR(node->ucast_rule)) {
+ err = PTR_ERR(node->ucast_rule);
+ goto err_ucast;
+ }
- *ucast = rule;
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
+#endif
memset(dmac_c, 0, ETH_ALEN);
memset(dmac_v, 0, ETH_ALEN);
dmac_c[0] = 1;
dmac_v[0] = 1;
- rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1);
- kvfree(spec);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
+ node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
+ if (IS_ERR(node->mcast_rule)) {
+ err = PTR_ERR(node->mcast_rule);
goto err_mcast;
}
-
- *mcast = rule;
+ kvfree(spec);
+ mlx5_vdpa_add_rx_counters(ndev, node);
return 0;
err_mcast:
- mlx5_del_flow_rules(*ucast);
+ mlx5_del_flow_rules(node->ucast_rule);
+err_ucast:
+ remove_steering_counters(ndev, node);
+out_free:
+ kvfree(spec);
return err;
}
static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
- struct mlx5_flow_handle *ucast,
- struct mlx5_flow_handle *mcast)
+ struct macvlan_node *node)
{
- mlx5_del_flow_rules(ucast);
- mlx5_del_flow_rules(mcast);
+ mlx5_vdpa_remove_rx_counters(ndev, node);
+ mlx5_del_flow_rules(node->ucast_rule);
+ mlx5_del_flow_rules(node->mcast_rule);
}
static u64 search_val(u8 *mac, u16 vlan, bool tagged)
@@ -1541,14 +1583,14 @@ static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 valu
return NULL;
}
-static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) // vlan -> vid
+static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
{
struct macvlan_node *ptr;
u64 val;
u32 idx;
int err;
- val = search_val(mac, vlan, tagged);
+ val = search_val(mac, vid, tagged);
if (mac_vlan_lookup(ndev, val))
return -EEXIST;
@@ -1556,12 +1598,13 @@ static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagg
if (!ptr)
return -ENOMEM;
- err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vlan, tagged,
- &ptr->ucast_rule, &ptr->mcast_rule);
+ ptr->tagged = tagged;
+ ptr->macvlan = val;
+ ptr->ndev = ndev;
+ err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
if (err)
goto err_add;
- ptr->macvlan = val;
idx = hash_64(val, 8);
hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
return 0;
@@ -1580,7 +1623,8 @@ static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tag
return;
hlist_del(&ptr->hlist);
- mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule);
+ mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
+ remove_steering_counters(ndev, ptr);
kfree(ptr);
}
@@ -1593,7 +1637,8 @@ static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
hlist_del(&pos->hlist);
- mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule);
+ mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
+ remove_steering_counters(ndev, pos);
kfree(pos);
}
}
@@ -1619,6 +1664,7 @@ static int setup_steering(struct mlx5_vdpa_net *ndev)
mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
return PTR_ERR(ndev->rxft);
}
+ mlx5_vdpa_add_rx_flow_table(ndev);
err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
if (err)
@@ -1627,6 +1673,7 @@ static int setup_steering(struct mlx5_vdpa_net *ndev)
return 0;
err_add:
+ mlx5_vdpa_remove_rx_flow_table(ndev);
mlx5_destroy_flow_table(ndev->rxft);
return err;
}
@@ -1634,6 +1681,7 @@ err_add:
static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
clear_mac_vlan_table(ndev);
+ mlx5_vdpa_remove_rx_flow_table(ndev);
mlx5_destroy_flow_table(ndev->rxft);
}
@@ -1684,7 +1732,7 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
/* Need to recreate the flow table entry, so that the packet can be forwarded back
*/
- mac_vlan_del(ndev, ndev->config.mac, 0, false);
+ mac_vlan_del(ndev, mac_back, 0, false);
if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
@@ -1821,6 +1869,9 @@ static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
size_t read;
u16 id;
+ if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
+ return status;
+
switch (cmd) {
case VIRTIO_NET_CTRL_VLAN_ADD:
read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
@@ -2140,23 +2191,27 @@ static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
return MLX5_VDPA_DATAVQ_GROUP;
}
-enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
- MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
- MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
- MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
-};
-
static u64 mlx_to_vritio_features(u16 dev_features)
{
u64 result = 0;
- if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
+ result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
+ result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
+ result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
- if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
result |= BIT_ULL(VIRTIO_NET_F_CSUM);
- if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
- if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
+ if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
return result;
@@ -2178,6 +2233,7 @@ static u64 get_supported_features(struct mlx5_core_dev *mdev)
mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
+ mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
return mlx_vdpa_features;
}
@@ -2266,6 +2322,113 @@ static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
}
}
+static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
+ int err;
+
+ MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
+ MLX5_SET(query_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(query_vport_state_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_state_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
+ if (err)
+ return 0;
+
+ return MLX5_GET(query_vport_state_out, out, state);
+}
+
+static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
+{
+ if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
+ VPORT_STATE_UP)
+ return true;
+
+ return false;
+}
+
+static void update_carrier(struct work_struct *work)
+{
+ struct mlx5_vdpa_wq_ent *wqent;
+ struct mlx5_vdpa_dev *mvdev;
+ struct mlx5_vdpa_net *ndev;
+
+ wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
+ mvdev = wqent->mvdev;
+ ndev = to_mlx5_vdpa_ndev(mvdev);
+ if (get_link_state(mvdev))
+ ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
+ else
+ ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
+
+ if (ndev->config_cb.callback)
+ ndev->config_cb.callback(ndev->config_cb.private);
+
+ kfree(wqent);
+}
+
+static int queue_link_work(struct mlx5_vdpa_net *ndev)
+{
+ struct mlx5_vdpa_wq_ent *wqent;
+
+ wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
+ if (!wqent)
+ return -ENOMEM;
+
+ wqent->mvdev = &ndev->mvdev;
+ INIT_WORK(&wqent->work, update_carrier);
+ queue_work(ndev->mvdev.wq, &wqent->work);
+ return 0;
+}
+
+static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
+{
+ struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
+ struct mlx5_eqe *eqe = param;
+ int ret = NOTIFY_DONE;
+
+ if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ if (queue_link_work(ndev))
+ return NOTIFY_DONE;
+
+ ret = NOTIFY_OK;
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return ret;
+ }
+ return ret;
+}
+
+static void register_link_notifier(struct mlx5_vdpa_net *ndev)
+{
+ if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
+ return;
+
+ ndev->nb.notifier_call = event_handler;
+ mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
+ ndev->nb_registered = true;
+ queue_link_work(ndev);
+}
+
+static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
+{
+ if (!ndev->nb_registered)
+ return;
+
+ ndev->nb_registered = false;
+ mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
+ if (ndev->mvdev.wq)
+ flush_workqueue(ndev->mvdev.wq);
+}
+
static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2389,7 +2552,8 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
}
}
-static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb, unsigned int asid)
{
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
int err;
@@ -2401,11 +2565,11 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
teardown_driver(ndev);
mlx5_vdpa_destroy_mr(mvdev);
- err = mlx5_vdpa_create_mr(mvdev, iotlb);
+ err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
if (err)
goto err_mr;
- if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
+ if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
goto err_mr;
restore_channels_info(ndev);
@@ -2434,10 +2598,11 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev)
err = 0;
goto out;
}
+ mlx5_vdpa_add_debugfs(ndev);
err = setup_virtqueues(mvdev);
if (err) {
mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
- goto out;
+ goto err_setup;
}
err = create_rqt(ndev);
@@ -2467,6 +2632,8 @@ err_tir:
destroy_rqt(ndev);
err_rqt:
teardown_virtqueues(ndev);
+err_setup:
+ mlx5_vdpa_remove_debugfs(ndev->debugfs);
out:
return err;
}
@@ -2480,6 +2647,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev)
if (!ndev->setup)
return;
+ mlx5_vdpa_remove_debugfs(ndev->debugfs);
+ ndev->debugfs = NULL;
teardown_steering(ndev);
destroy_tir(ndev);
destroy_rqt(ndev);
@@ -2529,10 +2698,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
goto err_setup;
}
+ register_link_notifier(ndev);
err = setup_driver(mvdev);
if (err) {
mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
- goto err_setup;
+ goto err_driver;
}
} else {
mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
@@ -2544,6 +2714,8 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
up_write(&ndev->reslock);
return;
+err_driver:
+ unregister_link_notifier(ndev);
err_setup:
mlx5_vdpa_destroy_mr(&ndev->mvdev);
ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
@@ -2569,10 +2741,12 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
mlx5_vdpa_info(mvdev, "performing device reset\n");
down_write(&ndev->reslock);
+ unregister_link_notifier(ndev);
teardown_driver(ndev);
clear_vqs_ready(ndev);
mlx5_vdpa_destroy_mr(&ndev->mvdev);
ndev->mvdev.status = 0;
+ ndev->mvdev.suspended = false;
ndev->cur_num_vqs = 0;
ndev->mvdev.cvq.received_desc = 0;
ndev->mvdev.cvq.completed_desc = 0;
@@ -2582,7 +2756,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
++mvdev->generation;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
- if (mlx5_vdpa_create_mr(mvdev, NULL))
+ if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
mlx5_vdpa_warn(mvdev, "create MR failed\n");
}
up_write(&ndev->reslock);
@@ -2618,41 +2792,20 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
return mvdev->generation;
}
-static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
-{
- u64 start = 0ULL, last = 0ULL - 1;
- struct vhost_iotlb_map *map;
- int err = 0;
-
- spin_lock(&mvdev->cvq.iommu_lock);
- vhost_iotlb_reset(mvdev->cvq.iotlb);
-
- for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
- map = vhost_iotlb_itree_next(map, start, last)) {
- err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
- map->last, map->addr, map->perm);
- if (err)
- goto out;
- }
-
-out:
- spin_unlock(&mvdev->cvq.iommu_lock);
- return err;
-}
-
-static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
+ unsigned int asid)
{
bool change_map;
int err;
- err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
+ err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
if (err) {
mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
return err;
}
if (change_map)
- err = mlx5_vdpa_change_map(mvdev, iotlb);
+ err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
return err;
}
@@ -2665,20 +2818,21 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
int err = -EINVAL;
down_write(&ndev->reslock);
- if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
- err = set_map_data(mvdev, iotlb);
- if (err)
- goto out;
- }
-
- if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
- err = set_map_control(mvdev, iotlb);
-
-out:
+ err = set_map_data(mvdev, iotlb, asid);
up_write(&ndev->reslock);
return err;
}
+static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+
+ if (is_ctrl_vq_idx(mvdev, idx))
+ return &vdev->dev;
+
+ return mvdev->vdev.dma_dev;
+}
+
static void mlx5_vdpa_free(struct vdpa_device *vdev)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -2839,15 +2993,16 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
struct mlx5_vdpa_virtqueue *mvq;
int i;
+ mlx5_vdpa_info(mvdev, "suspending device\n");
+
down_write(&ndev->reslock);
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
- ndev->nb_registered = false;
- flush_workqueue(ndev->mvdev.wq);
+ unregister_link_notifier(ndev);
for (i = 0; i < ndev->cur_num_vqs; i++) {
mvq = &ndev->vqs[i];
suspend_vq(ndev, mvq);
}
mlx5_vdpa_cvq_suspend(mvdev);
+ mvdev->suspended = true;
up_write(&ndev->reslock);
return 0;
}
@@ -2894,6 +3049,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.get_generation = mlx5_vdpa_get_generation,
.set_map = mlx5_vdpa_set_map,
.set_group_asid = mlx5_set_group_asid,
+ .get_vq_dma_dev = mlx5_get_vq_dma_dev,
.free = mlx5_vdpa_free,
.suspend = mlx5_vdpa_suspend,
};
@@ -2977,90 +3133,6 @@ struct mlx5_vdpa_mgmtdev {
struct mlx5_vdpa_net *ndev;
};
-static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
-{
- u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
- int err;
-
- MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
- MLX5_SET(query_vport_state_in, in, op_mod, opmod);
- MLX5_SET(query_vport_state_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(query_vport_state_in, in, other_vport, 1);
-
- err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
- if (err)
- return 0;
-
- return MLX5_GET(query_vport_state_out, out, state);
-}
-
-static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
-{
- if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
- VPORT_STATE_UP)
- return true;
-
- return false;
-}
-
-static void update_carrier(struct work_struct *work)
-{
- struct mlx5_vdpa_wq_ent *wqent;
- struct mlx5_vdpa_dev *mvdev;
- struct mlx5_vdpa_net *ndev;
-
- wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
- mvdev = wqent->mvdev;
- ndev = to_mlx5_vdpa_ndev(mvdev);
- if (get_link_state(mvdev))
- ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
- else
- ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
-
- if (ndev->config_cb.callback)
- ndev->config_cb.callback(ndev->config_cb.private);
-
- kfree(wqent);
-}
-
-static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
-{
- struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
- struct mlx5_eqe *eqe = param;
- int ret = NOTIFY_DONE;
- struct mlx5_vdpa_wq_ent *wqent;
-
- if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
- switch (eqe->sub_type) {
- case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
- case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
- down_read(&ndev->reslock);
- if (!ndev->nb_registered) {
- up_read(&ndev->reslock);
- return NOTIFY_DONE;
- }
- wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
- if (!wqent) {
- up_read(&ndev->reslock);
- return NOTIFY_DONE;
- }
-
- wqent->mvdev = &ndev->mvdev;
- INIT_WORK(&wqent->work, update_carrier);
- queue_work(ndev->mvdev.wq, &wqent->work);
- up_read(&ndev->reslock);
- ret = NOTIFY_OK;
- break;
- default:
- return NOTIFY_DONE;
- }
- return ret;
- }
- return ret;
-}
-
static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
{
int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
@@ -3092,6 +3164,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
struct mlx5_vdpa_dev *mvdev;
struct mlx5_vdpa_net *ndev;
struct mlx5_core_dev *mdev;
+ u64 device_features;
u32 max_vqs;
u16 mtu;
int err;
@@ -3100,6 +3173,26 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
return -ENOSPC;
mdev = mgtdev->madev->mdev;
+ device_features = mgtdev->mgtdev.supported_features;
+ if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
+ if (add_config->device_features & ~device_features) {
+ dev_warn(mdev->device,
+ "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
+ add_config->device_features, device_features);
+ return -EINVAL;
+ }
+ device_features &= add_config->device_features;
+ } else {
+ device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
+ }
+ if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
+ device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
+ dev_warn(mdev->device,
+ "Must provision minimum features 0x%llx for this device",
+ BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
+ return -EOPNOTSUPP;
+ }
+
if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
dev_warn(mdev->device, "missing support for split virtqueues\n");
@@ -3128,7 +3221,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
if (IS_ERR(ndev))
return PTR_ERR(ndev);
- ndev->mvdev.mlx_features = mgtdev->mgtdev.supported_features;
ndev->mvdev.max_vqs = max_vqs;
mvdev = &ndev->mvdev;
mvdev->mdev = mdev;
@@ -3150,20 +3242,26 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
goto err_alloc;
}
- err = query_mtu(mdev, &mtu);
- if (err)
- goto err_alloc;
+ if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
+ err = query_mtu(mdev, &mtu);
+ if (err)
+ goto err_alloc;
- ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
+ ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
+ }
- if (get_link_state(mvdev))
- ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
- else
- ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
+ if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
+ if (get_link_state(mvdev))
+ ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
+ else
+ ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
+ }
if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
- } else {
+	/* Don't bother setting the mac address in config if _F_MAC won't be provisioned */
+ } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
+ device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
if (err)
goto err_alloc;
@@ -3174,18 +3272,33 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
err = mlx5_mpfs_add_mac(pfmdev, config->mac);
if (err)
goto err_alloc;
-
- ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
+ } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
+ /*
+		 * We used to clear the _F_MAC feature bit if we saw
+		 * a zero mac address and the device features were not
+		 * explicitly provisioned. Keep that behaviour
+		 * so old scripts do not break.
+ */
+ device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
+ } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
+ /* Don't provision zero mac address for _F_MAC */
+ mlx5_vdpa_warn(&ndev->mvdev,
+ "No mac address provisioned?\n");
+ err = -EINVAL;
+ goto err_alloc;
}
- config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
+ if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
+ config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
+
+ ndev->mvdev.mlx_features = device_features;
mvdev->vdev.dma_dev = &mdev->pdev->dev;
err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
if (err)
goto err_mpfs;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
- err = mlx5_vdpa_create_mr(mvdev, NULL);
+ err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
if (err)
goto err_res;
}
@@ -3202,9 +3315,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
goto err_res2;
}
- ndev->nb.notifier_call = event_handler;
- mlx5_notifier_register(mdev, &ndev->nb);
- ndev->nb_registered = true;
mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
if (err)
@@ -3236,14 +3346,13 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct workqueue_struct *wq;
- if (ndev->nb_registered) {
- mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
- ndev->nb_registered = false;
- }
+ mlx5_vdpa_remove_debugfs(ndev->debugfs);
+ ndev->debugfs = NULL;
+ unregister_link_notifier(ndev);
+ _vdpa_unregister_device(dev);
wq = mvdev->wq;
mvdev->wq = NULL;
destroy_workqueue(wq);
- _vdpa_unregister_device(dev);
mgtdev->ndev = NULL;
}
@@ -3275,7 +3384,8 @@ static int mlx5v_probe(struct auxiliary_device *adev,
mgtdev->mgtdev.id_table = id_table;
mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
- BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU);
+ BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
+ BIT_ULL(VDPA_ATTR_DEV_FEATURES);
mgtdev->mgtdev.max_supported_vqs =
MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
mgtdev->mgtdev.supported_features = get_supported_features(mdev);
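With BIT_ULL(VDPA_ATTR_DEV_FEATURES) added to config_attr_mask, userspace may provision a subset of the supported features at device-add time. A minimal usage sketch, assuming an iproute2 vdpa tool recent enough to accept device_features (the management-device name and MAC are illustrative): 0x300000028 selects VIRTIO_F_VERSION_1 (bit 32) and VIRTIO_F_ACCESS_PLATFORM (bit 33), which the driver insists on, plus VIRTIO_NET_F_MTU (bit 3) and VIRTIO_NET_F_MAC (bit 5):

    vdpa dev add name vdpa-a mgmtdev auxiliary/mlx5_core.sf.1 \
        mac 00:11:22:33:44:55 device_features 0x300000028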
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h
new file mode 100644
index 000000000000..c90a89e1de4d
--- /dev/null
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_VNET_H__
+#define __MLX5_VNET_H__
+
+#include "mlx5_vdpa.h"
+
+#define to_mlx5_vdpa_ndev(__mvdev) \
+ container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
+#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
+
+struct mlx5_vdpa_net_resources {
+ u32 tisn;
+ u32 tdn;
+ u32 tirn;
+ u32 rqtn;
+ bool valid;
+ struct dentry *tirn_dent;
+};
+
+#define MLX5V_MACVLAN_SIZE 256
+
+static inline u16 key2vid(u64 key)
+{
+ return (u16)(key >> 48) & 0xfff;
+}
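key2vid() recovers the 12-bit VLAN id from the top of the 64-bit macvlan hash key. A minimal sketch of the composing side, assuming the layout key2vid() implies (VLAN id in bits 48..59, the six MAC bytes below it, mac[0] most significant); the helper name is hypothetical and not part of the patch:

static inline u64 mac_vlan_to_key(const u8 *mac, u16 vid)
{
	u64 key = (u64)(vid & 0xfff) << 48;
	int i;

	/* mac[0] lands in bits 40..47, mac[5] in bits 0..7 */
	for (i = 0; i < 6; i++)
		key |= (u64)mac[i] << (8 * (5 - i));

	return key;
}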
+
+struct mlx5_vdpa_net {
+ struct mlx5_vdpa_dev mvdev;
+ struct mlx5_vdpa_net_resources res;
+ struct virtio_net_config config;
+ struct mlx5_vdpa_virtqueue *vqs;
+ struct vdpa_callback *event_cbs;
+
+ /* Serialize vq resources creation and destruction. This is required
+	 * since the memory map might change and we need to destroy and create
+	 * resources while the driver is operational.
+ */
+ struct rw_semaphore reslock;
+ struct mlx5_flow_table *rxft;
+ struct dentry *rx_dent;
+ struct dentry *rx_table_dent;
+ bool setup;
+ u32 cur_num_vqs;
+ u32 rqt_size;
+ bool nb_registered;
+ struct notifier_block nb;
+ struct vdpa_callback config_cb;
+ struct mlx5_vdpa_wq_ent cvq_ent;
+ struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
+ struct dentry *debugfs;
+};
+
+struct mlx5_vdpa_counter {
+ struct mlx5_fc *counter;
+ struct dentry *dent;
+ struct mlx5_core_dev *mdev;
+};
+
+struct macvlan_node {
+ struct hlist_node hlist;
+ struct mlx5_flow_handle *ucast_rule;
+ struct mlx5_flow_handle *mcast_rule;
+ u64 macvlan;
+ struct mlx5_vdpa_net *ndev;
+ bool tagged;
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+ struct dentry *dent;
+ struct mlx5_vdpa_counter ucast_counter;
+ struct mlx5_vdpa_counter mcast_counter;
+#endif
+};
+
+void mlx5_vdpa_add_debugfs(struct mlx5_vdpa_net *ndev);
+void mlx5_vdpa_remove_debugfs(struct dentry *dbg);
+void mlx5_vdpa_add_rx_flow_table(struct mlx5_vdpa_net *ndev);
+void mlx5_vdpa_remove_rx_flow_table(struct mlx5_vdpa_net *ndev);
+void mlx5_vdpa_add_tirn(struct mlx5_vdpa_net *ndev);
+void mlx5_vdpa_remove_tirn(struct mlx5_vdpa_net *ndev);
+#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
+void mlx5_vdpa_add_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node);
+void mlx5_vdpa_remove_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node);
+#else
+static inline void mlx5_vdpa_add_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node) {}
+static inline void mlx5_vdpa_remove_rx_counters(struct mlx5_vdpa_net *ndev,
+ struct macvlan_node *node) {}
+#endif
+
+#endif /* __MLX5_VNET_H__ */
diff --git a/drivers/vdpa/solidrun/Makefile b/drivers/vdpa/solidrun/Makefile
new file mode 100644
index 000000000000..9116252cd5fa
--- /dev/null
+++ b/drivers/vdpa/solidrun/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_SNET_VDPA) += snet_vdpa.o
+snet_vdpa-$(CONFIG_SNET_VDPA) += snet_main.o
+snet_vdpa-$(CONFIG_SNET_VDPA) += snet_ctrl.o
+ifdef CONFIG_HWMON
+snet_vdpa-$(CONFIG_SNET_VDPA) += snet_hwmon.o
+endif
diff --git a/drivers/vdpa/solidrun/snet_ctrl.c b/drivers/vdpa/solidrun/snet_ctrl.c
new file mode 100644
index 000000000000..3858738643b4
--- /dev/null
+++ b/drivers/vdpa/solidrun/snet_ctrl.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SolidRun DPU driver for control plane
+ *
+ * Copyright (C) 2022-2023 SolidRun
+ *
+ * Author: Alvaro Karsz <alvaro.karsz@solid-run.com>
+ *
+ */
+
+#include <linux/iopoll.h>
+
+#include "snet_vdpa.h"
+
+enum snet_ctrl_opcodes {
+ SNET_CTRL_OP_DESTROY = 1,
+ SNET_CTRL_OP_READ_VQ_STATE,
+ SNET_CTRL_OP_SUSPEND,
+};
+
+#define SNET_CTRL_TIMEOUT 2000000
+
+#define SNET_CTRL_DATA_SIZE_MASK 0x0000FFFF
+#define SNET_CTRL_IN_PROCESS_MASK 0x00010000
+#define SNET_CTRL_CHUNK_RDY_MASK 0x00020000
+#define SNET_CTRL_ERROR_MASK 0x0FFC0000
+
+#define SNET_VAL_TO_ERR(val) (-(((val) & SNET_CTRL_ERROR_MASK) >> 18))
+#define SNET_EMPTY_CTRL(val) (((val) & SNET_CTRL_ERROR_MASK) || \
+ !((val) & SNET_CTRL_IN_PROCESS_MASK))
+#define SNET_DATA_READY(val) ((val) & (SNET_CTRL_ERROR_MASK | SNET_CTRL_CHUNK_RDY_MASK))
+
+/* Control register used to read data from the DPU */
+struct snet_ctrl_reg_ctrl {
+ /* Chunk size in 4B words */
+ u16 data_size;
+ /* We are in the middle of a command */
+ u16 in_process:1;
+ /* A data chunk is ready and can be consumed */
+ u16 chunk_ready:1;
+ /* Error code */
+ u16 error:10;
+ /* Saved for future usage */
+ u16 rsvd:4;
+};
+
+/* Opcode register */
+struct snet_ctrl_reg_op {
+ u16 opcode;
+ /* Only if VQ index is relevant for the command */
+ u16 vq_idx;
+};
+
+struct snet_ctrl_regs {
+ struct snet_ctrl_reg_op op;
+ struct snet_ctrl_reg_ctrl ctrl;
+ u32 rsvd;
+ u32 data[];
+};
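The SNET_CTRL_*_MASK macros and the snet_ctrl_reg_ctrl bitfields describe the same 32-bit register. A sketch only: a hypothetical decode of a raw control word onto those fields (the driver applies the masks directly instead):

static inline struct snet_ctrl_reg_ctrl snet_decode_ctrl(u32 val)
{
	struct snet_ctrl_reg_ctrl c = {
		.data_size   = val & SNET_CTRL_DATA_SIZE_MASK,		/* bits 0..15 */
		.in_process  = !!(val & SNET_CTRL_IN_PROCESS_MASK),	/* bit 16 */
		.chunk_ready = !!(val & SNET_CTRL_CHUNK_RDY_MASK),	/* bit 17 */
		.error       = (val & SNET_CTRL_ERROR_MASK) >> 18,	/* bits 18..27 */
	};

	return c;
}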
+
+static struct snet_ctrl_regs __iomem *snet_get_ctrl(struct snet *snet)
+{
+ return snet->bar + snet->psnet->cfg.ctrl_off;
+}
+
+static int snet_wait_for_empty_ctrl(struct snet_ctrl_regs __iomem *regs)
+{
+ u32 val;
+
+ return readx_poll_timeout(ioread32, &regs->ctrl, val, SNET_EMPTY_CTRL(val), 10,
+ SNET_CTRL_TIMEOUT);
+}
+
+static int snet_wait_for_empty_op(struct snet_ctrl_regs __iomem *regs)
+{
+ u32 val;
+
+ return readx_poll_timeout(ioread32, &regs->op, val, !val, 10, SNET_CTRL_TIMEOUT);
+}
+
+static int snet_wait_for_data(struct snet_ctrl_regs __iomem *regs)
+{
+ u32 val;
+
+ return readx_poll_timeout(ioread32, &regs->ctrl, val, SNET_DATA_READY(val), 10,
+ SNET_CTRL_TIMEOUT);
+}
+
+static u32 snet_read32_word(struct snet_ctrl_regs __iomem *ctrl_regs, u16 word_idx)
+{
+ return ioread32(&ctrl_regs->data[word_idx]);
+}
+
+static u32 snet_read_ctrl(struct snet_ctrl_regs __iomem *ctrl_regs)
+{
+ return ioread32(&ctrl_regs->ctrl);
+}
+
+static void snet_write_ctrl(struct snet_ctrl_regs __iomem *ctrl_regs, u32 val)
+{
+ iowrite32(val, &ctrl_regs->ctrl);
+}
+
+static void snet_write_op(struct snet_ctrl_regs __iomem *ctrl_regs, u32 val)
+{
+ iowrite32(val, &ctrl_regs->op);
+}
+
+static int snet_wait_for_dpu_completion(struct snet_ctrl_regs __iomem *ctrl_regs)
+{
+ /* Wait until the DPU finishes completely.
+ * It will clear the opcode register.
+ */
+ return snet_wait_for_empty_op(ctrl_regs);
+}
+
+/* Reading ctrl from the DPU:
+ * buf_size must be 4B aligned
+ *
+ * Steps:
+ *
+ * (1) Verify that the DPU is not in the middle of another operation by
+ * reading the in_process and error bits in the control register.
+ * (2) Write the request opcode and the VQ idx in the opcode register
+ * and write the buffer size in the control register.
+ * (3) Start reading chunks of data; the chunk_ready bit indicates that a
+ * data chunk is available, we signal that we read it by clearing the bit.
+ * (4) Detect that the transfer is completed when the in_process bit
+ * in the control register is cleared or when an error appears.
+ */
+static int snet_ctrl_read_from_dpu(struct snet *snet, u16 opcode, u16 vq_idx, void *buffer,
+ u32 buf_size)
+{
+ struct pci_dev *pdev = snet->pdev;
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+ u32 *bfr_ptr = (u32 *)buffer;
+ u32 val;
+ u16 buf_words;
+ int ret;
+ u16 words, i, tot_words = 0;
+
+ /* Supported for config 2+ */
+ if (!SNET_CFG_VER(snet, 2))
+ return -EOPNOTSUPP;
+
+ if (!IS_ALIGNED(buf_size, 4))
+ return -EINVAL;
+
+ mutex_lock(&snet->ctrl_lock);
+
+ buf_words = buf_size / 4;
+
+ /* Make sure control register is empty */
+ ret = snet_wait_for_empty_ctrl(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for previous control data to be consumed\n");
+ goto exit;
+ }
+
+ /* We need to write the buffer size in the control register, and the opcode + vq index in
+ * the opcode register.
+ * We use a spinlock to serialize the writes.
+ */
+ spin_lock(&snet->ctrl_spinlock);
+
+ snet_write_ctrl(regs, buf_words);
+ snet_write_op(regs, opcode | (vq_idx << 16));
+
+ spin_unlock(&snet->ctrl_spinlock);
+
+ while (buf_words != tot_words) {
+ ret = snet_wait_for_data(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for control data\n");
+ goto exit;
+ }
+
+ val = snet_read_ctrl(regs);
+
+ /* Error? */
+ if (val & SNET_CTRL_ERROR_MASK) {
+ ret = SNET_VAL_TO_ERR(val);
+ SNET_WARN(pdev, "Error while reading control data from DPU, err %d\n", ret);
+ goto exit;
+ }
+
+ words = min_t(u16, val & SNET_CTRL_DATA_SIZE_MASK, buf_words - tot_words);
+
+ for (i = 0; i < words; i++) {
+ *bfr_ptr = snet_read32_word(regs, i);
+ bfr_ptr++;
+ }
+
+ tot_words += words;
+
+ /* Is the job completed? */
+ if (!(val & SNET_CTRL_IN_PROCESS_MASK))
+ break;
+
+ /* Clear the chunk ready bit and continue */
+ val &= ~SNET_CTRL_CHUNK_RDY_MASK;
+ snet_write_ctrl(regs, val);
+ }
+
+ ret = snet_wait_for_dpu_completion(regs);
+ if (ret)
+ SNET_WARN(pdev, "Timeout waiting for the DPU to complete a control command\n");
+
+exit:
+ mutex_unlock(&snet->ctrl_lock);
+ return ret;
+}
+
+/* Send a control message to the DPU using the old mechanism
+ * used with config version 1.
+ */
+static int snet_send_ctrl_msg_old(struct snet *snet, u32 opcode)
+{
+ struct pci_dev *pdev = snet->pdev;
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+ int ret;
+
+ mutex_lock(&snet->ctrl_lock);
+
+ /* Old mechanism uses just 1 register, the opcode register.
+ * Make sure that the opcode register is empty, and that the DPU isn't
+ * processing an old message.
+ */
+ ret = snet_wait_for_empty_op(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for previous control message to be ACKed\n");
+ goto exit;
+ }
+
+ /* Write the message */
+ snet_write_op(regs, opcode);
+
+ /* DPU ACKs the message by clearing the opcode register */
+ ret = snet_wait_for_empty_op(regs);
+ if (ret)
+ SNET_WARN(pdev, "Timeout waiting for a control message to be ACKed\n");
+
+exit:
+ mutex_unlock(&snet->ctrl_lock);
+ return ret;
+}
+
+/* Send a control message to the DPU.
+ * A control message is a message without payload.
+ */
+static int snet_send_ctrl_msg(struct snet *snet, u16 opcode, u16 vq_idx)
+{
+ struct pci_dev *pdev = snet->pdev;
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+ u32 val;
+ int ret;
+
+ /* If config version is not 2+, use the old mechanism */
+ if (!SNET_CFG_VER(snet, 2))
+ return snet_send_ctrl_msg_old(snet, opcode);
+
+ mutex_lock(&snet->ctrl_lock);
+
+ /* Make sure control register is empty */
+ ret = snet_wait_for_empty_ctrl(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for previous control data to be consumed\n");
+ goto exit;
+ }
+
+ /* We need to clear the control register and write the opcode + vq index in the opcode
+ * register.
+ * We use a spinlock to serialize the writes.
+ */
+ spin_lock(&snet->ctrl_spinlock);
+
+ snet_write_ctrl(regs, 0);
+ snet_write_op(regs, opcode | (vq_idx << 16));
+
+ spin_unlock(&snet->ctrl_spinlock);
+
+ /* The DPU ACKs control messages by setting the chunk ready bit
+ * without data.
+ */
+ ret = snet_wait_for_data(regs);
+ if (ret) {
+ SNET_WARN(pdev, "Timeout waiting for control message to be ACKed\n");
+ goto exit;
+ }
+
+	/* Check for errors */
+	val = snet_read_ctrl(regs);
+	ret = SNET_VAL_TO_ERR(val);
+
+	/* Clear the chunk ready bit */
+	val &= ~SNET_CTRL_CHUNK_RDY_MASK;
+	snet_write_ctrl(regs, val);
+
+	/* Don't let the completion wait overwrite a DPU-reported error */
+	if (snet_wait_for_dpu_completion(regs)) {
+		SNET_WARN(pdev, "Timeout waiting for DPU to complete a control command\n");
+		if (!ret)
+			ret = -ETIMEDOUT;
+	}
+
+exit:
+ mutex_unlock(&snet->ctrl_lock);
+ return ret;
+}
+
+void snet_ctrl_clear(struct snet *snet)
+{
+ struct snet_ctrl_regs __iomem *regs = snet_get_ctrl(snet);
+
+ snet_write_op(regs, 0);
+}
+
+int snet_destroy_dev(struct snet *snet)
+{
+ return snet_send_ctrl_msg(snet, SNET_CTRL_OP_DESTROY, 0);
+}
+
+int snet_read_vq_state(struct snet *snet, u16 idx, struct vdpa_vq_state *state)
+{
+ return snet_ctrl_read_from_dpu(snet, SNET_CTRL_OP_READ_VQ_STATE, idx, state,
+ sizeof(*state));
+}
+
+int snet_suspend_dev(struct snet *snet)
+{
+ return snet_send_ctrl_msg(snet, SNET_CTRL_OP_SUSPEND, 0);
+}
diff --git a/drivers/vdpa/solidrun/snet_hwmon.c b/drivers/vdpa/solidrun/snet_hwmon.c
new file mode 100644
index 000000000000..42c87387a0f1
--- /dev/null
+++ b/drivers/vdpa/solidrun/snet_hwmon.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SolidRun DPU driver for control plane
+ *
+ * Copyright (C) 2022-2023 SolidRun
+ *
+ * Author: Alvaro Karsz <alvaro.karsz@solid-run.com>
+ *
+ */
+#include <linux/hwmon.h>
+
+#include "snet_vdpa.h"
+
+/* Monitor offsets */
+#define SNET_MON_TMP0_IN_OFF 0x00
+#define SNET_MON_TMP0_MAX_OFF 0x08
+#define SNET_MON_TMP0_CRIT_OFF 0x10
+#define SNET_MON_TMP1_IN_OFF 0x18
+#define SNET_MON_TMP1_CRIT_OFF 0x20
+#define SNET_MON_CURR_IN_OFF 0x28
+#define SNET_MON_CURR_MAX_OFF 0x30
+#define SNET_MON_CURR_CRIT_OFF 0x38
+#define SNET_MON_PWR_IN_OFF 0x40
+#define SNET_MON_VOLT_IN_OFF 0x48
+#define SNET_MON_VOLT_CRIT_OFF 0x50
+#define SNET_MON_VOLT_LCRIT_OFF 0x58
+
+static void snet_hwmon_read_reg(struct psnet *psnet, u32 reg, long *out)
+{
+ *out = psnet_read64(psnet, psnet->cfg.hwmon_off + reg);
+}
+
+static umode_t snet_hwmon_is_visible(const void *data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ return 0444;
+}
+
+static int snet_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct psnet *psnet = dev_get_drvdata(dev);
+ int ret = 0;
+
+ switch (type) {
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_lcrit:
+ snet_hwmon_read_reg(psnet, SNET_MON_VOLT_LCRIT_OFF, val);
+ break;
+ case hwmon_in_crit:
+ snet_hwmon_read_reg(psnet, SNET_MON_VOLT_CRIT_OFF, val);
+ break;
+ case hwmon_in_input:
+ snet_hwmon_read_reg(psnet, SNET_MON_VOLT_IN_OFF, val);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ break;
+
+ case hwmon_power:
+ switch (attr) {
+ case hwmon_power_input:
+ snet_hwmon_read_reg(psnet, SNET_MON_PWR_IN_OFF, val);
+ break;
+
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ break;
+
+ case hwmon_curr:
+ switch (attr) {
+ case hwmon_curr_input:
+ snet_hwmon_read_reg(psnet, SNET_MON_CURR_IN_OFF, val);
+ break;
+ case hwmon_curr_max:
+ snet_hwmon_read_reg(psnet, SNET_MON_CURR_MAX_OFF, val);
+ break;
+ case hwmon_curr_crit:
+ snet_hwmon_read_reg(psnet, SNET_MON_CURR_CRIT_OFF, val);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ break;
+
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_input:
+ if (channel == 0)
+ snet_hwmon_read_reg(psnet, SNET_MON_TMP0_IN_OFF, val);
+ else
+ snet_hwmon_read_reg(psnet, SNET_MON_TMP1_IN_OFF, val);
+ break;
+ case hwmon_temp_max:
+ if (channel == 0)
+ snet_hwmon_read_reg(psnet, SNET_MON_TMP0_MAX_OFF, val);
+ else
+ ret = -EOPNOTSUPP;
+ break;
+ case hwmon_temp_crit:
+ if (channel == 0)
+ snet_hwmon_read_reg(psnet, SNET_MON_TMP0_CRIT_OFF, val);
+ else
+ snet_hwmon_read_reg(psnet, SNET_MON_TMP1_CRIT_OFF, val);
+ break;
+
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ break;
+
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ return ret;
+}
+
+static int snet_hwmon_read_string(struct device *dev,
+ enum hwmon_sensor_types type, u32 attr,
+ int channel, const char **str)
+{
+ int ret = 0;
+
+ switch (type) {
+ case hwmon_in:
+ *str = "main_vin";
+ break;
+ case hwmon_power:
+ *str = "soc_pin";
+ break;
+ case hwmon_curr:
+ *str = "soc_iin";
+ break;
+ case hwmon_temp:
+ if (channel == 0)
+ *str = "power_stage_temp";
+ else
+ *str = "ic_junction_temp";
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ return ret;
+}
+
+static const struct hwmon_ops snet_hwmon_ops = {
+	.is_visible = snet_hwmon_is_visible,
+	.read = snet_hwmon_read,
+ .read_string = snet_hwmon_read_string
+};
+
+static const struct hwmon_channel_info *snet_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT | HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_LABEL),
+ HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_LABEL),
+ HWMON_CHANNEL_INFO(curr, HWMON_C_INPUT | HWMON_C_MAX | HWMON_C_CRIT | HWMON_C_LABEL),
+ HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_CRIT | HWMON_I_LCRIT | HWMON_I_LABEL),
+ NULL
+};
+
+static const struct hwmon_chip_info snet_hwmon_chip_info = {
+ .ops = &snet_hwmon_ops,
+ .info = snet_hwmon_info,
+};
+
+/* Create an HW monitor device */
+void psnet_create_hwmon(struct pci_dev *pdev)
+{
+ struct device *hwmon;
+ struct psnet *psnet = pci_get_drvdata(pdev);
+
+ snprintf(psnet->hwmon_name, SNET_NAME_SIZE, "snet_%s", pci_name(pdev));
+ hwmon = devm_hwmon_device_register_with_info(&pdev->dev, psnet->hwmon_name, psnet,
+						     &snet_hwmon_chip_info, NULL);
+	/* The monitor is not mandatory; just alert the user in case of an error */
+ if (IS_ERR(hwmon))
+ SNET_WARN(pdev, "Failed to create SNET hwmon, error %ld\n", PTR_ERR(hwmon));
+}
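Once registered, the hwmon core exposes these channels through sysfs using the labels returned by snet_hwmon_read_string(). Illustrative only; the hwmon index varies per system:

    cat /sys/class/hwmon/hwmon0/temp1_label    # power_stage_temp
    cat /sys/class/hwmon/hwmon0/temp1_input    # value in millidegrees Celsius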
diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c
new file mode 100644
index 000000000000..cdcd84ce4f5a
--- /dev/null
+++ b/drivers/vdpa/solidrun/snet_main.c
@@ -0,0 +1,1117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SolidRun DPU driver for control plane
+ *
+ * Copyright (C) 2022-2023 SolidRun
+ *
+ * Author: Alvaro Karsz <alvaro.karsz@solid-run.com>
+ *
+ */
+#include <linux/iopoll.h>
+
+#include "snet_vdpa.h"
+
+/* SNET DPU device ID */
+#define SNET_DEVICE_ID 0x1000
+/* SNET signature */
+#define SNET_SIGNATURE 0xD0D06363
+/* Max. config version that we can work with */
+#define SNET_CFG_VERSION 0x2
+/* Queue align */
+#define SNET_QUEUE_ALIGNMENT PAGE_SIZE
+/* Kick value to notify that new data is available */
+#define SNET_KICK_VAL 0x1
+#define SNET_CONFIG_OFF 0x0
+/* How long we are willing to wait for a SNET device */
+#define SNET_DETECT_TIMEOUT 5000000
+/* How long should we wait for the DPU to read our config */
+#define SNET_READ_CFG_TIMEOUT 3000000
+/* Size of configs written to the DPU */
+#define SNET_GENERAL_CFG_LEN 36
+#define SNET_GENERAL_CFG_VQ_LEN 40
+
+static struct snet *vdpa_to_snet(struct vdpa_device *vdpa)
+{
+ return container_of(vdpa, struct snet, vdpa);
+}
+
+static irqreturn_t snet_cfg_irq_hndlr(int irq, void *data)
+{
+ struct snet *snet = data;
+ /* Call callback if any */
+ if (likely(snet->cb.callback))
+ return snet->cb.callback(snet->cb.private);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t snet_vq_irq_hndlr(int irq, void *data)
+{
+ struct snet_vq *vq = data;
+ /* Call callback if any */
+ if (likely(vq->cb.callback))
+ return vq->cb.callback(vq->cb.private);
+
+ return IRQ_HANDLED;
+}
+
+static void snet_free_irqs(struct snet *snet)
+{
+ struct psnet *psnet = snet->psnet;
+ struct pci_dev *pdev;
+ u32 i;
+
+	/* Which device allocated the IRQs? */
+ if (PSNET_FLAG_ON(psnet, SNET_CFG_FLAG_IRQ_PF))
+ pdev = snet->pdev->physfn;
+ else
+ pdev = snet->pdev;
+
+ /* Free config's IRQ */
+ if (snet->cfg_irq != -1) {
+ devm_free_irq(&pdev->dev, snet->cfg_irq, snet);
+ snet->cfg_irq = -1;
+ }
+ /* Free VQ IRQs */
+ for (i = 0; i < snet->cfg->vq_num; i++) {
+ if (snet->vqs[i] && snet->vqs[i]->irq != -1) {
+ devm_free_irq(&pdev->dev, snet->vqs[i]->irq, snet->vqs[i]);
+ snet->vqs[i]->irq = -1;
+ }
+ }
+
+ /* IRQ vectors are freed when the pci remove callback is called */
+}
+
+static int snet_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
+ u64 driver_area, u64 device_area)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+	/* Save the received parameters in the vqueue struct */
+ snet->vqs[idx]->desc_area = desc_area;
+ snet->vqs[idx]->driver_area = driver_area;
+ snet->vqs[idx]->device_area = device_area;
+
+ return 0;
+}
+
+static void snet_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+ /* save num in vqueue */
+ snet->vqs[idx]->num = num;
+}
+
+static void snet_kick_vq(struct vdpa_device *vdev, u16 idx)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+ /* not ready - ignore */
+ if (unlikely(!snet->vqs[idx]->ready))
+ return;
+
+ iowrite32(SNET_KICK_VAL, snet->vqs[idx]->kick_ptr);
+}
+
+static void snet_kick_vq_with_data(struct vdpa_device *vdev, u32 data)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+ u16 idx = data & 0xFFFF;
+
+ /* not ready - ignore */
+ if (unlikely(!snet->vqs[idx]->ready))
+ return;
+
+ iowrite32((data & 0xFFFF0000) | SNET_KICK_VAL, snet->vqs[idx]->kick_ptr);
+}
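The 32-bit kick payload carries the VQ index in its low half and forwards the high half to the kick register, matching the VIRTIO_F_NOTIFICATION_DATA layout (VQ number in bits 0..15, next available index above it for split rings). A hypothetical caller sketch; next_avail is illustrative:

	/* Hypothetical composition of the payload */
	u32 data = ((u32)next_avail << 16) | (u32)vq_idx;

	snet_kick_vq_with_data(vdev, data);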
+
+static void snet_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ snet->vqs[idx]->cb.callback = cb->callback;
+ snet->vqs[idx]->cb.private = cb->private;
+}
+
+static void snet_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ snet->vqs[idx]->ready = ready;
+}
+
+static bool snet_get_vq_ready(struct vdpa_device *vdev, u16 idx)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return snet->vqs[idx]->ready;
+}
+
+static bool snet_vq_state_is_initial(struct snet *snet, const struct vdpa_vq_state *state)
+{
+ if (SNET_HAS_FEATURE(snet, VIRTIO_F_RING_PACKED)) {
+ const struct vdpa_vq_state_packed *p = &state->packed;
+
+ if (p->last_avail_counter == 1 && p->last_used_counter == 1 &&
+ p->last_avail_idx == 0 && p->last_used_idx == 0)
+ return true;
+ } else {
+ const struct vdpa_vq_state_split *s = &state->split;
+
+ if (s->avail_index == 0)
+ return true;
+ }
+
+ return false;
+}
+
+static int snet_set_vq_state(struct vdpa_device *vdev, u16 idx, const struct vdpa_vq_state *state)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ /* We can set any state for config version 2+ */
+ if (SNET_CFG_VER(snet, 2)) {
+ memcpy(&snet->vqs[idx]->vq_state, state, sizeof(*state));
+ return 0;
+ }
+
+ /* Older config - we can't set the VQ state.
+ * Return 0 only if this is the initial state we use in the DPU.
+ */
+ if (snet_vq_state_is_initial(snet, state))
+ return 0;
+
+ return -EOPNOTSUPP;
+}
+
+static int snet_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return snet_read_vq_state(snet, idx, state);
+}
+
+static int snet_get_vq_irq(struct vdpa_device *vdev, u16 idx)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return snet->vqs[idx]->irq;
+}
+
+static u32 snet_get_vq_align(struct vdpa_device *vdev)
+{
+ return (u32)SNET_QUEUE_ALIGNMENT;
+}
+
+static int snet_reset_dev(struct snet *snet)
+{
+ struct pci_dev *pdev = snet->pdev;
+ int ret = 0;
+ u32 i;
+
+ /* If status is 0, nothing to do */
+ if (!snet->status)
+ return 0;
+
+ /* If DPU started, destroy it */
+ if (snet->status & VIRTIO_CONFIG_S_DRIVER_OK)
+ ret = snet_destroy_dev(snet);
+
+ /* Clear VQs */
+ for (i = 0; i < snet->cfg->vq_num; i++) {
+ if (!snet->vqs[i])
+ continue;
+ snet->vqs[i]->cb.callback = NULL;
+ snet->vqs[i]->cb.private = NULL;
+ snet->vqs[i]->desc_area = 0;
+ snet->vqs[i]->device_area = 0;
+ snet->vqs[i]->driver_area = 0;
+ snet->vqs[i]->ready = false;
+ }
+
+ /* Clear config callback */
+ snet->cb.callback = NULL;
+ snet->cb.private = NULL;
+ /* Free IRQs */
+ snet_free_irqs(snet);
+ /* Reset status */
+ snet->status = 0;
+ snet->dpu_ready = false;
+
+ if (ret)
+		SNET_WARN(pdev, "Incomplete reset of SNET[%u] device, err: %d\n", snet->sid, ret);
+ else
+ SNET_DBG(pdev, "Reset SNET[%u] device\n", snet->sid);
+
+ return 0;
+}
+
+static int snet_reset(struct vdpa_device *vdev)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return snet_reset_dev(snet);
+}
+
+static size_t snet_get_config_size(struct vdpa_device *vdev)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return (size_t)snet->cfg->cfg_size;
+}
+
+static u64 snet_get_features(struct vdpa_device *vdev)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return snet->cfg->features;
+}
+
+static int snet_set_drv_features(struct vdpa_device *vdev, u64 features)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ snet->negotiated_features = snet->cfg->features & features;
+ return 0;
+}
+
+static u64 snet_get_drv_features(struct vdpa_device *vdev)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return snet->negotiated_features;
+}
+
+static u16 snet_get_vq_num_max(struct vdpa_device *vdev)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return (u16)snet->cfg->vq_size;
+}
+
+static void snet_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ snet->cb.callback = cb->callback;
+ snet->cb.private = cb->private;
+}
+
+static u32 snet_get_device_id(struct vdpa_device *vdev)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return snet->cfg->virtio_id;
+}
+
+static u32 snet_get_vendor_id(struct vdpa_device *vdev)
+{
+ return (u32)PCI_VENDOR_ID_SOLIDRUN;
+}
+
+static u8 snet_get_status(struct vdpa_device *vdev)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+
+ return snet->status;
+}
+
+static bool snet_write_conf(struct snet *snet)
+{
+ u32 off, i, tmp;
+ int ret;
+
+ /* No need to write the config twice */
+ if (snet->dpu_ready)
+ return true;
+
+	/* Snet data:
+ *
+ * General data: SNET_GENERAL_CFG_LEN bytes long
+ * 0 0x4 0x8 0xC 0x10 0x14 0x1C 0x24
+ * | MAGIC NUMBER | CFG VER | SNET SID | NUMBER OF QUEUES | IRQ IDX | FEATURES | RSVD |
+ *
+ * For every VQ: SNET_GENERAL_CFG_VQ_LEN bytes long
+ * 0 0x4 0x8
+ * | VQ SID AND QUEUE SIZE | IRQ Index |
+ * | DESC AREA |
+ * | DEVICE AREA |
+ * | DRIVER AREA |
+ * | VQ STATE (CFG 2+) | RSVD |
+ *
+	 * The magic number must be written last; it is the DPU's indication that the data is ready.
+ */
+
+ /* Init offset */
+ off = snet->psnet->cfg.host_cfg_off;
+
+ /* Ignore magic number for now */
+ off += 4;
+ snet_write32(snet, off, snet->psnet->negotiated_cfg_ver);
+ off += 4;
+ snet_write32(snet, off, snet->sid);
+ off += 4;
+ snet_write32(snet, off, snet->cfg->vq_num);
+ off += 4;
+ snet_write32(snet, off, snet->cfg_irq_idx);
+ off += 4;
+ snet_write64(snet, off, snet->negotiated_features);
+ off += 8;
+ /* Ignore reserved */
+ off += 8;
+ /* Write VQs */
+ for (i = 0 ; i < snet->cfg->vq_num ; i++) {
+ tmp = (i << 16) | (snet->vqs[i]->num & 0xFFFF);
+ snet_write32(snet, off, tmp);
+ off += 4;
+ snet_write32(snet, off, snet->vqs[i]->irq_idx);
+ off += 4;
+ snet_write64(snet, off, snet->vqs[i]->desc_area);
+ off += 8;
+ snet_write64(snet, off, snet->vqs[i]->device_area);
+ off += 8;
+ snet_write64(snet, off, snet->vqs[i]->driver_area);
+ off += 8;
+ /* Write VQ state if config version is 2+ */
+ if (SNET_CFG_VER(snet, 2))
+ snet_write32(snet, off, *(u32 *)&snet->vqs[i]->vq_state);
+ off += 4;
+
+ /* Ignore reserved */
+ off += 4;
+ }
+
+ /* Write magic number - data is ready */
+ snet_write32(snet, snet->psnet->cfg.host_cfg_off, SNET_SIGNATURE);
+
+ /* The DPU will ACK the config by clearing the signature */
+ ret = readx_poll_timeout(ioread32, snet->bar + snet->psnet->cfg.host_cfg_off,
+ tmp, !tmp, 10, SNET_READ_CFG_TIMEOUT);
+ if (ret) {
+ SNET_ERR(snet->pdev, "Timeout waiting for the DPU to read the config\n");
+ return false;
+ }
+
+ /* set DPU flag */
+ snet->dpu_ready = true;
+
+ return true;
+}
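A sketch only: the general-config header described in the comment above, expressed as a packed struct. The driver writes the fields individually with snet_write32()/snet_write64() and defines no such struct, so the names are illustrative:

struct snet_host_cfg_hdr {
	u32 magic;		/* SNET_SIGNATURE, written last */
	u32 cfg_ver;		/* negotiated config version */
	u32 sid;		/* SNET device serial ID */
	u32 vq_num;		/* number of queues */
	u32 cfg_irq_idx;	/* config-change IRQ index */
	u64 features;		/* negotiated features */
	u32 rsvd[2];
} __packed;			/* 36 bytes == SNET_GENERAL_CFG_LEN */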
+
+static int snet_request_irqs(struct pci_dev *pdev, struct snet *snet)
+{
+ int ret, i, irq;
+
+ /* Request config IRQ */
+ irq = pci_irq_vector(pdev, snet->cfg_irq_idx);
+ ret = devm_request_irq(&pdev->dev, irq, snet_cfg_irq_hndlr, 0,
+ snet->cfg_irq_name, snet);
+ if (ret) {
+ SNET_ERR(pdev, "Failed to request IRQ\n");
+ return ret;
+ }
+ snet->cfg_irq = irq;
+
+ /* Request IRQ for every VQ */
+ for (i = 0; i < snet->cfg->vq_num; i++) {
+ irq = pci_irq_vector(pdev, snet->vqs[i]->irq_idx);
+ ret = devm_request_irq(&pdev->dev, irq, snet_vq_irq_hndlr, 0,
+ snet->vqs[i]->irq_name, snet->vqs[i]);
+ if (ret) {
+ SNET_ERR(pdev, "Failed to request IRQ\n");
+ return ret;
+ }
+ snet->vqs[i]->irq = irq;
+ }
+ return 0;
+}
+
+static void snet_set_status(struct vdpa_device *vdev, u8 status)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+ struct psnet *psnet = snet->psnet;
+ struct pci_dev *pdev = snet->pdev;
+ int ret;
+ bool pf_irqs;
+
+ if (status == snet->status)
+ return;
+
+ if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
+ !(snet->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ /* Request IRQs */
+ pf_irqs = PSNET_FLAG_ON(psnet, SNET_CFG_FLAG_IRQ_PF);
+ ret = snet_request_irqs(pf_irqs ? pdev->physfn : pdev, snet);
+ if (ret)
+ goto set_err;
+
+ /* Write config to the DPU */
+ if (snet_write_conf(snet)) {
+ SNET_INFO(pdev, "Create SNET[%u] device\n", snet->sid);
+ } else {
+ snet_free_irqs(snet);
+ goto set_err;
+ }
+ }
+
+ /* Save the new status */
+ snet->status = status;
+ return;
+
+set_err:
+ snet->status |= VIRTIO_CONFIG_S_FAILED;
+}
+
+static void snet_get_config(struct vdpa_device *vdev, unsigned int offset,
+ void *buf, unsigned int len)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+ void __iomem *cfg_ptr = snet->cfg->virtio_cfg + offset;
+ u8 *buf_ptr = buf;
+ u32 i;
+
+ /* check for offset error */
+ if (offset + len > snet->cfg->cfg_size)
+ return;
+
+	/* Copy from the PCI BAR into the buffer */
+ for (i = 0; i < len; i++)
+ *buf_ptr++ = ioread8(cfg_ptr + i);
+}
+
+static void snet_set_config(struct vdpa_device *vdev, unsigned int offset,
+ const void *buf, unsigned int len)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+ void __iomem *cfg_ptr = snet->cfg->virtio_cfg + offset;
+ const u8 *buf_ptr = buf;
+ u32 i;
+
+ /* check for offset error */
+ if (offset + len > snet->cfg->cfg_size)
+ return;
+
+ /* Write into PCI BAR */
+ for (i = 0; i < len; i++)
+ iowrite8(*buf_ptr++, cfg_ptr + i);
+}
+
+static int snet_suspend(struct vdpa_device *vdev)
+{
+ struct snet *snet = vdpa_to_snet(vdev);
+ int ret;
+
+ ret = snet_suspend_dev(snet);
+ if (ret)
+ SNET_ERR(snet->pdev, "SNET[%u] suspend failed, err: %d\n", snet->sid, ret);
+ else
+ SNET_DBG(snet->pdev, "Suspend SNET[%u] device\n", snet->sid);
+
+ return ret;
+}
+
+static const struct vdpa_config_ops snet_config_ops = {
+ .set_vq_address = snet_set_vq_address,
+ .set_vq_num = snet_set_vq_num,
+ .kick_vq = snet_kick_vq,
+ .kick_vq_with_data = snet_kick_vq_with_data,
+ .set_vq_cb = snet_set_vq_cb,
+ .set_vq_ready = snet_set_vq_ready,
+ .get_vq_ready = snet_get_vq_ready,
+ .set_vq_state = snet_set_vq_state,
+ .get_vq_state = snet_get_vq_state,
+ .get_vq_irq = snet_get_vq_irq,
+ .get_vq_align = snet_get_vq_align,
+ .reset = snet_reset,
+ .get_config_size = snet_get_config_size,
+ .get_device_features = snet_get_features,
+ .set_driver_features = snet_set_drv_features,
+ .get_driver_features = snet_get_drv_features,
+ .get_vq_num_min = snet_get_vq_num_max,
+ .get_vq_num_max = snet_get_vq_num_max,
+ .set_config_cb = snet_set_config_cb,
+ .get_device_id = snet_get_device_id,
+ .get_vendor_id = snet_get_vendor_id,
+ .get_status = snet_get_status,
+ .set_status = snet_set_status,
+ .get_config = snet_get_config,
+ .set_config = snet_set_config,
+ .suspend = snet_suspend,
+};
+
+static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
+{
+ char name[50];
+ int ret, i, mask = 0;
+	/* We don't know which BAR will be used to communicate, so
+	 * map every BAR with len > 0.
+ *
+ * Later, we will discover the BAR and unmap all other BARs.
+ */
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (pci_resource_len(pdev, i))
+ mask |= (1 << i);
+ }
+
+	/* No BAR can be used */
+ if (!mask) {
+ SNET_ERR(pdev, "Failed to find a PCI BAR\n");
+ return -ENODEV;
+ }
+
+ snprintf(name, sizeof(name), "psnet[%s]-bars", pci_name(pdev));
+ ret = pcim_iomap_regions(pdev, mask, name);
+ if (ret) {
+ SNET_ERR(pdev, "Failed to request and map PCI BARs\n");
+ return ret;
+ }
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (mask & (1 << i))
+ psnet->bars[i] = pcim_iomap_table(pdev)[i];
+ }
+
+ return 0;
+}
+
+static int snet_open_vf_bar(struct pci_dev *pdev, struct snet *snet)
+{
+ char name[50];
+ int ret;
+
+ snprintf(name, sizeof(name), "snet[%s]-bar", pci_name(pdev));
+ /* Request and map BAR */
+ ret = pcim_iomap_regions(pdev, BIT(snet->psnet->cfg.vf_bar), name);
+ if (ret) {
+ SNET_ERR(pdev, "Failed to request and map PCI BAR for a VF\n");
+ return ret;
+ }
+
+ snet->bar = pcim_iomap_table(pdev)[snet->psnet->cfg.vf_bar];
+
+ return 0;
+}
+
+static void snet_free_cfg(struct snet_cfg *cfg)
+{
+ u32 i;
+
+ if (!cfg->devs)
+ return;
+
+ /* Free devices */
+ for (i = 0; i < cfg->devices_num; i++) {
+ if (!cfg->devs[i])
+ break;
+
+ kfree(cfg->devs[i]);
+ }
+ /* Free pointers to devices */
+ kfree(cfg->devs);
+}
+
+/* Detect which BAR is used for communication with the device. */
+static int psnet_detect_bar(struct psnet *psnet, u32 off)
+{
+ unsigned long exit_time;
+ int i;
+
+ exit_time = jiffies + usecs_to_jiffies(SNET_DETECT_TIMEOUT);
+
+ /* SNET DPU will write SNET's signature when the config is ready. */
+ while (time_before(jiffies, exit_time)) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ /* Is this BAR mapped? */
+ if (!psnet->bars[i])
+ continue;
+
+ if (ioread32(psnet->bars[i] + off) == SNET_SIGNATURE)
+ return i;
+ }
+ usleep_range(1000, 10000);
+ }
+
+ return -ENODEV;
+}
+
+static void psnet_unmap_unused_bars(struct pci_dev *pdev, struct psnet *psnet)
+{
+ int i, mask = 0;
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (psnet->bars[i] && i != psnet->barno)
+ mask |= (1 << i);
+ }
+
+ if (mask)
+ pcim_iounmap_regions(pdev, mask);
+}
+
+/* Read SNET config from PCI BAR */
+static int psnet_read_cfg(struct pci_dev *pdev, struct psnet *psnet)
+{
+ struct snet_cfg *cfg = &psnet->cfg;
+ u32 i, off;
+ int barno;
+
+ /* Move to where the config starts */
+ off = SNET_CONFIG_OFF;
+
+ /* Find BAR used for communication */
+ barno = psnet_detect_bar(psnet, off);
+ if (barno < 0) {
+ SNET_ERR(pdev, "SNET config is not ready.\n");
+ return barno;
+ }
+
+ /* Save used BAR number and unmap all other BARs */
+ psnet->barno = barno;
+ SNET_DBG(pdev, "Using BAR number %d\n", barno);
+
+ psnet_unmap_unused_bars(pdev, psnet);
+
+ /* load config from BAR */
+ cfg->key = psnet_read32(psnet, off);
+ off += 4;
+ cfg->cfg_size = psnet_read32(psnet, off);
+ off += 4;
+ cfg->cfg_ver = psnet_read32(psnet, off);
+ off += 4;
+ /* The negotiated config version is the lower one between this driver's config
+ * and the DPU's.
+ */
+ psnet->negotiated_cfg_ver = min_t(u32, cfg->cfg_ver, SNET_CFG_VERSION);
+ SNET_DBG(pdev, "SNET config version %u\n", psnet->negotiated_cfg_ver);
+
+ cfg->vf_num = psnet_read32(psnet, off);
+ off += 4;
+ cfg->vf_bar = psnet_read32(psnet, off);
+ off += 4;
+ cfg->host_cfg_off = psnet_read32(psnet, off);
+ off += 4;
+ cfg->max_size_host_cfg = psnet_read32(psnet, off);
+ off += 4;
+ cfg->virtio_cfg_off = psnet_read32(psnet, off);
+ off += 4;
+ cfg->kick_off = psnet_read32(psnet, off);
+ off += 4;
+ cfg->hwmon_off = psnet_read32(psnet, off);
+ off += 4;
+ cfg->ctrl_off = psnet_read32(psnet, off);
+ off += 4;
+ cfg->flags = psnet_read32(psnet, off);
+ off += 4;
+ /* Ignore Reserved */
+ off += sizeof(cfg->rsvd);
+
+ cfg->devices_num = psnet_read32(psnet, off);
+ off += 4;
+ /* Allocate memory to hold pointer to the devices */
+ cfg->devs = kcalloc(cfg->devices_num, sizeof(void *), GFP_KERNEL);
+ if (!cfg->devs)
+ return -ENOMEM;
+
+ /* Load device configuration from BAR */
+ for (i = 0; i < cfg->devices_num; i++) {
+ cfg->devs[i] = kzalloc(sizeof(*cfg->devs[i]), GFP_KERNEL);
+ if (!cfg->devs[i]) {
+ snet_free_cfg(cfg);
+ return -ENOMEM;
+ }
+ /* Read device config */
+ cfg->devs[i]->virtio_id = psnet_read32(psnet, off);
+ off += 4;
+ cfg->devs[i]->vq_num = psnet_read32(psnet, off);
+ off += 4;
+ cfg->devs[i]->vq_size = psnet_read32(psnet, off);
+ off += 4;
+ cfg->devs[i]->vfid = psnet_read32(psnet, off);
+ off += 4;
+ cfg->devs[i]->features = psnet_read64(psnet, off);
+ off += 8;
+ /* Ignore Reserved */
+ off += sizeof(cfg->devs[i]->rsvd);
+
+ cfg->devs[i]->cfg_size = psnet_read32(psnet, off);
+ off += 4;
+
+		/* Is the config written to the DPU going to be too big? */
+ if (SNET_GENERAL_CFG_LEN + SNET_GENERAL_CFG_VQ_LEN * cfg->devs[i]->vq_num >
+ cfg->max_size_host_cfg) {
+ SNET_ERR(pdev, "Failed to read SNET config, the config is too big..\n");
+ snet_free_cfg(cfg);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int psnet_alloc_irq_vector(struct pci_dev *pdev, struct psnet *psnet)
+{
+ int ret = 0;
+ u32 i, irq_num = 0;
+
+ /* Let's count how many IRQs we need, 1 for every VQ + 1 for config change */
+ for (i = 0; i < psnet->cfg.devices_num; i++)
+ irq_num += psnet->cfg.devs[i]->vq_num + 1;
+
+ ret = pci_alloc_irq_vectors(pdev, irq_num, irq_num, PCI_IRQ_MSIX);
+ if (ret != irq_num) {
+ SNET_ERR(pdev, "Failed to allocate IRQ vectors\n");
+ return ret;
+ }
+ SNET_DBG(pdev, "Allocated %u IRQ vectors from physical function\n", irq_num);
+
+ return 0;
+}
+
+static int snet_alloc_irq_vector(struct pci_dev *pdev, struct snet_dev_cfg *snet_cfg)
+{
+ int ret = 0;
+ u32 irq_num;
+
+ /* We want 1 IRQ for every VQ + 1 for config change events */
+ irq_num = snet_cfg->vq_num + 1;
+
+ ret = pci_alloc_irq_vectors(pdev, irq_num, irq_num, PCI_IRQ_MSIX);
+ if (ret <= 0) {
+ SNET_ERR(pdev, "Failed to allocate IRQ vectors\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void snet_free_vqs(struct snet *snet)
+{
+ u32 i;
+
+ if (!snet->vqs)
+ return;
+
+ for (i = 0 ; i < snet->cfg->vq_num ; i++) {
+ if (!snet->vqs[i])
+ break;
+
+ kfree(snet->vqs[i]);
+ }
+ kfree(snet->vqs);
+}
+
+static int snet_build_vqs(struct snet *snet)
+{
+ u32 i;
+ /* Allocate the VQ pointers array */
+ snet->vqs = kcalloc(snet->cfg->vq_num, sizeof(void *), GFP_KERNEL);
+ if (!snet->vqs)
+ return -ENOMEM;
+
+ /* Allocate the VQs */
+ for (i = 0; i < snet->cfg->vq_num; i++) {
+ snet->vqs[i] = kzalloc(sizeof(*snet->vqs[i]), GFP_KERNEL);
+ if (!snet->vqs[i]) {
+ snet_free_vqs(snet);
+ return -ENOMEM;
+ }
+ /* Reset IRQ num */
+ snet->vqs[i]->irq = -1;
+ /* VQ serial ID */
+ snet->vqs[i]->sid = i;
+ /* Kick address - every VQ gets 4B */
+ snet->vqs[i]->kick_ptr = snet->bar + snet->psnet->cfg.kick_off +
+ snet->vqs[i]->sid * 4;
+ /* Clear kick address for this VQ */
+ iowrite32(0, snet->vqs[i]->kick_ptr);
+ }
+ return 0;
+}
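A worked example of the kick mapping set up above (the offset is illustrative): with kick_off = 0x1000, VQ 0 kicks at bar + 0x1000, VQ 1 at bar + 0x1004, and so on, since every VQ owns one 4-byte kick word.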
+
+static int psnet_get_next_irq_num(struct psnet *psnet)
+{
+ int irq;
+
+ spin_lock(&psnet->lock);
+ irq = psnet->next_irq++;
+ spin_unlock(&psnet->lock);
+
+ return irq;
+}
+
+static void snet_reserve_irq_idx(struct pci_dev *pdev, struct snet *snet)
+{
+ struct psnet *psnet = snet->psnet;
+ int i;
+
+ /* one IRQ for every VQ, and one for config changes */
+ snet->cfg_irq_idx = psnet_get_next_irq_num(psnet);
+ snprintf(snet->cfg_irq_name, SNET_NAME_SIZE, "snet[%s]-cfg[%d]",
+ pci_name(pdev), snet->cfg_irq_idx);
+
+ for (i = 0; i < snet->cfg->vq_num; i++) {
+ /* Get next free IRQ ID */
+ snet->vqs[i]->irq_idx = psnet_get_next_irq_num(psnet);
+ /* Write IRQ name */
+ snprintf(snet->vqs[i]->irq_name, SNET_NAME_SIZE, "snet[%s]-vq[%d]",
+ pci_name(pdev), snet->vqs[i]->irq_idx);
+ }
+}
+
+/* Find a device config based on virtual function id */
+static struct snet_dev_cfg *snet_find_dev_cfg(struct snet_cfg *cfg, u32 vfid)
+{
+ u32 i;
+
+ for (i = 0; i < cfg->devices_num; i++) {
+ if (cfg->devs[i]->vfid == vfid)
+ return cfg->devs[i];
+ }
+	/* Oops, no config found */
+ return NULL;
+}
+
+/* Probe function for a physical PCI function */
+static int snet_vdpa_probe_pf(struct pci_dev *pdev)
+{
+ struct psnet *psnet;
+ int ret = 0;
+ bool pf_irqs = false;
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ SNET_ERR(pdev, "Failed to enable PCI device\n");
+ return ret;
+ }
+
+ /* Allocate a PCI physical function device */
+ psnet = kzalloc(sizeof(*psnet), GFP_KERNEL);
+ if (!psnet)
+ return -ENOMEM;
+
+ /* Init PSNET spinlock */
+ spin_lock_init(&psnet->lock);
+
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, psnet);
+
+ /* Open SNET MAIN BAR */
+ ret = psnet_open_pf_bar(pdev, psnet);
+ if (ret)
+ goto free_psnet;
+
+ /* Try to read SNET's config from PCI BAR */
+ ret = psnet_read_cfg(pdev, psnet);
+ if (ret)
+ goto free_psnet;
+
+ /* If SNET_CFG_FLAG_IRQ_PF flag is set, we should use
+ * PF MSI-X vectors
+ */
+ pf_irqs = PSNET_FLAG_ON(psnet, SNET_CFG_FLAG_IRQ_PF);
+
+ if (pf_irqs) {
+ ret = psnet_alloc_irq_vector(pdev, psnet);
+ if (ret)
+ goto free_cfg;
+ }
+
+ SNET_DBG(pdev, "Enable %u virtual functions\n", psnet->cfg.vf_num);
+ ret = pci_enable_sriov(pdev, psnet->cfg.vf_num);
+ if (ret) {
+ SNET_ERR(pdev, "Failed to enable SR-IOV\n");
+ goto free_irq;
+ }
+
+ /* Create HW monitor device */
+ if (PSNET_FLAG_ON(psnet, SNET_CFG_FLAG_HWMON)) {
+#if IS_ENABLED(CONFIG_HWMON)
+ psnet_create_hwmon(pdev);
+#else
+ SNET_WARN(pdev, "Can't start HWMON, CONFIG_HWMON is not enabled\n");
+#endif
+ }
+
+ return 0;
+
+free_irq:
+ if (pf_irqs)
+ pci_free_irq_vectors(pdev);
+free_cfg:
+ snet_free_cfg(&psnet->cfg);
+free_psnet:
+ kfree(psnet);
+ return ret;
+}
+
+/* Probe function for a virtual PCI function */
+static int snet_vdpa_probe_vf(struct pci_dev *pdev)
+{
+ struct pci_dev *pdev_pf = pdev->physfn;
+ struct psnet *psnet = pci_get_drvdata(pdev_pf);
+ struct snet_dev_cfg *dev_cfg;
+ struct snet *snet;
+ u32 vfid;
+ int ret;
+ bool pf_irqs = false;
+
+ /* Get virtual function id.
+ * (the DPU counts the VFs from 1)
+ */
+ ret = pci_iov_vf_id(pdev);
+ if (ret < 0) {
+ SNET_ERR(pdev, "Failed to find a VF id\n");
+ return ret;
+ }
+ vfid = ret + 1;
+
+ /* Find the snet_dev_cfg based on vfid */
+ dev_cfg = snet_find_dev_cfg(&psnet->cfg, vfid);
+ if (!dev_cfg) {
+		SNET_WARN(pdev, "Failed to find a VF config\n");
+ return -ENODEV;
+ }
+
+ /* Which PCI device should allocate the IRQs?
+	 * If the SNET_CFG_FLAG_IRQ_PF flag is set, the PF device allocates the IRQs.
+ */
+ pf_irqs = PSNET_FLAG_ON(psnet, SNET_CFG_FLAG_IRQ_PF);
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ SNET_ERR(pdev, "Failed to enable PCI VF device\n");
+ return ret;
+ }
+
+ /* Request for MSI-X IRQs */
+ if (!pf_irqs) {
+ ret = snet_alloc_irq_vector(pdev, dev_cfg);
+ if (ret)
+ return ret;
+ }
+
+ /* Allocate vdpa device */
+ snet = vdpa_alloc_device(struct snet, vdpa, &pdev->dev, &snet_config_ops, 1, 1, NULL,
+ false);
+ if (!snet) {
+ SNET_ERR(pdev, "Failed to allocate a vdpa device\n");
+ ret = -ENOMEM;
+ goto free_irqs;
+ }
+
+ /* Init control mutex and spinlock */
+ mutex_init(&snet->ctrl_lock);
+ spin_lock_init(&snet->ctrl_spinlock);
+
+ /* Save pci device pointer */
+ snet->pdev = pdev;
+ snet->psnet = psnet;
+ snet->cfg = dev_cfg;
+ snet->dpu_ready = false;
+ snet->sid = vfid;
+ /* Reset IRQ value */
+ snet->cfg_irq = -1;
+
+ ret = snet_open_vf_bar(pdev, snet);
+ if (ret)
+ goto put_device;
+
+ /* Create a VirtIO config pointer */
+ snet->cfg->virtio_cfg = snet->bar + snet->psnet->cfg.virtio_cfg_off;
+
+ /* Clear control registers */
+ snet_ctrl_clear(snet);
+
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, snet);
+
+ ret = snet_build_vqs(snet);
+ if (ret)
+ goto put_device;
+
+	/* Reserve IRQ indexes.
+	 * The IRQs may be requested and freed multiple times,
+ * but the indexes won't change.
+ */
+ snet_reserve_irq_idx(pf_irqs ? pdev_pf : pdev, snet);
+
+	/* Set DMA device */
+ snet->vdpa.dma_dev = &pdev->dev;
+
+ /* Register VDPA device */
+ ret = vdpa_register_device(&snet->vdpa, snet->cfg->vq_num);
+ if (ret) {
+ SNET_ERR(pdev, "Failed to register vdpa device\n");
+ goto free_vqs;
+ }
+
+ return 0;
+
+free_vqs:
+ snet_free_vqs(snet);
+put_device:
+ put_device(&snet->vdpa.dev);
+free_irqs:
+ if (!pf_irqs)
+ pci_free_irq_vectors(pdev);
+ return ret;
+}
+
+static int snet_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ if (pdev->is_virtfn)
+ return snet_vdpa_probe_vf(pdev);
+ else
+ return snet_vdpa_probe_pf(pdev);
+}
+
+static void snet_vdpa_remove_pf(struct pci_dev *pdev)
+{
+ struct psnet *psnet = pci_get_drvdata(pdev);
+
+ pci_disable_sriov(pdev);
+ /* If IRQs are allocated from the PF, we should free the IRQs */
+ if (PSNET_FLAG_ON(psnet, SNET_CFG_FLAG_IRQ_PF))
+ pci_free_irq_vectors(pdev);
+
+ snet_free_cfg(&psnet->cfg);
+ kfree(psnet);
+}
+
+static void snet_vdpa_remove_vf(struct pci_dev *pdev)
+{
+ struct snet *snet = pci_get_drvdata(pdev);
+ struct psnet *psnet = snet->psnet;
+
+ vdpa_unregister_device(&snet->vdpa);
+ snet_free_vqs(snet);
+ /* If IRQs are allocated from the VF, we should free the IRQs */
+ if (!PSNET_FLAG_ON(psnet, SNET_CFG_FLAG_IRQ_PF))
+ pci_free_irq_vectors(pdev);
+}
+
+static void snet_vdpa_remove(struct pci_dev *pdev)
+{
+ if (pdev->is_virtfn)
+ snet_vdpa_remove_vf(pdev);
+ else
+ snet_vdpa_remove_pf(pdev);
+}
+
+static struct pci_device_id snet_driver_pci_ids[] = {
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_SOLIDRUN, SNET_DEVICE_ID,
+ PCI_VENDOR_ID_SOLIDRUN, SNET_DEVICE_ID) },
+ { 0 },
+};
+
+MODULE_DEVICE_TABLE(pci, snet_driver_pci_ids);
+
+static struct pci_driver snet_vdpa_driver = {
+ .name = "snet-vdpa-driver",
+ .id_table = snet_driver_pci_ids,
+ .probe = snet_vdpa_probe,
+ .remove = snet_vdpa_remove,
+};
+
+module_pci_driver(snet_vdpa_driver);
+
+MODULE_AUTHOR("Alvaro Karsz <alvaro.karsz@solid-run.com>");
+MODULE_DESCRIPTION("SolidRun vDPA driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/vdpa/solidrun/snet_vdpa.h b/drivers/vdpa/solidrun/snet_vdpa.h
new file mode 100644
index 000000000000..3c78d4e7d485
--- /dev/null
+++ b/drivers/vdpa/solidrun/snet_vdpa.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * SolidRun DPU driver for control plane
+ *
+ * Copyright (C) 2022-2023 SolidRun
+ *
+ * Author: Alvaro Karsz <alvaro.karsz@solid-run.com>
+ *
+ */
+#ifndef _SNET_VDPA_H_
+#define _SNET_VDPA_H_
+
+#include <linux/vdpa.h>
+#include <linux/pci.h>
+
+#define SNET_NAME_SIZE 256
+
+#define SNET_ERR(pdev, fmt, ...) dev_err(&(pdev)->dev, "%s"fmt, "snet_vdpa: ", ##__VA_ARGS__)
+#define SNET_WARN(pdev, fmt, ...) dev_warn(&(pdev)->dev, "%s"fmt, "snet_vdpa: ", ##__VA_ARGS__)
+#define SNET_INFO(pdev, fmt, ...) dev_info(&(pdev)->dev, "%s"fmt, "snet_vdpa: ", ##__VA_ARGS__)
+#define SNET_DBG(pdev, fmt, ...) dev_dbg(&(pdev)->dev, "%s"fmt, "snet_vdpa: ", ##__VA_ARGS__)
+#define SNET_HAS_FEATURE(s, f) ((s)->negotiated_features & BIT_ULL(f))
+/* Check if negotiated config version is at least @ver */
+#define SNET_CFG_VER(snet, ver) ((snet)->psnet->negotiated_cfg_ver >= (ver))
+
+/* VQ struct */
+struct snet_vq {
+ /* VQ callback */
+ struct vdpa_callback cb;
+ /* VQ state received from bus */
+ struct vdpa_vq_state vq_state;
+ /* desc base address */
+ u64 desc_area;
+ /* device base address */
+ u64 device_area;
+ /* driver base address */
+ u64 driver_area;
+ /* Queue size */
+ u32 num;
+ /* Serial ID for VQ */
+ u32 sid;
+ /* is ready flag */
+ bool ready;
+ /* IRQ number */
+ u32 irq;
+ /* IRQ index, DPU uses this to parse data from MSI-X table */
+ u32 irq_idx;
+ /* IRQ name */
+ char irq_name[SNET_NAME_SIZE];
+ /* pointer to mapped PCI BAR register used by this VQ to kick */
+ void __iomem *kick_ptr;
+};
+
+struct snet {
+ /* vdpa device */
+ struct vdpa_device vdpa;
+ /* Config callback */
+ struct vdpa_callback cb;
+ /* To lock the control mechanism */
+ struct mutex ctrl_lock;
+ /* Spinlock to protect critical parts in the control mechanism */
+ spinlock_t ctrl_spinlock;
+	/* Array of virtqueues */
+ struct snet_vq **vqs;
+ /* Used features */
+ u64 negotiated_features;
+ /* Device serial ID */
+ u32 sid;
+ /* device status */
+ u8 status;
+ /* boolean indicating if snet config was passed to the device */
+ bool dpu_ready;
+ /* IRQ number */
+ u32 cfg_irq;
+ /* IRQ index, DPU uses this to parse data from MSI-X table */
+ u32 cfg_irq_idx;
+ /* IRQ name */
+ char cfg_irq_name[SNET_NAME_SIZE];
+ /* BAR to access the VF */
+ void __iomem *bar;
+ /* PCI device */
+ struct pci_dev *pdev;
+ /* Pointer to snet pdev parent device */
+ struct psnet *psnet;
+ /* Pointer to snet config device */
+ struct snet_dev_cfg *cfg;
+};
+
+struct snet_dev_cfg {
+ /* Device ID following VirtIO spec. */
+ u32 virtio_id;
+ /* Number of VQs for this device */
+ u32 vq_num;
+ /* Size of every VQ */
+ u32 vq_size;
+ /* Virtual Function id */
+ u32 vfid;
+ /* Device features, following VirtIO spec */
+ u64 features;
+ /* Reserved for future usage */
+ u32 rsvd[6];
+ /* VirtIO device specific config size */
+ u32 cfg_size;
+ /* VirtIO device specific config address */
+ void __iomem *virtio_cfg;
+} __packed;
+
+struct snet_cfg {
+ /* Magic key */
+ u32 key;
+ /* Size of total config in bytes */
+ u32 cfg_size;
+ /* Config version */
+ u32 cfg_ver;
+ /* Number of Virtual Functions to create */
+ u32 vf_num;
+ /* BAR to use for the VFs */
+ u32 vf_bar;
+ /* Where should we write the SNET's config */
+ u32 host_cfg_off;
+ /* Max. allowed size for a SNET's config */
+ u32 max_size_host_cfg;
+ /* VirtIO config offset in BAR */
+ u32 virtio_cfg_off;
+ /* Offset in PCI BAR for VQ kicks */
+ u32 kick_off;
+ /* Offset in PCI BAR for HW monitoring */
+ u32 hwmon_off;
+ /* Offset in PCI BAR for Control mechanism */
+ u32 ctrl_off;
+ /* Config general flags - enum snet_cfg_flags */
+ u32 flags;
+ /* Reserved for future usage */
+ u32 rsvd[6];
+ /* Number of snet devices */
+ u32 devices_num;
+ /* The actual devices */
+ struct snet_dev_cfg **devs;
+} __packed;
+
+/* SolidNET PCIe device, one device per PCIe physical function */
+struct psnet {
+ /* PCI BARs */
+ void __iomem *bars[PCI_STD_NUM_BARS];
+ /* Negotiated config version */
+ u32 negotiated_cfg_ver;
+ /* Next IRQ index to use in case when the IRQs are allocated from this device */
+ u32 next_irq;
+ /* BAR number used to communicate with the device */
+ u8 barno;
+ /* spinlock to protect data that can be changed by SNET devices */
+ spinlock_t lock;
+ /* Pointer to the device's config read from BAR */
+ struct snet_cfg cfg;
+ /* Name of monitor device */
+ char hwmon_name[SNET_NAME_SIZE];
+};
+
+enum snet_cfg_flags {
+ /* Create a HWMON device */
+ SNET_CFG_FLAG_HWMON = BIT(0),
+	/* Use IRQs from the physical function */
+ SNET_CFG_FLAG_IRQ_PF = BIT(1),
+};
+
+#define PSNET_FLAG_ON(p, f) ((p)->cfg.flags & (f))
+
+static inline u32 psnet_read32(struct psnet *psnet, u32 off)
+{
+ return ioread32(psnet->bars[psnet->barno] + off);
+}
+
+static inline u32 snet_read32(struct snet *snet, u32 off)
+{
+ return ioread32(snet->bar + off);
+}
+
+static inline void snet_write32(struct snet *snet, u32 off, u32 val)
+{
+ iowrite32(val, snet->bar + off);
+}
+
+static inline u64 psnet_read64(struct psnet *psnet, u32 off)
+{
+ u64 val;
+ /* 64bits are written in 2 halves, low part first */
+ val = (u64)psnet_read32(psnet, off);
+ val |= ((u64)psnet_read32(psnet, off + 4) << 32);
+ return val;
+}
+
+static inline void snet_write64(struct snet *snet, u32 off, u64 val)
+{
+ /* The DPU expects a 64bit integer in 2 halves, the low part first */
+ snet_write32(snet, off, (u32)val);
+ snet_write32(snet, off + 4, (u32)(val >> 32));
+}
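For example (the value is illustrative), snet_write64(snet, off, 0x1122334455667788ULL) issues iowrite32(0x55667788, bar + off) followed by iowrite32(0x11223344, bar + off + 4), the low-half-first order that psnet_read64() expects.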
+
+#if IS_ENABLED(CONFIG_HWMON)
+void psnet_create_hwmon(struct pci_dev *pdev);
+#endif
+
+void snet_ctrl_clear(struct snet *snet);
+int snet_destroy_dev(struct snet *snet);
+int snet_read_vq_state(struct snet *snet, u16 idx, struct vdpa_vq_state *state);
+int snet_suspend_dev(struct snet *snet);
+
+#endif /* _SNET_VDPA_H_ */
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index febdc99b51a7..965e32529eb8 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -39,6 +39,11 @@ static int vdpa_dev_probe(struct device *d)
u32 max_num, min_num = 1;
int ret = 0;
+ d->dma_mask = &d->coherent_dma_mask;
+ ret = dma_set_mask_and_coherent(d, DMA_BIT_MASK(64));
+ if (ret)
+ return ret;
+
max_num = ops->get_vq_num_max(vdev);
if (ops->get_vq_num_min)
min_num = ops->get_vq_num_min(vdev);
@@ -460,12 +465,28 @@ static int vdpa_nl_mgmtdev_handle_fill(struct sk_buff *msg, const struct vdpa_mg
return 0;
}
+static u64 vdpa_mgmtdev_get_classes(const struct vdpa_mgmt_dev *mdev,
+ unsigned int *nclasses)
+{
+ u64 supported_classes = 0;
+ unsigned int n = 0;
+
+ for (int i = 0; mdev->id_table[i].device; i++) {
+ if (mdev->id_table[i].device > 63)
+ continue;
+ supported_classes |= BIT_ULL(mdev->id_table[i].device);
+ n++;
+ }
+ if (nclasses)
+ *nclasses = n;
+
+ return supported_classes;
+}
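A worked example: for an id_table listing VIRTIO_ID_NET (1) and VIRTIO_ID_BLOCK (2), the returned bitmap is BIT_ULL(1) | BIT_ULL(2) = 0x6 and *nclasses is set to 2.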
+
static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *msg,
u32 portid, u32 seq, int flags)
{
- u64 supported_classes = 0;
void *hdr;
- int i = 0;
int err;
hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_MGMTDEV_NEW);
@@ -475,14 +496,9 @@ static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *m
if (err)
goto msg_err;
- while (mdev->id_table[i].device) {
- if (mdev->id_table[i].device <= 63)
- supported_classes |= BIT_ULL(mdev->id_table[i].device);
- i++;
- }
-
if (nla_put_u64_64bit(msg, VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES,
- supported_classes, VDPA_ATTR_UNSPEC)) {
+ vdpa_mgmtdev_get_classes(mdev, NULL),
+ VDPA_ATTR_UNSPEC)) {
err = -EMSGSIZE;
goto msg_err;
}
@@ -566,13 +582,25 @@ out:
BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | \
BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP))
+/*
+ * Bitmask for all per-device features: feature bits VIRTIO_TRANSPORT_F_START
+ * through VIRTIO_TRANSPORT_F_END are unset, i.e. 0xfffffc000fffffff for
+ * all 64bit features. If the features are extended beyond 64 bits, or new
+ * "holes" are reserved for other types of features than per-device ones, this
+ * macro would have to be updated.
+ */
+#define VIRTIO_DEVICE_F_MASK (~0ULL << (VIRTIO_TRANSPORT_F_END + 1) | \
+ ((1ULL << VIRTIO_TRANSPORT_F_START) - 1))
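A sanity sketch of the constant, assuming VIRTIO_TRANSPORT_F_START == 28 and VIRTIO_TRANSPORT_F_END == 41 (the current values in <uapi/linux/virtio_config.h>):

	static_assert(((~0ULL << (41 + 1)) | ((1ULL << 28) - 1)) ==
		      0xfffffc000fffffffULL);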
+
static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
{
struct vdpa_dev_set_config config = {};
struct nlattr **nl_attrs = info->attrs;
struct vdpa_mgmt_dev *mdev;
+ unsigned int ncls = 0;
const u8 *macaddr;
const char *name;
+ u64 classes;
int err = 0;
if (!info->attrs[VDPA_ATTR_DEV_NAME])
@@ -601,8 +629,26 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
}
if (nl_attrs[VDPA_ATTR_DEV_FEATURES]) {
+ u64 missing = 0x0ULL;
+
config.device_features =
nla_get_u64(nl_attrs[VDPA_ATTR_DEV_FEATURES]);
+ if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR] &&
+ !(config.device_features & BIT_ULL(VIRTIO_NET_F_MAC)))
+ missing |= BIT_ULL(VIRTIO_NET_F_MAC);
+ if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU] &&
+ !(config.device_features & BIT_ULL(VIRTIO_NET_F_MTU)))
+ missing |= BIT_ULL(VIRTIO_NET_F_MTU);
+ if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP] &&
+ config.net.max_vq_pairs > 1 &&
+ !(config.device_features & BIT_ULL(VIRTIO_NET_F_MQ)))
+ missing |= BIT_ULL(VIRTIO_NET_F_MQ);
+ if (missing) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "Missing features 0x%llx for provided attributes",
+ missing);
+ return -EINVAL;
+ }
config.mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES);
}
@@ -622,13 +668,33 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
err = PTR_ERR(mdev);
goto err;
}
+
if ((config.mask & mdev->config_attr_mask) != config.mask) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "All provided attributes are not supported");
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "Some provided attributes are not supported: 0x%llx",
+ config.mask & ~mdev->config_attr_mask);
err = -EOPNOTSUPP;
goto err;
}
+ classes = vdpa_mgmtdev_get_classes(mdev, &ncls);
+ if (config.mask & VDPA_DEV_NET_ATTRS_MASK &&
+ !(classes & BIT_ULL(VIRTIO_ID_NET))) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Network class attributes provided on unsupported management device");
+ err = -EINVAL;
+ goto err;
+ }
+ if (!(config.mask & VDPA_DEV_NET_ATTRS_MASK) &&
+ config.mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES) &&
+ classes & BIT_ULL(VIRTIO_ID_NET) && ncls > 1 &&
+ config.device_features & VIRTIO_DEVICE_F_MASK) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Management device supports multi-class while device features specified are ambiguous");
+ err = -EINVAL;
+ goto err;
+ }
+
err = mdev->ops->dev_add(mdev, name, &config);
err:
up_write(&vdpa_dev_lock);
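Assuming an iproute2 vdpa tool that understands the device_features parameter, the new cross-check rejects a MAC attribute whose matching feature bit (VIRTIO_NET_F_MAC, bit 5) is absent from the provided feature set; the feature values below are illustrative:

	# fails with EINVAL: "Missing features 0x20 for provided attributes"
	vdpa dev add mgmtdev vdpasim_net name vdpa0 mac 00:11:22:33:44:55 \
		device_features 0x300020000
	# succeeds once bit 5 (VIRTIO_NET_F_MAC) is included
	vdpa dev add mgmtdev vdpasim_net name vdpa0 mac 00:11:22:33:44:55 \
		device_features 0x300020020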
@@ -841,21 +907,28 @@ static int vdpa_dev_net_mac_config_fill(struct sk_buff *msg, u64 features,
sizeof(config->mac), config->mac);
}
+static int vdpa_dev_net_status_config_fill(struct sk_buff *msg, u64 features,
+ const struct virtio_net_config *config)
+{
+ u16 val_u16;
+
+ if ((features & BIT_ULL(VIRTIO_NET_F_STATUS)) == 0)
+ return 0;
+
+ val_u16 = __virtio16_to_cpu(true, config->status);
+ return nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16);
+}
+
static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg)
{
struct virtio_net_config config = {};
u64 features_device;
- u16 val_u16;
vdev->config->get_config(vdev, 0, &config, sizeof(config));
- val_u16 = __virtio16_to_cpu(true, config.status);
- if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
- return -EMSGSIZE;
-
features_device = vdev->config->get_device_features(vdev);
- if (nla_put_u64_64bit(msg, VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, features_device,
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device,
VDPA_ATTR_PAD))
return -EMSGSIZE;
@@ -865,6 +938,9 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
if (vdpa_dev_net_mac_config_fill(msg, features_device, &config))
return -EMSGSIZE;
+ if (vdpa_dev_net_status_config_fill(msg, features_device, &config))
+ return -EMSGSIZE;
+
return vdpa_dev_net_mq_config_fill(msg, features_device, &config);
}
@@ -935,7 +1011,6 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
{
struct virtio_net_config config = {};
u64 features;
- u16 max_vqp;
u8 status;
int err;
@@ -946,15 +1021,15 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
}
vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));
- max_vqp = __virtio16_to_cpu(true, config.max_virtqueue_pairs);
- if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp))
- return -EMSGSIZE;
-
features = vdev->config->get_driver_features(vdev);
if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES,
features, VDPA_ATTR_PAD))
return -EMSGSIZE;
+ err = vdpa_dev_net_mq_config_fill(msg, features, &config);
+ if (err)
+ return err;
+
if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index))
return -EMSGSIZE;
@@ -1012,7 +1087,7 @@ static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev,
switch (device_id) {
case VIRTIO_ID_NET:
if (index > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
- NL_SET_ERR_MSG_MOD(info->extack, "queue index excceeds max value");
+ NL_SET_ERR_MSG_MOD(info->extack, "queue index exceeds max value");
err = -ERANGE;
break;
}
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index b071f0d842fb..d343af4fa60e 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -11,13 +11,12 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
+#include <linux/kthread.h>
#include <linux/slab.h>
-#include <linux/sched.h>
#include <linux/dma-map-ops.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/vhost_iotlb.h>
-#include <linux/iova.h>
#include <uapi/linux/vdpa.h>
#include "vdpa_sim.h"
@@ -36,20 +35,47 @@ module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
"Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
+static bool use_va = true;
+module_param(use_va, bool, 0444);
+MODULE_PARM_DESC(use_va, "Enable/disable the device's ability to use VA");
+
#define VDPASIM_QUEUE_ALIGN PAGE_SIZE
#define VDPASIM_QUEUE_MAX 256
#define VDPASIM_VENDOR_ID 0
-static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
+struct vdpasim_mm_work {
+ struct kthread_work work;
+ struct vdpasim *vdpasim;
+ struct mm_struct *mm_to_bind;
+ int ret;
+};
+
+static void vdpasim_mm_work_fn(struct kthread_work *work)
{
- return container_of(vdpa, struct vdpasim, vdpa);
+ struct vdpasim_mm_work *mm_work =
+ container_of(work, struct vdpasim_mm_work, work);
+ struct vdpasim *vdpasim = mm_work->vdpasim;
+
+ mm_work->ret = 0;
+
+ //TODO: should we attach the cgroup of the mm owner?
+ vdpasim->mm_bound = mm_work->mm_to_bind;
}
-static struct vdpasim *dev_to_sim(struct device *dev)
+static void vdpasim_worker_change_mm_sync(struct vdpasim *vdpasim,
+ struct vdpasim_mm_work *mm_work)
{
- struct vdpa_device *vdpa = dev_to_vdpa(dev);
+ struct kthread_work *work = &mm_work->work;
+
+ kthread_init_work(work, vdpasim_mm_work_fn);
+ kthread_queue_work(vdpasim->worker, work);
+
+ kthread_flush_work(work);
+}
- return vdpa_to_sim(vdpa);
+static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
+{
+ return container_of(vdpa, struct vdpasim, vdpa);
}
static void vdpasim_vq_notify(struct vringh *vring)
@@ -66,15 +92,34 @@ static void vdpasim_vq_notify(struct vringh *vring)
static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
{
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+ uint16_t last_avail_idx = vq->vring.last_avail_idx;
+ struct vring_desc *desc = (struct vring_desc *)
+ (uintptr_t)vq->desc_addr;
+ struct vring_avail *avail = (struct vring_avail *)
+ (uintptr_t)vq->driver_addr;
+ struct vring_used *used = (struct vring_used *)
+ (uintptr_t)vq->device_addr;
+
+ if (use_va && vdpasim->mm_bound) {
+ vringh_init_iotlb_va(&vq->vring, vdpasim->features, vq->num,
+ true, desc, avail, used);
+ } else {
+ vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num,
+ true, desc, avail, used);
+ }
- vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
- VDPASIM_QUEUE_MAX, false,
- (struct vring_desc *)(uintptr_t)vq->desc_addr,
- (struct vring_avail *)
- (uintptr_t)vq->driver_addr,
- (struct vring_used *)
- (uintptr_t)vq->device_addr);
-
+ vq->vring.last_avail_idx = last_avail_idx;
+
+ /*
+ * Since vdpa_sim does not support receiving inflight descriptors as the
+ * destination of a migration, let's set both avail_idx and used_idx
+ * the same at vq start. This is how vhost-user works in a
+ * VHOST_SET_VRING_BASE call.
+ *
+ * Although the simple fix is to set last_used_idx at
+ * vdpasim_set_vq_state, it would be reset at vdpasim_queue_ready.
+ */
+ vq->vring.last_used_idx = last_avail_idx;
vq->vring.notify = vdpasim_vq_notify;
}
@@ -105,8 +150,12 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
&vdpasim->iommu_lock);
}
- for (i = 0; i < vdpasim->dev_attr.nas; i++)
+ for (i = 0; i < vdpasim->dev_attr.nas; i++) {
vhost_iotlb_reset(&vdpasim->iommu[i]);
+ vhost_iotlb_add_range(&vdpasim->iommu[i], 0, ULONG_MAX,
+ 0, VHOST_MAP_RW);
+ vdpasim->iommu_pt[i] = true;
+ }
vdpasim->running = true;
spin_unlock(&vdpasim->iommu_lock);
@@ -116,144 +165,40 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
++vdpasim->generation;
}
-static int dir_to_perm(enum dma_data_direction dir)
-{
- int perm = -EFAULT;
-
- switch (dir) {
- case DMA_FROM_DEVICE:
- perm = VHOST_MAP_WO;
- break;
- case DMA_TO_DEVICE:
- perm = VHOST_MAP_RO;
- break;
- case DMA_BIDIRECTIONAL:
- perm = VHOST_MAP_RW;
- break;
- default:
- break;
- }
-
- return perm;
-}
-
-static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr,
- size_t size, unsigned int perm)
-{
- struct iova *iova;
- dma_addr_t dma_addr;
- int ret;
-
- /* We set the limit_pfn to the maximum (ULONG_MAX - 1) */
- iova = alloc_iova(&vdpasim->iova, size >> iova_shift(&vdpasim->iova),
- ULONG_MAX - 1, true);
- if (!iova)
- return DMA_MAPPING_ERROR;
-
- dma_addr = iova_dma_addr(&vdpasim->iova, iova);
-
- spin_lock(&vdpasim->iommu_lock);
- ret = vhost_iotlb_add_range(&vdpasim->iommu[0], (u64)dma_addr,
- (u64)dma_addr + size - 1, (u64)paddr, perm);
- spin_unlock(&vdpasim->iommu_lock);
-
- if (ret) {
- __free_iova(&vdpasim->iova, iova);
- return DMA_MAPPING_ERROR;
- }
-
- return dma_addr;
-}
-
-static void vdpasim_unmap_range(struct vdpasim *vdpasim, dma_addr_t dma_addr,
- size_t size)
-{
- spin_lock(&vdpasim->iommu_lock);
- vhost_iotlb_del_range(&vdpasim->iommu[0], (u64)dma_addr,
- (u64)dma_addr + size - 1);
- spin_unlock(&vdpasim->iommu_lock);
-
- free_iova(&vdpasim->iova, iova_pfn(&vdpasim->iova, dma_addr));
-}
-
-static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct vdpasim *vdpasim = dev_to_sim(dev);
- phys_addr_t paddr = page_to_phys(page) + offset;
- int perm = dir_to_perm(dir);
-
- if (perm < 0)
- return DMA_MAPPING_ERROR;
-
- return vdpasim_map_range(vdpasim, paddr, size, perm);
-}
-
-static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct vdpasim *vdpasim = dev_to_sim(dev);
-
- vdpasim_unmap_range(vdpasim, dma_addr, size);
-}
+static const struct vdpa_config_ops vdpasim_config_ops;
+static const struct vdpa_config_ops vdpasim_batch_config_ops;
-static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag,
- unsigned long attrs)
+static void vdpasim_work_fn(struct kthread_work *work)
{
- struct vdpasim *vdpasim = dev_to_sim(dev);
- phys_addr_t paddr;
- void *addr;
+ struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
+ struct mm_struct *mm = vdpasim->mm_bound;
- addr = kmalloc(size, flag);
- if (!addr) {
- *dma_addr = DMA_MAPPING_ERROR;
- return NULL;
+ if (use_va && mm) {
+ if (!mmget_not_zero(mm))
+ return;
+ kthread_use_mm(mm);
}
- paddr = virt_to_phys(addr);
+ vdpasim->dev_attr.work_fn(vdpasim);
- *dma_addr = vdpasim_map_range(vdpasim, paddr, size, VHOST_MAP_RW);
- if (*dma_addr == DMA_MAPPING_ERROR) {
- kfree(addr);
- return NULL;
+ if (use_va && mm) {
+ kthread_unuse_mm(mm);
+ mmput(mm);
}
-
- return addr;
}
-static void vdpasim_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs)
-{
- struct vdpasim *vdpasim = dev_to_sim(dev);
-
- vdpasim_unmap_range(vdpasim, dma_addr, size);
-
- kfree(vaddr);
-}
-
-static const struct dma_map_ops vdpasim_dma_ops = {
- .map_page = vdpasim_map_page,
- .unmap_page = vdpasim_unmap_page,
- .alloc = vdpasim_alloc_coherent,
- .free = vdpasim_free_coherent,
-};
-
-static const struct vdpa_config_ops vdpasim_config_ops;
-static const struct vdpa_config_ops vdpasim_batch_config_ops;
-
struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
const struct vdpa_dev_set_config *config)
{
const struct vdpa_config_ops *ops;
+ struct vdpa_device *vdpa;
struct vdpasim *vdpasim;
struct device *dev;
int i, ret = -ENOMEM;
+ if (!dev_attr->alloc_size)
+ return ERR_PTR(-EINVAL);
+
if (config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
if (config->device_features &
~dev_attr->supported_features)
@@ -267,24 +212,31 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
else
ops = &vdpasim_config_ops;
- vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
- dev_attr->ngroups, dev_attr->nas,
- dev_attr->name, false);
- if (IS_ERR(vdpasim)) {
- ret = PTR_ERR(vdpasim);
+ vdpa = __vdpa_alloc_device(NULL, ops,
+ dev_attr->ngroups, dev_attr->nas,
+ dev_attr->alloc_size,
+ dev_attr->name, use_va);
+ if (IS_ERR(vdpa)) {
+ ret = PTR_ERR(vdpa);
goto err_alloc;
}
+ vdpasim = vdpa_to_sim(vdpa);
vdpasim->dev_attr = *dev_attr;
- INIT_WORK(&vdpasim->work, dev_attr->work_fn);
- spin_lock_init(&vdpasim->lock);
+ dev = &vdpasim->vdpa.dev;
+
+ kthread_init_work(&vdpasim->work, vdpasim_work_fn);
+ vdpasim->worker = kthread_create_worker(0, "vDPA sim worker: %s",
+ dev_attr->name);
+ if (IS_ERR(vdpasim->worker))
+ goto err_iommu;
+
+ mutex_init(&vdpasim->mutex);
spin_lock_init(&vdpasim->iommu_lock);
- dev = &vdpasim->vdpa.dev;
dev->dma_mask = &dev->coherent_dma_mask;
if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
goto err_iommu;
- set_dma_ops(dev, &vdpasim_dma_ops);
vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
@@ -301,24 +253,18 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
if (!vdpasim->iommu)
goto err_iommu;
+ vdpasim->iommu_pt = kmalloc_array(vdpasim->dev_attr.nas,
+ sizeof(*vdpasim->iommu_pt), GFP_KERNEL);
+ if (!vdpasim->iommu_pt)
+ goto err_iommu;
+
for (i = 0; i < vdpasim->dev_attr.nas; i++)
vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0);
- vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
- if (!vdpasim->buffer)
- goto err_iommu;
-
for (i = 0; i < dev_attr->nvqs; i++)
vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0],
&vdpasim->iommu_lock);
- ret = iova_cache_get();
- if (ret)
- goto err_iommu;
-
- /* For simplicity we use an IOVA allocator with byte granularity */
- init_iova_domain(&vdpasim->iova, 1, 0);
-
vdpasim->vdpa.dma_dev = dev;
return vdpasim;
@@ -330,6 +276,12 @@ err_alloc:
}
EXPORT_SYMBOL_GPL(vdpasim_create);
+void vdpasim_schedule_work(struct vdpasim *vdpasim)
+{
+ kthread_queue_work(vdpasim->worker, &vdpasim->work);
+}
+EXPORT_SYMBOL_GPL(vdpasim_schedule_work);
+
static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx,
u64 desc_area, u64 driver_area,
u64 device_area)
@@ -357,8 +309,14 @@ static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx)
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+ if (!vdpasim->running &&
+ (vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ vdpasim->pending_kick = true;
+ return;
+ }
+
if (vq->ready)
- schedule_work(&vdpasim->work);
+ vdpasim_schedule_work(vdpasim);
}
static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
@@ -377,13 +335,13 @@ static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready)
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
bool old_ready;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
old_ready = vq->ready;
vq->ready = ready;
if (vq->ready && !old_ready) {
vdpasim_queue_ready(vdpasim, idx);
}
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
}
static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
@@ -401,9 +359,9 @@ static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx,
struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
struct vringh *vrh = &vq->vring;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
vrh->last_avail_idx = state->split.avail_index;
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
return 0;
}
@@ -419,6 +377,18 @@ static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx,
return 0;
}
+static int vdpasim_get_vq_stats(struct vdpa_device *vdpa, u16 idx,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ if (vdpasim->dev_attr.get_stats)
+ return vdpasim->dev_attr.get_stats(vdpasim, idx,
+ msg, extack);
+ return -EOPNOTSUPP;
+}
+
static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa)
{
return VDPASIM_QUEUE_ALIGN;
@@ -488,9 +458,9 @@ static u8 vdpasim_get_status(struct vdpa_device *vdpa)
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
u8 status;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
status = vdpasim->status;
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
return status;
}
@@ -499,19 +469,19 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
vdpasim->status = status;
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
}
static int vdpasim_reset(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
vdpasim->status = 0;
vdpasim_do_reset(vdpasim);
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
return 0;
}
@@ -520,9 +490,30 @@ static int vdpasim_suspend(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
vdpasim->running = false;
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
+
+ return 0;
+}
+
+static int vdpasim_resume(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ int i;
+
+ mutex_lock(&vdpasim->mutex);
+ vdpasim->running = true;
+
+ if (vdpasim->pending_kick) {
+ /* Process pending descriptors */
+ for (i = 0; i < vdpasim->dev_attr.nvqs; ++i)
+ vdpasim_kick_vq(vdpa, i);
+
+ vdpasim->pending_kick = false;
+ }
+
+ mutex_unlock(&vdpasim->mutex);
return 0;
}
@@ -594,14 +585,14 @@ static int vdpasim_set_group_asid(struct vdpa_device *vdpa, unsigned int group,
iommu = &vdpasim->iommu[asid];
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
for (i = 0; i < vdpasim->dev_attr.nvqs; i++)
if (vdpasim_get_vq_group(vdpa, i) == group)
vringh_set_iotlb(&vdpasim->vqs[i].vring, iommu,
&vdpasim->iommu_lock);
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
return 0;
}
@@ -622,6 +613,7 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid,
iommu = &vdpasim->iommu[asid];
vhost_iotlb_reset(iommu);
+ vdpasim->iommu_pt[asid] = false;
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
map = vhost_iotlb_itree_next(map, start, last)) {
@@ -639,6 +631,30 @@ err:
return ret;
}
+static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_mm_work mm_work;
+
+ mm_work.vdpasim = vdpasim;
+ mm_work.mm_to_bind = mm;
+
+ vdpasim_worker_change_mm_sync(vdpasim, &mm_work);
+
+ return mm_work.ret;
+}
+
+static void vdpasim_unbind_mm(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ struct vdpasim_mm_work mm_work;
+
+ mm_work.vdpasim = vdpasim;
+ mm_work.mm_to_bind = NULL;
+
+ vdpasim_worker_change_mm_sync(vdpasim, &mm_work);
+}
+
static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid,
u64 iova, u64 size,
u64 pa, u32 perm, void *opaque)
@@ -650,6 +666,10 @@ static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid,
return -EINVAL;
spin_lock(&vdpasim->iommu_lock);
+ if (vdpasim->iommu_pt[asid]) {
+ vhost_iotlb_reset(&vdpasim->iommu[asid]);
+ vdpasim->iommu_pt[asid] = false;
+ }
ret = vhost_iotlb_add_range_ctx(&vdpasim->iommu[asid], iova,
iova + size - 1, pa, perm, opaque);
spin_unlock(&vdpasim->iommu_lock);
@@ -665,6 +685,11 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
if (asid >= vdpasim->dev_attr.nas)
return -EINVAL;
+ if (vdpasim->iommu_pt[asid]) {
+ vhost_iotlb_reset(&vdpasim->iommu[asid]);
+ vdpasim->iommu_pt[asid] = false;
+ }
+
spin_lock(&vdpasim->iommu_lock);
vhost_iotlb_del_range(&vdpasim->iommu[asid], iova, iova + size - 1);
spin_unlock(&vdpasim->iommu_lock);
@@ -677,20 +702,20 @@ static void vdpasim_free(struct vdpa_device *vdpa)
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
int i;
- cancel_work_sync(&vdpasim->work);
+ kthread_cancel_work_sync(&vdpasim->work);
+ kthread_destroy_worker(vdpasim->worker);
for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
vringh_kiov_cleanup(&vdpasim->vqs[i].out_iov);
vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov);
}
- if (vdpa_get_dma_dev(vdpa)) {
- put_iova_domain(&vdpasim->iova);
- iova_cache_put();
- }
+ vdpasim->dev_attr.free(vdpasim);
- kvfree(vdpasim->buffer);
- vhost_iotlb_free(vdpasim->iommu);
+ for (i = 0; i < vdpasim->dev_attr.nas; i++)
+ vhost_iotlb_reset(&vdpasim->iommu[i]);
+ kfree(vdpasim->iommu);
+ kfree(vdpasim->iommu_pt);
kfree(vdpasim->vqs);
kfree(vdpasim->config);
}
@@ -703,6 +728,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.set_vq_ready = vdpasim_set_vq_ready,
.get_vq_ready = vdpasim_get_vq_ready,
.set_vq_state = vdpasim_set_vq_state,
+ .get_vendor_vq_stats = vdpasim_get_vq_stats,
.get_vq_state = vdpasim_get_vq_state,
.get_vq_align = vdpasim_get_vq_align,
.get_vq_group = vdpasim_get_vq_group,
@@ -717,6 +743,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.set_status = vdpasim_set_status,
.reset = vdpasim_reset,
.suspend = vdpasim_suspend,
+ .resume = vdpasim_resume,
.get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
@@ -725,6 +752,8 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.set_group_asid = vdpasim_set_group_asid,
.dma_map = vdpasim_dma_map,
.dma_unmap = vdpasim_dma_unmap,
+ .bind_mm = vdpasim_bind_mm,
+ .unbind_mm = vdpasim_unbind_mm,
.free = vdpasim_free,
};
@@ -736,6 +765,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.set_vq_ready = vdpasim_set_vq_ready,
.get_vq_ready = vdpasim_get_vq_ready,
.set_vq_state = vdpasim_set_vq_state,
+ .get_vendor_vq_stats = vdpasim_get_vq_stats,
.get_vq_state = vdpasim_get_vq_state,
.get_vq_align = vdpasim_get_vq_align,
.get_vq_group = vdpasim_get_vq_group,
@@ -750,6 +780,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.set_status = vdpasim_set_status,
.reset = vdpasim_reset,
.suspend = vdpasim_suspend,
+ .resume = vdpasim_resume,
.get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
@@ -757,6 +788,8 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.get_iova_range = vdpasim_get_iova_range,
.set_group_asid = vdpasim_set_group_asid,
.set_map = vdpasim_set_map,
+ .bind_mm = vdpasim_bind_mm,
+ .unbind_mm = vdpasim_unbind_mm,
.free = vdpasim_free,
};
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 0e78737dcc16..bb137e479763 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -37,42 +37,49 @@ struct vdpasim_dev_attr {
struct vdpa_mgmt_dev *mgmt_dev;
const char *name;
u64 supported_features;
+ size_t alloc_size;
size_t config_size;
- size_t buffer_size;
int nvqs;
u32 id;
u32 ngroups;
u32 nas;
- work_func_t work_fn;
+ void (*work_fn)(struct vdpasim *vdpasim);
void (*get_config)(struct vdpasim *vdpasim, void *config);
void (*set_config)(struct vdpasim *vdpasim, const void *config);
+ int (*get_stats)(struct vdpasim *vdpasim, u16 idx,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack);
+ void (*free)(struct vdpasim *vdpasim);
};
/* State of each vdpasim device */
struct vdpasim {
struct vdpa_device vdpa;
struct vdpasim_virtqueue *vqs;
- struct work_struct work;
+ struct kthread_worker *worker;
+ struct kthread_work work;
+ struct mm_struct *mm_bound;
struct vdpasim_dev_attr dev_attr;
- /* spinlock to synchronize virtqueue state */
- spinlock_t lock;
+ /* mutex to synchronize virtqueue state */
+ struct mutex mutex;
/* virtio config according to device type */
void *config;
struct vhost_iotlb *iommu;
- struct iova_domain iova;
- void *buffer;
+ bool *iommu_pt;
u32 status;
u32 generation;
u64 features;
u32 groups;
bool running;
+ bool pending_kick;
/* spinlock to synchronize iommu table */
spinlock_t iommu_lock;
};
struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,
const struct vdpa_dev_set_config *config);
+void vdpasim_schedule_work(struct vdpasim *vdpasim);
/* TODO: cross-endian support */
static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
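With per-device buffers and work functions, a simulator now embeds struct vdpasim as its first member and reports its full size through alloc_size; a minimal sketch for a hypothetical device:

	struct vdpasim_foo {
		struct vdpasim vdpasim;	/* must stay first: keeps the embedded
					 * struct vdpa_device at offset 0 for
					 * __vdpa_alloc_device()/vdpa_to_sim()
					 */
		void *buffer;
	};

	static void vdpasim_foo_work(struct vdpasim *vdpasim)
	{
		/* runs on vdpasim->worker; with use_va it may run with the
		 * bound mm made current by vdpasim_work_fn()
		 */
	}

	static void vdpasim_foo_free(struct vdpasim *vdpasim)
	{
		struct vdpasim_foo *foo = container_of(vdpasim, struct vdpasim_foo, vdpasim);

		kvfree(foo->buffer);
	}

	/* in the dev_add path: */
	dev_attr.alloc_size = sizeof(struct vdpasim_foo);
	dev_attr.work_fn = vdpasim_foo_work;
	dev_attr.free = vdpasim_foo_free;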
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index c6db1a1baf76..00d7d72713be 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -11,7 +11,6 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
@@ -44,8 +43,39 @@
#define VDPASIM_BLK_AS_NUM 1
#define VDPASIM_BLK_GROUP_NUM 1
+struct vdpasim_blk {
+ struct vdpasim vdpasim;
+ void *buffer;
+ bool shared_backend;
+};
+
+static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim)
+{
+ return container_of(vdpasim, struct vdpasim_blk, vdpasim);
+}
+
static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
+static bool shared_backend;
+module_param(shared_backend, bool, 0444);
+MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices");
+
+static void *shared_buffer;
+/* mutex to synchronize shared_buffer access */
+static DEFINE_MUTEX(shared_buffer_mutex);
+
+static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk)
+{
+ if (blk->shared_backend)
+ mutex_lock(&shared_buffer_mutex);
+}
+
+static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk)
+{
+ if (blk->shared_backend)
+ mutex_unlock(&shared_buffer_mutex);
+}
+
static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
u64 num_sectors, u64 max_sectors)
{
@@ -79,6 +109,7 @@ static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
struct vdpasim_virtqueue *vq)
{
+ struct vdpasim_blk *blk = sim_to_blk(vdpasim);
size_t pushed = 0, to_pull, to_push;
struct virtio_blk_outhdr hdr;
bool handled = false;
@@ -144,9 +175,10 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
break;
}
+ vdpasim_blk_buffer_lock(blk);
bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
- vdpasim->buffer + offset,
- to_push);
+ blk->buffer + offset, to_push);
+ vdpasim_blk_buffer_unlock(blk);
if (bytes < 0) {
dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
@@ -166,9 +198,10 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
break;
}
+ vdpasim_blk_buffer_lock(blk);
bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
- vdpasim->buffer + offset,
- to_pull);
+ blk->buffer + offset, to_pull);
+ vdpasim_blk_buffer_unlock(blk);
if (bytes < 0) {
dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
@@ -248,8 +281,10 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
}
if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
- memset(vdpasim->buffer + offset, 0,
+ vdpasim_blk_buffer_lock(blk);
+ memset(blk->buffer + offset, 0,
num_sectors << SECTOR_SHIFT);
+ vdpasim_blk_buffer_unlock(blk);
}
break;
@@ -286,13 +321,12 @@ err:
return handled;
}
-static void vdpasim_blk_work(struct work_struct *work)
+static void vdpasim_blk_work(struct vdpasim *vdpasim)
{
- struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
bool reschedule = false;
int i;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
goto out;
@@ -323,10 +357,10 @@ static void vdpasim_blk_work(struct work_struct *work)
}
}
out:
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
if (reschedule)
- schedule_work(&vdpasim->work);
+ vdpasim_schedule_work(vdpasim);
}
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
@@ -355,6 +389,14 @@ static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
}
+static void vdpasim_blk_free(struct vdpasim *vdpasim)
+{
+ struct vdpasim_blk *blk = sim_to_blk(vdpasim);
+
+ if (!blk->shared_backend)
+ kvfree(blk->buffer);
+}
+
static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}
@@ -368,6 +410,7 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct vdpasim_dev_attr dev_attr = {};
+ struct vdpasim_blk *blk;
struct vdpasim *simdev;
int ret;
@@ -378,15 +421,30 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
dev_attr.nas = VDPASIM_BLK_AS_NUM;
+ dev_attr.alloc_size = sizeof(struct vdpasim_blk);
dev_attr.config_size = sizeof(struct virtio_blk_config);
dev_attr.get_config = vdpasim_blk_get_config;
dev_attr.work_fn = vdpasim_blk_work;
- dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT;
+ dev_attr.free = vdpasim_blk_free;
simdev = vdpasim_create(&dev_attr, config);
if (IS_ERR(simdev))
return PTR_ERR(simdev);
+ blk = sim_to_blk(simdev);
+ blk->shared_backend = shared_backend;
+
+ if (blk->shared_backend) {
+ blk->buffer = shared_buffer;
+ } else {
+ blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
+ GFP_KERNEL);
+ if (!blk->buffer) {
+ ret = -ENOMEM;
+ goto put_dev;
+ }
+ }
+
ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
if (ret)
goto put_dev;
@@ -427,13 +485,24 @@ static int __init vdpasim_blk_init(void)
int ret;
ret = device_register(&vdpasim_blk_mgmtdev);
- if (ret)
+ if (ret) {
+ put_device(&vdpasim_blk_mgmtdev);
return ret;
+ }
ret = vdpa_mgmtdev_register(&mgmt_dev);
if (ret)
goto parent_err;
+ if (shared_backend) {
+ shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
+ GFP_KERNEL);
+ if (!shared_buffer) {
+ ret = -ENOMEM;
+ goto parent_err;
+ }
+ }
+
return 0;
parent_err:
@@ -443,6 +512,7 @@ parent_err:
static void __exit vdpasim_blk_exit(void)
{
+ kvfree(shared_buffer);
vdpa_mgmtdev_unregister(&mgmt_dev);
device_unregister(&vdpasim_blk_mgmtdev);
}
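A quick way to exercise the new parameter (assuming the iproute2 vdpa tool is available):

	modprobe vdpa_sim_blk shared_backend=1
	vdpa dev add mgmtdev vdpasim_blk name blk0
	vdpa dev add mgmtdev vdpasim_blk name blk1
	# both devices now back onto the same shared_buffer; concurrent
	# access is serialized by shared_buffer_mutex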
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
index c3cb225ea469..cfe962911804 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -11,10 +11,10 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
#include <linux/etherdevice.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
+#include <net/netlink.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/vdpa.h>
@@ -27,6 +27,7 @@
#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \
(1ULL << VIRTIO_NET_F_MAC) | \
+ (1ULL << VIRTIO_NET_F_STATUS) | \
(1ULL << VIRTIO_NET_F_MTU) | \
(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
(1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR))
@@ -36,6 +37,35 @@
#define VDPASIM_NET_AS_NUM 2
#define VDPASIM_NET_GROUP_NUM 2
+struct vdpasim_dataq_stats {
+ struct u64_stats_sync syncp;
+ u64 pkts;
+ u64 bytes;
+ u64 drops;
+ u64 errors;
+ u64 overruns;
+};
+
+struct vdpasim_cq_stats {
+ struct u64_stats_sync syncp;
+ u64 requests;
+ u64 successes;
+ u64 errors;
+};
+
+struct vdpasim_net {
+ struct vdpasim vdpasim;
+ struct vdpasim_dataq_stats tx_stats;
+ struct vdpasim_dataq_stats rx_stats;
+ struct vdpasim_cq_stats cq_stats;
+ void *buffer;
+};
+
+static struct vdpasim_net *sim_to_net(struct vdpasim *vdpasim)
+{
+ return container_of(vdpasim, struct vdpasim_net, vdpasim);
+}
+
static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len)
{
/* Make sure data is written before advancing index */
@@ -58,11 +88,15 @@ static bool receive_filter(struct vdpasim *vdpasim, size_t len)
size_t hdr_len = modern ? sizeof(struct virtio_net_hdr_v1) :
sizeof(struct virtio_net_hdr);
struct virtio_net_config *vio_config = vdpasim->config;
+ struct vdpasim_net *net = sim_to_net(vdpasim);
if (len < ETH_ALEN + hdr_len)
return false;
- if (!strncmp(vdpasim->buffer + hdr_len, vio_config->mac, ETH_ALEN))
+ if (is_broadcast_ether_addr(net->buffer + hdr_len) ||
+ is_multicast_ether_addr(net->buffer + hdr_len))
+ return true;
+ if (!strncmp(net->buffer + hdr_len, vio_config->mac, ETH_ALEN))
return true;
return false;
@@ -93,9 +127,11 @@ static virtio_net_ctrl_ack vdpasim_handle_ctrl_mac(struct vdpasim *vdpasim,
static void vdpasim_handle_cvq(struct vdpasim *vdpasim)
{
struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
+ struct vdpasim_net *net = sim_to_net(vdpasim);
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
struct virtio_net_ctrl_hdr ctrl;
size_t read, write;
+ u64 requests = 0, errors = 0, successes = 0;
int err;
if (!(vdpasim->features & (1ULL << VIRTIO_NET_F_CTRL_VQ)))
@@ -111,10 +147,13 @@ static void vdpasim_handle_cvq(struct vdpasim *vdpasim)
if (err <= 0)
break;
+ ++requests;
read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, &ctrl,
sizeof(ctrl));
- if (read != sizeof(ctrl))
+ if (read != sizeof(ctrl)) {
+ ++errors;
break;
+ }
switch (ctrl.class) {
case VIRTIO_NET_CTRL_MAC:
@@ -124,6 +163,11 @@ static void vdpasim_handle_cvq(struct vdpasim *vdpasim)
break;
}
+ if (status == VIRTIO_NET_OK)
+ ++successes;
+ else
+ ++errors;
+
/* Make sure data is written before advancing index */
smp_wmb();
@@ -141,18 +185,25 @@ static void vdpasim_handle_cvq(struct vdpasim *vdpasim)
cvq->cb(cvq->private);
local_bh_enable();
}
+
+ u64_stats_update_begin(&net->cq_stats.syncp);
+ net->cq_stats.requests += requests;
+ net->cq_stats.errors += errors;
+ net->cq_stats.successes += successes;
+ u64_stats_update_end(&net->cq_stats.syncp);
}
-static void vdpasim_net_work(struct work_struct *work)
+static void vdpasim_net_work(struct vdpasim *vdpasim)
{
- struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
+ struct vdpasim_net *net = sim_to_net(vdpasim);
ssize_t read, write;
- int pkts = 0;
+ u64 tx_pkts = 0, rx_pkts = 0, tx_bytes = 0, rx_bytes = 0;
+ u64 rx_drops = 0, rx_overruns = 0, rx_errors = 0, tx_errors = 0;
int err;
- spin_lock(&vdpasim->lock);
+ mutex_lock(&vdpasim->mutex);
if (!vdpasim->running)
goto out;
@@ -168,14 +219,20 @@ static void vdpasim_net_work(struct work_struct *work)
while (true) {
err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL,
&txq->head, GFP_ATOMIC);
- if (err <= 0)
+ if (err <= 0) {
+ if (err)
+ ++tx_errors;
break;
+ }
+ ++tx_pkts;
read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov,
- vdpasim->buffer,
- PAGE_SIZE);
+ net->buffer, PAGE_SIZE);
+
+ tx_bytes += read;
if (!receive_filter(vdpasim, read)) {
+ ++rx_drops;
vdpasim_net_complete(txq, 0);
continue;
}
@@ -183,26 +240,171 @@ static void vdpasim_net_work(struct work_struct *work)
err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov,
&rxq->head, GFP_ATOMIC);
if (err <= 0) {
+ ++rx_overruns;
vdpasim_net_complete(txq, 0);
break;
}
write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov,
- vdpasim->buffer, read);
- if (write <= 0)
+ net->buffer, read);
+ if (write <= 0) {
+ ++rx_errors;
break;
+ }
+
+ ++rx_pkts;
+ rx_bytes += write;
vdpasim_net_complete(txq, 0);
vdpasim_net_complete(rxq, write);
- if (++pkts > 4) {
- schedule_work(&vdpasim->work);
+ if (tx_pkts > 4) {
+ vdpasim_schedule_work(vdpasim);
goto out;
}
}
out:
- spin_unlock(&vdpasim->lock);
+ mutex_unlock(&vdpasim->mutex);
+
+ u64_stats_update_begin(&net->tx_stats.syncp);
+ net->tx_stats.pkts += tx_pkts;
+ net->tx_stats.bytes += tx_bytes;
+ net->tx_stats.errors += tx_errors;
+ u64_stats_update_end(&net->tx_stats.syncp);
+
+ u64_stats_update_begin(&net->rx_stats.syncp);
+ net->rx_stats.pkts += rx_pkts;
+ net->rx_stats.bytes += rx_bytes;
+ net->rx_stats.drops += rx_drops;
+ net->rx_stats.errors += rx_errors;
+ net->rx_stats.overruns += rx_overruns;
+ u64_stats_update_end(&net->rx_stats.syncp);
+}
+
+static int vdpasim_net_get_stats(struct vdpasim *vdpasim, u16 idx,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack)
+{
+ struct vdpasim_net *net = sim_to_net(vdpasim);
+ u64 rx_pkts, rx_bytes, rx_errors, rx_overruns, rx_drops;
+ u64 tx_pkts, tx_bytes, tx_errors, tx_drops;
+ u64 cq_requests, cq_successes, cq_errors;
+ unsigned int start;
+ int err = -EMSGSIZE;
+
+ switch (idx) {
+ case 0:
+ do {
+ start = u64_stats_fetch_begin(&net->rx_stats.syncp);
+ rx_pkts = net->rx_stats.pkts;
+ rx_bytes = net->rx_stats.bytes;
+ rx_errors = net->rx_stats.errors;
+ rx_overruns = net->rx_stats.overruns;
+ rx_drops = net->rx_stats.drops;
+ } while (u64_stats_fetch_retry(&net->rx_stats.syncp, start));
+
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "rx packets"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ rx_pkts, VDPA_ATTR_PAD))
+ break;
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "rx bytes"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ rx_bytes, VDPA_ATTR_PAD))
+ break;
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "rx errors"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ rx_errors, VDPA_ATTR_PAD))
+ break;
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "rx overruns"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ rx_overruns, VDPA_ATTR_PAD))
+ break;
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "rx drops"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ rx_drops, VDPA_ATTR_PAD))
+ break;
+ err = 0;
+ break;
+ case 1:
+ do {
+ start = u64_stats_fetch_begin(&net->tx_stats.syncp);
+ tx_pkts = net->tx_stats.pkts;
+ tx_bytes = net->tx_stats.bytes;
+ tx_errors = net->tx_stats.errors;
+ tx_drops = net->tx_stats.drops;
+ } while (u64_stats_fetch_retry(&net->tx_stats.syncp, start));
+
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "tx packets"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ tx_pkts, VDPA_ATTR_PAD))
+ break;
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "tx bytes"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ tx_bytes, VDPA_ATTR_PAD))
+ break;
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "tx errors"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ tx_errors, VDPA_ATTR_PAD))
+ break;
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "tx drops"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ tx_drops, VDPA_ATTR_PAD))
+ break;
+ err = 0;
+ break;
+ case 2:
+ do {
+ start = u64_stats_fetch_begin(&net->cq_stats.syncp);
+ cq_requests = net->cq_stats.requests;
+ cq_successes = net->cq_stats.successes;
+ cq_errors = net->cq_stats.errors;
+ } while (u64_stats_fetch_retry(&net->cq_stats.syncp, start));
+
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "cvq requests"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ cq_requests, VDPA_ATTR_PAD))
+ break;
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "cvq successes"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ cq_successes, VDPA_ATTR_PAD))
+ break;
+ if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME,
+ "cvq errors"))
+ break;
+ if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,
+ cq_errors, VDPA_ATTR_PAD))
+ break;
+ err = 0;
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
}
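The counters filled above surface through the generic vendor-stats netlink command; with an iproute2 vdpa binary that supports vstats, something like:

	# data queues are qidx 0 (rx) and 1 (tx); qidx 2 is the control VQ
	vdpa dev vstats show vdpa0 qidx 2

The reply is a list of VDPA_ATTR_DEV_VENDOR_ATTR_NAME/VALUE pairs, i.e. the "cvq requests"/"cvq successes"/"cvq errors" counters above.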
static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
@@ -226,6 +428,13 @@ static void vdpasim_net_setup_config(struct vdpasim *vdpasim,
vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
}
+static void vdpasim_net_free(struct vdpasim *vdpasim)
+{
+ struct vdpasim_net *net = sim_to_net(vdpasim);
+
+ kvfree(net->buffer);
+}
+
static void vdpasim_net_mgmtdev_release(struct device *dev)
{
}
@@ -239,6 +448,7 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config)
{
struct vdpasim_dev_attr dev_attr = {};
+ struct vdpasim_net *net;
struct vdpasim *simdev;
int ret;
@@ -249,10 +459,12 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
dev_attr.ngroups = VDPASIM_NET_GROUP_NUM;
dev_attr.nas = VDPASIM_NET_AS_NUM;
+ dev_attr.alloc_size = sizeof(struct vdpasim_net);
dev_attr.config_size = sizeof(struct virtio_net_config);
dev_attr.get_config = vdpasim_net_get_config;
dev_attr.work_fn = vdpasim_net_work;
- dev_attr.buffer_size = PAGE_SIZE;
+ dev_attr.get_stats = vdpasim_net_get_stats;
+ dev_attr.free = vdpasim_net_free;
simdev = vdpasim_create(&dev_attr, config);
if (IS_ERR(simdev))
@@ -260,6 +472,23 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
vdpasim_net_setup_config(simdev, config);
+ net = sim_to_net(simdev);
+
+ u64_stats_init(&net->tx_stats.syncp);
+ u64_stats_init(&net->rx_stats.syncp);
+ u64_stats_init(&net->cq_stats.syncp);
+
+ net->buffer = kvmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!net->buffer) {
+ ret = -ENOMEM;
+ goto reg_err;
+ }
+
+ /*
+ * Initialization must be completed before this call, since it can
+ * connect the device to the vDPA bus; requests can arrive as soon
+ * as it returns.
+ */
ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM);
if (ret)
goto reg_err;
@@ -305,8 +534,10 @@ static int __init vdpasim_net_init(void)
int ret;
ret = device_register(&vdpasim_net_mgmtdev);
- if (ret)
+ if (ret) {
+ put_device(&vdpasim_net_mgmtdev);
return ret;
+ }
ret = vdpa_mgmtdev_register(&mgmt_dev);
if (ret)
diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c
index e682bc7ee6c9..5e4a77b9bae6 100644
--- a/drivers/vdpa/vdpa_user/iova_domain.c
+++ b/drivers/vdpa/vdpa_user/iova_domain.c
@@ -512,7 +512,7 @@ static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
struct vduse_iova_domain *domain = file->private_data;
- vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND;
+ vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND);
vma->vm_private_data = domain;
vma->vm_ops = &vduse_domain_mmap_ops;
diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h
index 4e0e50e7ac15..173e979b84a9 100644
--- a/drivers/vdpa/vdpa_user/iova_domain.h
+++ b/drivers/vdpa/vdpa_user/iova_domain.h
@@ -14,7 +14,6 @@
#include <linux/iova.h>
#include <linux/dma-mapping.h>
#include <linux/vhost_iotlb.h>
-#include <linux/rwlock.h>
#define IOVA_START_PFN 1
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 35dceee3ed56..5f5c21674fdc 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -37,10 +37,15 @@
#define DRV_LICENSE "GPL v2"
#define VDUSE_DEV_MAX (1U << MINORBITS)
+#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
+#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
-#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
+/* 128 MB reserved for virtqueue creation */
+#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
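The resulting per-device IOVA layout (bounce_size is now configurable between VDUSE_MIN_BOUNCE_SIZE and VDUSE_MAX_BOUNCE_SIZE; the split below follows from the macros above):

	[0, bounce_size)               bounce-buffer region (umem-registrable)
	[bounce_size, VDUSE_IOVA_SIZE) remaining space; at least 128 MB is
	                               always left for coherent/virtqueue mappings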
#define VDUSE_MSG_DEFAULT_TIMEOUT 30
+#define IRQ_UNBOUND -1
+
struct vduse_virtqueue {
u16 index;
u16 num_max;
@@ -57,6 +62,9 @@ struct vduse_virtqueue {
struct vdpa_callback cb;
struct work_struct inject;
struct work_struct kick;
+ int irq_effective_cpu;
+ struct cpumask irq_affinity;
+ struct kobject kobj;
};
struct vduse_dev;
@@ -76,7 +84,7 @@ struct vduse_umem {
struct vduse_dev {
struct vduse_vdpa *vdev;
struct device *dev;
- struct vduse_virtqueue *vqs;
+ struct vduse_virtqueue **vqs;
struct vduse_iova_domain *domain;
char *name;
struct mutex lock;
@@ -106,6 +114,8 @@ struct vduse_dev {
u32 vq_align;
struct vduse_umem *umem;
struct mutex mem_lock;
+ unsigned int bounce_size;
+ struct mutex domain_lock;
};
struct vduse_dev_msg {
@@ -128,6 +138,7 @@ static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
static u32 allowed_device_id[] = {
VIRTIO_ID_BLOCK,
@@ -419,7 +430,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
struct vduse_iova_domain *domain = dev->domain;
/* The coherent mappings are handled in vduse_dev_free_coherent() */
- if (domain->bounce_map)
+ if (domain && domain->bounce_map)
vduse_domain_reset_bounce_map(domain);
down_write(&dev->rwsem);
@@ -434,7 +445,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
flush_work(&dev->inject);
for (i = 0; i < dev->vq_num; i++) {
- struct vduse_virtqueue *vq = &dev->vqs[i];
+ struct vduse_virtqueue *vq = dev->vqs[i];
vq->ready = false;
vq->desc_addr = 0;
@@ -453,6 +464,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
spin_lock(&vq->irq_lock);
vq->cb.callback = NULL;
vq->cb.private = NULL;
+ vq->cb.trigger = NULL;
spin_unlock(&vq->irq_lock);
flush_work(&vq->inject);
flush_work(&vq->kick);
@@ -466,7 +478,7 @@ static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
u64 device_area)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
vq->desc_addr = desc_area;
vq->driver_addr = driver_area;
@@ -500,7 +512,7 @@ static void vduse_vq_kick_work(struct work_struct *work)
static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
if (!eventfd_signal_allowed()) {
schedule_work(&vq->kick);
@@ -513,18 +525,19 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
struct vdpa_callback *cb)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
spin_lock(&vq->irq_lock);
vq->cb.callback = cb->callback;
vq->cb.private = cb->private;
+ vq->cb.trigger = cb->trigger;
spin_unlock(&vq->irq_lock);
}
static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
vq->num = num;
}
@@ -533,7 +546,7 @@ static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
u16 idx, bool ready)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
vq->ready = ready;
}
@@ -541,7 +554,7 @@ static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
return vq->ready;
}
@@ -550,7 +563,7 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
const struct vdpa_vq_state *state)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
vq->state.packed.last_avail_counter =
@@ -569,7 +582,7 @@ static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
struct vdpa_vq_state *state)
{
struct vduse_dev *dev = vdpa_to_vduse(vdpa);
- struct vduse_virtqueue *vq = &dev->vqs[idx];
+ struct vduse_virtqueue *vq = dev->vqs[idx];
if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
@@ -624,8 +637,8 @@ static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
int i;
for (i = 0; i < dev->vq_num; i++)
- if (num_max < dev->vqs[i].num_max)
- num_max = dev->vqs[i].num_max;
+ if (num_max < dev->vqs[i]->num_max)
+ num_max = dev->vqs[i]->num_max;
return num_max;
}
@@ -708,6 +721,23 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
return dev->generation;
}
+static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
+ const struct cpumask *cpu_mask)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
+ return 0;
+}
+
+static const struct cpumask *
+vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ return &dev->vqs[idx]->irq_affinity;
+}
+
static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
unsigned int asid,
struct vhost_iotlb *iotlb)
@@ -758,6 +788,8 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
.get_config = vduse_vdpa_get_config,
.set_config = vduse_vdpa_set_config,
.get_generation = vduse_vdpa_get_generation,
+ .set_vq_affinity = vduse_vdpa_set_vq_affinity,
+ .get_vq_affinity = vduse_vdpa_get_vq_affinity,
.reset = vduse_vdpa_reset,
.set_map = vduse_vdpa_set_map,
.free = vduse_vdpa_free,
@@ -863,7 +895,7 @@ static int vduse_kickfd_setup(struct vduse_dev *dev,
return -EINVAL;
index = array_index_nospec(eventfd->index, dev->vq_num);
- vq = &dev->vqs[index];
+ vq = dev->vqs[index];
if (eventfd->fd >= 0) {
ctx = eventfd_ctx_fdget(eventfd->fd);
if (IS_ERR(ctx))
@@ -889,7 +921,7 @@ static bool vduse_dev_is_ready(struct vduse_dev *dev)
int i;
for (i = 0; i < dev->vq_num; i++)
- if (!dev->vqs[i].num_max)
+ if (!dev->vqs[i]->num_max)
return false;
return true;
@@ -916,8 +948,26 @@ static void vduse_vq_irq_inject(struct work_struct *work)
spin_unlock_irq(&vq->irq_lock);
}
+static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
+{
+ bool signal = false;
+
+ if (!vq->cb.trigger)
+ return false;
+
+ spin_lock_irq(&vq->irq_lock);
+ if (vq->ready && vq->cb.trigger) {
+ eventfd_signal(vq->cb.trigger, 1);
+ signal = true;
+ }
+ spin_unlock_irq(&vq->irq_lock);
+
+ return signal;
+}
+
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
- struct work_struct *irq_work)
+ struct work_struct *irq_work,
+ int irq_effective_cpu)
{
int ret = -EINVAL;
@@ -926,7 +976,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
goto unlock;
ret = 0;
- queue_work(vduse_irq_wq, irq_work);
+ if (irq_effective_cpu == IRQ_UNBOUND)
+ queue_work(vduse_irq_wq, irq_work);
+ else
+ queue_work_on(irq_effective_cpu,
+ vduse_irq_bound_wq, irq_work);
unlock:
up_read(&dev->rwsem);
@@ -944,6 +998,9 @@ static int vduse_dev_dereg_umem(struct vduse_dev *dev,
goto unlock;
ret = -EINVAL;
+ if (!dev->domain)
+ goto unlock;
+
if (dev->umem->iova != iova || size != dev->domain->bounce_size)
goto unlock;
@@ -970,7 +1027,7 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
unsigned long npages, lock_limit;
int ret;
- if (!dev->domain->bounce_map ||
+ if (!dev->domain || !dev->domain->bounce_map ||
size != dev->domain->bounce_size ||
iova != 0 || uaddr & ~PAGE_MASK)
return -EINVAL;
@@ -1029,6 +1086,22 @@ unlock:
return ret;
}
+static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
+{
+ int curr_cpu = vq->irq_effective_cpu;
+
+ while (true) {
+ curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
+ if (cpu_online(curr_cpu))
+ break;
+
+ if (curr_cpu >= nr_cpu_ids)
+ curr_cpu = IRQ_UNBOUND;
+ }
+
+ vq->irq_effective_cpu = curr_cpu;
+}
+
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -1044,7 +1117,6 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
struct vduse_iotlb_entry entry;
struct vhost_iotlb_map *map;
struct vdpa_map_file *map_file;
- struct vduse_iova_domain *domain = dev->domain;
struct file *f = NULL;
ret = -EFAULT;
@@ -1055,8 +1127,13 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (entry.start > entry.last)
break;
- spin_lock(&domain->iotlb_lock);
- map = vhost_iotlb_itree_first(domain->iotlb,
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain) {
+ mutex_unlock(&dev->domain_lock);
+ break;
+ }
+ spin_lock(&dev->domain->iotlb_lock);
+ map = vhost_iotlb_itree_first(dev->domain->iotlb,
entry.start, entry.last);
if (map) {
map_file = (struct vdpa_map_file *)map->opaque;
@@ -1066,7 +1143,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
entry.last = map->last;
entry.perm = map->perm;
}
- spin_unlock(&domain->iotlb_lock);
+ spin_unlock(&dev->domain->iotlb_lock);
+ mutex_unlock(&dev->domain_lock);
ret = -EINVAL;
if (!f)
break;
@@ -1111,7 +1189,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
}
case VDUSE_DEV_INJECT_CONFIG_IRQ:
- ret = vduse_dev_queue_irq_work(dev, &dev->inject);
+ ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
break;
case VDUSE_VQ_SETUP: {
struct vduse_vq_config config;
@@ -1130,7 +1208,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
index = array_index_nospec(config.index, dev->vq_num);
- dev->vqs[index].num_max = config.max_size;
+ dev->vqs[index]->num_max = config.max_size;
ret = 0;
break;
}
@@ -1148,7 +1226,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
index = array_index_nospec(vq_info.index, dev->vq_num);
- vq = &dev->vqs[index];
+ vq = dev->vqs[index];
vq_info.desc_addr = vq->desc_addr;
vq_info.driver_addr = vq->driver_addr;
vq_info.device_addr = vq->device_addr;
@@ -1197,8 +1275,14 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (index >= dev->vq_num)
break;
+ ret = 0;
index = array_index_nospec(index, dev->vq_num);
- ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
+ if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
+ vduse_vq_update_effective_cpu(dev->vqs[index]);
+ ret = vduse_dev_queue_irq_work(dev,
+ &dev->vqs[index]->inject,
+ dev->vqs[index]->irq_effective_cpu);
+ }
break;
}
case VDUSE_IOTLB_REG_UMEM: {
@@ -1213,8 +1297,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
sizeof(umem.reserved)))
break;
+ mutex_lock(&dev->domain_lock);
ret = vduse_dev_reg_umem(dev, umem.iova,
umem.uaddr, umem.size);
+ mutex_unlock(&dev->domain_lock);
break;
}
case VDUSE_IOTLB_DEREG_UMEM: {
@@ -1228,15 +1314,15 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (!is_mem_zero((const char *)umem.reserved,
sizeof(umem.reserved)))
break;
-
+ mutex_lock(&dev->domain_lock);
ret = vduse_dev_dereg_umem(dev, umem.iova,
umem.size);
+ mutex_unlock(&dev->domain_lock);
break;
}
case VDUSE_IOTLB_GET_INFO: {
struct vduse_iova_info info;
struct vhost_iotlb_map *map;
- struct vduse_iova_domain *domain = dev->domain;
ret = -EFAULT;
if (copy_from_user(&info, argp, sizeof(info)))
@@ -1250,18 +1336,24 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
sizeof(info.reserved)))
break;
- spin_lock(&domain->iotlb_lock);
- map = vhost_iotlb_itree_first(domain->iotlb,
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain) {
+ mutex_unlock(&dev->domain_lock);
+ break;
+ }
+ spin_lock(&dev->domain->iotlb_lock);
+ map = vhost_iotlb_itree_first(dev->domain->iotlb,
info.start, info.last);
if (map) {
info.start = map->start;
info.last = map->last;
info.capability = 0;
- if (domain->bounce_map && map->start == 0 &&
- map->last == domain->bounce_size - 1)
+ if (dev->domain->bounce_map && map->start == 0 &&
+ map->last == dev->domain->bounce_size - 1)
info.capability |= VDUSE_IOVA_CAP_UMEM;
}
- spin_unlock(&domain->iotlb_lock);
+ spin_unlock(&dev->domain->iotlb_lock);
+ mutex_unlock(&dev->domain_lock);
if (!map)
break;
@@ -1284,7 +1376,10 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
{
struct vduse_dev *dev = file->private_data;
- vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
+ mutex_lock(&dev->domain_lock);
+ if (dev->domain)
+ vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
+ mutex_unlock(&dev->domain_lock);
spin_lock(&dev->msg_lock);
/* Make sure the inflight messages can be processed after reconnection */
list_splice_init(&dev->recv_list, &dev->send_list);
@@ -1339,6 +1434,151 @@ static const struct file_operations vduse_dev_fops = {
.llseek = noop_llseek,
};
+static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
+{
+ return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
+}
+
+static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
+ const char *buf, size_t count)
+{
+ cpumask_var_t new_value;
+ int ret;
+
+ if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = cpumask_parse(buf, new_value);
+ if (ret)
+ goto free_mask;
+
+ ret = -EINVAL;
+ if (!cpumask_intersects(new_value, cpu_online_mask))
+ goto free_mask;
+
+ cpumask_copy(&vq->irq_affinity, new_value);
+ ret = count;
+free_mask:
+ free_cpumask_var(new_value);
+ return ret;
+}
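
A minimal userspace sketch for the new attribute. The sysfs path is an assumption derived from kobject_add() under the device's kobject (the /sys/class/vduse/<name>/vq0/... symlink should also resolve); note the store handler uses cpumask_parse(), which takes a hex mask, so CPUs 0-3 are written as "f", not "0-3":

	#include <fcntl.h>
	#include <unistd.h>

	/* Restrict vq0's interrupt callback injection to CPUs 0-3. */
	static int pin_vq0(const char *attr_path)
	{
		int fd = open(attr_path, O_WRONLY);

		if (fd < 0)
			return -1;
		if (write(fd, "f\n", 2) != 2) {
			close(fd);
			return -1;
		}
		return close(fd);
	}
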
+
+struct vq_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
+ ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
+ size_t count);
+};
+
+static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
+
+static struct attribute *vq_attrs[] = {
+ &irq_cb_affinity_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vq);
+
+static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ struct vq_sysfs_entry *entry = container_of(attr,
+ struct vq_sysfs_entry, attr);
+
+ if (!entry->show)
+ return -EIO;
+
+ return entry->show(vq, buf);
+}
+
+static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ struct vq_sysfs_entry *entry = container_of(attr,
+ struct vq_sysfs_entry, attr);
+
+ if (!entry->store)
+ return -EIO;
+
+ return entry->store(vq, buf, count);
+}
+
+static const struct sysfs_ops vq_sysfs_ops = {
+ .show = vq_attr_show,
+ .store = vq_attr_store,
+};
+
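+/* With each vq now allocated separately, its lifetime is owned by its
+ * kobject: the final kobject_put() lands here and frees the vq. */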
+static void vq_release(struct kobject *kobj)
+{
+ struct vduse_virtqueue *vq = container_of(kobj,
+ struct vduse_virtqueue, kobj);
+ kfree(vq);
+}
+
+static const struct kobj_type vq_type = {
+ .release = vq_release,
+ .sysfs_ops = &vq_sysfs_ops,
+ .default_groups = vq_groups,
+};
+
+static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
+{
+ int i;
+
+ if (!dev->vqs)
+ return;
+
+ for (i = 0; i < dev->vq_num; i++)
+ kobject_put(&dev->vqs[i]->kobj);
+ kfree(dev->vqs);
+}
+
+static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
+{
+ int ret, i;
+
+ dev->vq_align = vq_align;
+ dev->vq_num = vq_num;
+ dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
+ if (!dev->vqs)
+ return -ENOMEM;
+
+ for (i = 0; i < vq_num; i++) {
+ dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
+ if (!dev->vqs[i]) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ dev->vqs[i]->index = i;
+ dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
+ INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
+ INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
+ spin_lock_init(&dev->vqs[i]->kick_lock);
+ spin_lock_init(&dev->vqs[i]->irq_lock);
+ cpumask_setall(&dev->vqs[i]->irq_affinity);
+
+ kobject_init(&dev->vqs[i]->kobj, &vq_type);
+ ret = kobject_add(&dev->vqs[i]->kobj,
+ &dev->dev->kobj, "vq%d", i);
+ if (ret) {
+ kfree(dev->vqs[i]);
+ goto err;
+ }
+ }
+
+ return 0;
+err:
+ while (i--)
+ kobject_put(&dev->vqs[i]->kobj);
+ kfree(dev->vqs);
+ dev->vqs = NULL;
+ return ret;
+}
+
static struct vduse_dev *vduse_dev_create(void)
{
struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -1348,6 +1588,7 @@ static struct vduse_dev *vduse_dev_create(void)
mutex_init(&dev->lock);
mutex_init(&dev->mem_lock);
+ mutex_init(&dev->domain_lock);
spin_lock_init(&dev->msg_lock);
INIT_LIST_HEAD(&dev->send_list);
INIT_LIST_HEAD(&dev->recv_list);
@@ -1396,8 +1637,9 @@ static int vduse_destroy_dev(char *name)
device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
idr_remove(&vduse_idr, dev->minor);
kvfree(dev->config);
- kfree(dev->vqs);
- vduse_domain_destroy(dev->domain);
+ vduse_dev_deinit_vqs(dev);
+ if (dev->domain)
+ vduse_domain_destroy(dev->domain);
kfree(dev->name);
vduse_dev_destroy(dev);
module_put(THIS_MODULE);
@@ -1440,6 +1682,12 @@ static bool vduse_validate_config(struct vduse_dev_config *config)
if (config->config_size > PAGE_SIZE)
return false;
+ if (config->vq_num > 0xffff)
+ return false;
+
+ if (!config->name[0])
+ return false;
+
if (!device_is_allowed(config->device_id))
return false;
@@ -1473,8 +1721,48 @@ static ssize_t msg_timeout_store(struct device *device,
static DEVICE_ATTR_RW(msg_timeout);
+static ssize_t bounce_size_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct vduse_dev *dev = dev_get_drvdata(device);
+
+ return sysfs_emit(buf, "%u\n", dev->bounce_size);
+}
+
+static ssize_t bounce_size_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct vduse_dev *dev = dev_get_drvdata(device);
+ unsigned int bounce_size;
+ int ret;
+
+ ret = -EPERM;
+ mutex_lock(&dev->domain_lock);
+ if (dev->domain)
+ goto unlock;
+
+ ret = kstrtouint(buf, 10, &bounce_size);
+ if (ret < 0)
+ goto unlock;
+
+ ret = -EINVAL;
+ if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
+ bounce_size < VDUSE_MIN_BOUNCE_SIZE)
+ goto unlock;
+
+ dev->bounce_size = bounce_size & PAGE_MASK;
+ ret = count;
+unlock:
+ mutex_unlock(&dev->domain_lock);
+ return ret;
+}
+
+static DEVICE_ATTR_RW(bounce_size);
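
A hedged usage sketch for the new knob. The attribute only accepts writes while no IOVA domain exists (i.e. before the first vdpa device is added, -EPERM afterwards), bounds the value by VDUSE_MIN/MAX_BOUNCE_SIZE, and rounds it down to a page boundary; the path is an assumption:

	#include <stdio.h>

	/* e.g. set_bounce_size("/sys/class/vduse/<name>/bounce_size",
	 *                      64U << 20) to request a 64 MiB bounce buffer. */
	static int set_bounce_size(const char *attr_path, unsigned int bytes)
	{
		FILE *f = fopen(attr_path, "w");

		if (!f)
			return -1;
		fprintf(f, "%u", bytes);
		return fclose(f);
	}
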
+
static struct attribute *vduse_dev_attrs[] = {
&dev_attr_msg_timeout.attr,
+ &dev_attr_bounce_size.attr,
NULL
};
@@ -1483,7 +1771,7 @@ ATTRIBUTE_GROUPS(vduse_dev);
static int vduse_create_dev(struct vduse_dev_config *config,
void *config_buf, u64 api_version)
{
- int i, ret;
+ int ret;
struct vduse_dev *dev;
ret = -EEXIST;
@@ -1503,26 +1791,9 @@ static int vduse_create_dev(struct vduse_dev_config *config,
if (!dev->name)
goto err_str;
- dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
- VDUSE_BOUNCE_SIZE);
- if (!dev->domain)
- goto err_domain;
-
+ dev->bounce_size = VDUSE_BOUNCE_SIZE;
dev->config = config_buf;
dev->config_size = config->config_size;
- dev->vq_align = config->vq_align;
- dev->vq_num = config->vq_num;
- dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
- if (!dev->vqs)
- goto err_vqs;
-
- for (i = 0; i < dev->vq_num; i++) {
- dev->vqs[i].index = i;
- INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
- INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
- spin_lock_init(&dev->vqs[i].kick_lock);
- spin_lock_init(&dev->vqs[i].irq_lock);
- }
ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
if (ret < 0)
@@ -1537,16 +1808,19 @@ static int vduse_create_dev(struct vduse_dev_config *config,
ret = PTR_ERR(dev->dev);
goto err_dev;
}
+
+ ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
+ if (ret)
+ goto err_vqs;
+
__module_get(THIS_MODULE);
return 0;
+err_vqs:
+ device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev:
idr_remove(&vduse_idr, dev->minor);
err_idr:
- kfree(dev->vqs);
-err_vqs:
- vduse_domain_destroy(dev->domain);
-err_domain:
kfree(dev->name);
err_str:
vduse_dev_destroy(dev);
@@ -1656,7 +1930,7 @@ static const struct file_operations vduse_ctrl_fops = {
.llseek = noop_llseek,
};
-static char *vduse_devnode(struct device *dev, umode_t *mode)
+static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}
@@ -1713,9 +1987,23 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
if (ret)
return ret;
+ mutex_lock(&dev->domain_lock);
+ if (!dev->domain)
+ dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
+ dev->bounce_size);
+ mutex_unlock(&dev->domain_lock);
+ if (!dev->domain) {
+ put_device(&dev->vdev->vdpa.dev);
+ return -ENOMEM;
+ }
+
ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
if (ret) {
put_device(&dev->vdev->vdpa.dev);
+ mutex_lock(&dev->domain_lock);
+ vduse_domain_destroy(dev->domain);
+ dev->domain = NULL;
+ mutex_unlock(&dev->domain_lock);
return ret;
}
@@ -1790,7 +2078,7 @@ static int vduse_init(void)
int ret;
struct device *dev;
- vduse_class = class_create(THIS_MODULE, "vduse");
+ vduse_class = class_create("vduse");
if (IS_ERR(vduse_class))
return PTR_ERR(vduse_class);
@@ -1821,12 +2109,15 @@ static int vduse_init(void)
if (ret)
goto err_cdev;
+ ret = -ENOMEM;
vduse_irq_wq = alloc_workqueue("vduse-irq",
WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
- if (!vduse_irq_wq) {
- ret = -ENOMEM;
+ if (!vduse_irq_wq)
goto err_wq;
- }
+
+ vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
+ if (!vduse_irq_bound_wq)
+ goto err_bound_wq;
ret = vduse_domain_init();
if (ret)
@@ -1840,6 +2131,8 @@ static int vduse_init(void)
err_mgmtdev:
vduse_domain_exit();
err_domain:
+ destroy_workqueue(vduse_irq_bound_wq);
+err_bound_wq:
destroy_workqueue(vduse_irq_wq);
err_wq:
cdev_del(&vduse_cdev);
@@ -1859,6 +2152,7 @@ static void vduse_exit(void)
{
vduse_mgmtdev_exit();
vduse_domain_exit();
+ destroy_workqueue(vduse_irq_bound_wq);
destroy_workqueue(vduse_irq_wq);
cdev_del(&vduse_cdev);
device_destroy(vduse_class, vduse_major);
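
The new vduse_irq_bound_wq is created without WQ_UNBOUND, so work queued on it with queue_work_on() actually runs on the chosen CPU. A sketch of the dispatch helper the VDUSE_VQ_INJECT_IRQ hunk calls, assuming it matches its upstream shape (its body is not part of these hunks):

	static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
					    struct work_struct *irq_work,
					    int irq_effective_cpu)
	{
		int ret = -EINVAL;

		down_read(&dev->rwsem);
		if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
			goto unlock;

		ret = 0;
		if (irq_effective_cpu == IRQ_UNBOUND)
			queue_work(vduse_irq_wq, irq_work);
		else
			queue_work_on(irq_effective_cpu,
				      vduse_irq_bound_wq, irq_work);
	unlock:
		up_read(&dev->rwsem);
		return ret;
	}
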
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index d448db0c4de3..281287fae89f 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -645,9 +645,9 @@ static void vp_vdpa_remove(struct pci_dev *pdev)
struct virtio_pci_modern_device *mdev = NULL;
mdev = vp_vdpa_mgtdev->mdev;
- vp_modern_remove(mdev);
vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev);
- kfree(&vp_vdpa_mgtdev->mgtdev.id_table);
+ vp_modern_remove(mdev);
+ kfree(vp_vdpa_mgtdev->mgtdev.id_table);
kfree(mdev);
kfree(vp_vdpa_mgtdev);
}
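
The vp_vdpa hunk fixes two things: the management device is unregistered before vp_modern_remove() tears down the transport it still depends on, and kfree() is given the id_table pointer itself rather than the address of the pointer member. A minimal sketch of that second bug class, with illustrative names:

	#include <linux/slab.h>

	struct owner {
		int *table;	/* heap allocation owned by 'owner' */
	};

	static void broken(struct owner *o)
	{
		kfree(&o->table);	/* frees the member's address: bogus */
	}

	static void fixed(struct owner *o)
	{
		kfree(o->table);	/* frees the allocation itself */
	}
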