Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/addr.c  9
-rw-r--r--  drivers/infiniband/core/cache.c  128
-rw-r--r--  drivers/infiniband/core/cm.c  31
-rw-r--r--  drivers/infiniband/core/cma.c  53
-rw-r--r--  drivers/infiniband/core/cma_configfs.c  13
-rw-r--r--  drivers/infiniband/core/core_priv.h  16
-rw-r--r--  drivers/infiniband/core/cq.c  4
-rw-r--r--  drivers/infiniband/core/device.c  70
-rw-r--r--  drivers/infiniband/core/iwcm.c  4
-rw-r--r--  drivers/infiniband/core/mad.c  3
-rw-r--r--  drivers/infiniband/core/nldev.c  10
-rw-r--r--  drivers/infiniband/core/rdma_core.c  24
-rw-r--r--  drivers/infiniband/core/sa_query.c  42
-rw-r--r--  drivers/infiniband/core/security.c  31
-rw-r--r--  drivers/infiniband/core/sysfs.c  10
-rw-r--r--  drivers/infiniband/core/ucma.c  49
-rw-r--r--  drivers/infiniband/core/umem.c  9
-rw-r--r--  drivers/infiniband/core/umem_odp.c  2
-rw-r--r--  drivers/infiniband/core/user_mad.c  5
-rw-r--r--  drivers/infiniband/core/uverbs.h  2
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c  29
-rw-r--r--  drivers/infiniband/core/uverbs_main.c  34
-rw-r--r--  drivers/infiniband/core/verbs.c  25
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c  4
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c  8
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c  12
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h  8
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c  41
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c  8
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c  1
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c  14
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.c  5
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.c  8
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c  11
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h  2
-rw-r--r--  drivers/infiniband/hw/hfi1/chip_registers.h  1
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c  1
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c  52
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h  7
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c  38
-rw-r--r--  drivers/infiniband/hw/hfi1/iowait.c  4
-rw-r--r--  drivers/infiniband/hw/hfi1/pcie.c  4
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.c  3
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.c  5
-rw-r--r--  drivers/infiniband/hw/hfi1/rc.c  16
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c  8
-rw-r--r--  drivers/infiniband/hw/hfi1/sysfs.c  26
-rw-r--r--  drivers/infiniband/hw/hfi1/tid_rdma.c  76
-rw-r--r--  drivers/infiniband/hw/hfi1/tid_rdma.h  3
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.c  5
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.c  46
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.h  6
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c  14
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h  3
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.h  2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c  37
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  124
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c  4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_restrack.c  10
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c  2
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.c  8
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_hw.c  2
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_main.c  2
-rw-r--r--  drivers/infiniband/hw/mlx4/cm.c  29
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c  35
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c  1
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c  14
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c  27
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c  5
-rw-r--r--  drivers/infiniband/hw/mlx5/gsi.c  3
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c  55
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  1
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c  13
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c  9
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c  19
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c  10
-rw-r--r--  drivers/infiniband/hw/mlx5/srq_cmd.c  4
-rw-r--r--  drivers/infiniband/hw/qedr/main.c  5
-rw-r--r--  drivers/infiniband/hw/qedr/qedr.h  27
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_iw_cm.c  161
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c  98
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c  7
-rw-r--r--  drivers/infiniband/hw/qib/qib_sysfs.c  15
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c  2
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c  2
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.c  84
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c  8
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c  8
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c  8
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c  6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c  7
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c  5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h  2
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h  6
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c  15
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c  76
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c  12
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c  1
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c  3
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c  24
101 files changed, 1250 insertions, 795 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index bf539c34ccd3..152b3069588a 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -139,7 +139,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
if (ib_nl_is_good_ip_resp(nlh))
ib_nl_process_good_ip_rsep(nlh);
- return skb->len;
+ return 0;
}
static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
@@ -421,16 +421,15 @@ static int addr6_resolve(struct sockaddr *src_sock,
(const struct sockaddr_in6 *)dst_sock;
struct flowi6 fl6;
struct dst_entry *dst;
- int ret;
memset(&fl6, 0, sizeof fl6);
fl6.daddr = dst_in->sin6_addr;
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
- if (ret < 0)
- return ret;
+ dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
if (ipv6_addr_any(&src_in->sin6_addr))
src_in->sin6_addr = fl6.saddr;
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 18e476b3ced0..bac02a44cc23 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -51,9 +51,8 @@ struct ib_pkey_cache {
struct ib_update_work {
struct work_struct work;
- struct ib_device *device;
- u8 port_num;
- bool enforce_security;
+ struct ib_event event;
+ bool enforce_security;
};
union ib_gid zgid;
@@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
event.element.port_num = port;
event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
+ ib_dispatch_event_clients(&event);
}
static const char * const gid_type_str[] = {
@@ -1386,9 +1385,8 @@ err:
return ret;
}
-static void ib_cache_update(struct ib_device *device,
- u8 port,
- bool enforce_security)
+static int
+ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
@@ -1396,11 +1394,11 @@ static void ib_cache_update(struct ib_device *device,
int ret;
if (!rdma_is_port_valid(device, port))
- return;
+ return -EINVAL;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
- return;
+ return -ENOMEM;
ret = ib_query_port(device, port, tprops);
if (ret) {
@@ -1418,8 +1416,10 @@ static void ib_cache_update(struct ib_device *device,
pkey_cache = kmalloc(struct_size(pkey_cache, table,
tprops->pkey_tbl_len),
GFP_KERNEL);
- if (!pkey_cache)
+ if (!pkey_cache) {
+ ret = -ENOMEM;
goto err;
+ }
pkey_cache->table_len = tprops->pkey_tbl_len;
@@ -1451,50 +1451,84 @@ static void ib_cache_update(struct ib_device *device,
kfree(old_pkey_cache);
kfree(tprops);
- return;
+ return 0;
err:
kfree(pkey_cache);
kfree(tprops);
+ return ret;
+}
+
+static void ib_cache_event_task(struct work_struct *_work)
+{
+ struct ib_update_work *work =
+ container_of(_work, struct ib_update_work, work);
+ int ret;
+
+ /* Before distributing the cache update event, first sync
+ * the cache.
+ */
+ ret = ib_cache_update(work->event.device, work->event.element.port_num,
+ work->enforce_security);
+
+ /* GID event is notified already for individual GID entries by
+ * dispatch_gid_change_event(). Hence, notifiy for rest of the
+ * events.
+ */
+ if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
+ ib_dispatch_event_clients(&work->event);
+
+ kfree(work);
}
-static void ib_cache_task(struct work_struct *_work)
+static void ib_generic_event_task(struct work_struct *_work)
{
struct ib_update_work *work =
container_of(_work, struct ib_update_work, work);
- ib_cache_update(work->device,
- work->port_num,
- work->enforce_security);
+ ib_dispatch_event_clients(&work->event);
kfree(work);
}
-static void ib_cache_event(struct ib_event_handler *handler,
- struct ib_event *event)
+static bool is_cache_update_event(const struct ib_event *event)
+{
+ return (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_GID_CHANGE);
+}
+
+/**
+ * ib_dispatch_event - Dispatch an asynchronous event
+ * @event:Event to dispatch
+ *
+ * Low-level drivers must call ib_dispatch_event() to dispatch the
+ * event to all registered event handlers when an asynchronous event
+ * occurs.
+ */
+void ib_dispatch_event(const struct ib_event *event)
{
struct ib_update_work *work;
- if (event->event == IB_EVENT_PORT_ERR ||
- event->event == IB_EVENT_PORT_ACTIVE ||
- event->event == IB_EVENT_LID_CHANGE ||
- event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER ||
- event->event == IB_EVENT_GID_CHANGE) {
- work = kmalloc(sizeof *work, GFP_ATOMIC);
- if (work) {
- INIT_WORK(&work->work, ib_cache_task);
- work->device = event->device;
- work->port_num = event->element.port_num;
- if (event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_GID_CHANGE)
- work->enforce_security = true;
- else
- work->enforce_security = false;
-
- queue_work(ib_wq, &work->work);
- }
- }
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ if (is_cache_update_event(event))
+ INIT_WORK(&work->work, ib_cache_event_task);
+ else
+ INIT_WORK(&work->work, ib_generic_event_task);
+
+ work->event = *event;
+ if (event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_GID_CHANGE)
+ work->enforce_security = true;
+
+ queue_work(ib_wq, &work->work);
}
+EXPORT_SYMBOL(ib_dispatch_event);
int ib_cache_setup_one(struct ib_device *device)
{
@@ -1507,12 +1541,12 @@ int ib_cache_setup_one(struct ib_device *device)
if (err)
return err;
- rdma_for_each_port (device, p)
- ib_cache_update(device, p, true);
+ rdma_for_each_port (device, p) {
+ err = ib_cache_update(device, p, true);
+ if (err)
+ return err;
+ }
- INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
- device, ib_cache_event);
- ib_register_event_handler(&device->cache.event_handler);
return 0;
}
@@ -1534,14 +1568,12 @@ void ib_cache_release_one(struct ib_device *device)
void ib_cache_cleanup_one(struct ib_device *device)
{
- /* The cleanup function unregisters the event handler,
- * waits for all in-progress workqueue elements and cleans
- * up the GID cache. This function should be called after
- * the device was removed from the devices list and all
- * clients were removed, so the cache exists but is
+ /* The cleanup function waits for all in-progress workqueue
+ * elements and cleans up the GID cache. This function should be
+ * called after the device was removed from the devices list and
+ * all clients were removed, so the cache exists but is
* non-functional and shouldn't be updated anymore.
*/
- ib_unregister_event_handler(&device->cache.event_handler);
flush_workqueue(ib_wq);
gid_table_cleanup_one(device);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index da10e6ccb43c..09af96ec41dd 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -597,18 +597,6 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
return 0;
}
-static int cm_alloc_id(struct cm_id_private *cm_id_priv)
-{
- int err;
- u32 id;
-
- err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv,
- xa_limit_32b, &cm.local_id_next, GFP_KERNEL);
-
- cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
- return err;
-}
-
static u32 cm_local_id(__be32 local_id)
{
return (__force u32) (local_id ^ cm.random_id_operand);
@@ -862,6 +850,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
void *context)
{
struct cm_id_private *cm_id_priv;
+ u32 id;
int ret;
cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
@@ -873,9 +862,6 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
cm_id_priv->id.cm_handler = cm_handler;
cm_id_priv->id.context = context;
cm_id_priv->id.remote_cm_qpn = 1;
- ret = cm_alloc_id(cm_id_priv);
- if (ret)
- goto error;
spin_lock_init(&cm_id_priv->lock);
init_completion(&cm_id_priv->comp);
@@ -884,11 +870,20 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
INIT_LIST_HEAD(&cm_id_priv->altr_list);
atomic_set(&cm_id_priv->work_count, -1);
atomic_set(&cm_id_priv->refcount, 1);
+
+ ret = xa_alloc_cyclic_irq(&cm.local_id_table, &id, NULL, xa_limit_32b,
+ &cm.local_id_next, GFP_KERNEL);
+ if (ret < 0)
+ goto error;
+ cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
+ xa_store_irq(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id),
+ cm_id_priv, GFP_KERNEL);
+
return &cm_id_priv->id;
error:
kfree(cm_id_priv);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_create_cm_id);
@@ -1228,6 +1223,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
/* Sharing an ib_cm_id with different handlers is not
* supported */
spin_unlock_irqrestore(&cm.lock, flags);
+ ib_destroy_cm_id(cm_id);
return ERR_PTR(-EINVAL);
}
atomic_inc(&cm_id_priv->refcount);
@@ -4399,6 +4395,7 @@ error2:
error1:
port_modify.set_port_cap_mask = 0;
port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
+ kfree(port);
while (--i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
@@ -4407,6 +4404,7 @@ error1:
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
ib_unregister_mad_agent(port->mad_agent);
cm_remove_port_fs(port);
+ kfree(port);
}
free:
kfree(cm_dev);
@@ -4460,6 +4458,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
spin_unlock_irq(&cm.state_lock);
ib_unregister_mad_agent(cur_mad_agent);
cm_remove_port_fs(port);
+ kfree(port);
}
kfree(cm_dev);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index a68d0ccf67a4..08d6d7b2d635 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1631,6 +1631,8 @@ static struct rdma_id_private *cma_find_listener(
{
struct rdma_id_private *id_priv, *id_priv_dev;
+ lockdep_assert_held(&lock);
+
if (!bind_list)
return ERR_PTR(-EINVAL);
@@ -1677,6 +1679,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
}
+ mutex_lock(&lock);
/*
* Net namespace might be getting deleted while route lookup,
* cm_id lookup is in progress. Therefore, perform netdevice
@@ -1718,6 +1721,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
err:
rcu_read_unlock();
+ mutex_unlock(&lock);
if (IS_ERR(id_priv) && *net_dev) {
dev_put(*net_dev);
*net_dev = NULL;
@@ -2396,9 +2400,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
conn_id->cm_id.iw = NULL;
cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
+ mutex_unlock(&listen_id->handler_mutex);
cma_deref_id(conn_id);
rdma_destroy_id(&conn_id->id);
- goto out;
+ return ret;
}
mutex_unlock(&conn_id->handler_mutex);
@@ -2472,6 +2477,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
struct net *net = id_priv->id.route.addr.dev_addr.net;
int ret;
+ lockdep_assert_held(&lock);
+
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return;
@@ -2910,6 +2917,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
err2:
kfree(route->path_rec);
route->path_rec = NULL;
+ route->num_paths = 0;
err1:
kfree(work);
return ret;
@@ -3090,6 +3098,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
+ atomic_inc(&id_priv->refcount);
cma_init_resolve_addr_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
@@ -3116,6 +3125,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
+ atomic_inc(&id_priv->refcount);
cma_init_resolve_addr_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
@@ -3152,19 +3162,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret)
+ if (ret) {
+ memset(cma_dst_addr(id_priv), 0,
+ rdma_addr_size(dst_addr));
return ret;
+ }
}
- if (cma_family(id_priv) != dst_addr->sa_family)
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
} else {
@@ -3234,6 +3251,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
u64 sid, mask;
__be16 port;
+ lockdep_assert_held(&lock);
+
addr = cma_src_addr(id_priv);
port = htons(bind_list->port);
@@ -3262,6 +3281,8 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list;
int ret;
+ lockdep_assert_held(&lock);
+
bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
if (!bind_list)
return -ENOMEM;
@@ -3288,6 +3309,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
struct sockaddr *saddr = cma_src_addr(id_priv);
__be16 dport = cma_port(daddr);
+ lockdep_assert_held(&lock);
+
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
struct sockaddr *cur_saddr = cma_src_addr(cur_id);
@@ -3327,6 +3350,8 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
unsigned int rover;
struct net *net = id_priv->id.route.addr.dev_addr.net;
+ lockdep_assert_held(&lock);
+
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
rover = prandom_u32() % remaining + low;
@@ -3374,6 +3399,8 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *cur_id;
struct sockaddr *addr, *cur_addr;
+ lockdep_assert_held(&lock);
+
addr = cma_src_addr(id_priv);
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
if (id_priv == cur_id)
@@ -3404,6 +3431,8 @@ static int cma_use_port(enum rdma_ucm_port_space ps,
unsigned short snum;
int ret;
+ lockdep_assert_held(&lock);
+
snum = ntohs(cma_port(cma_src_addr(id_priv)));
if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
@@ -4709,6 +4738,19 @@ static int __init cma_init(void)
{
int ret;
+ /*
+ * There is a rare lock ordering dependency in cma_netdev_callback()
+ * that only happens when bonding is enabled. Teach lockdep that rtnl
+ * must never be nested under lock so it can find these without having
+ * to test with bonding.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ rtnl_lock();
+ mutex_lock(&lock);
+ mutex_unlock(&lock);
+ rtnl_unlock();
+ }
+
cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
if (!cma_wq)
return -ENOMEM;
@@ -4735,6 +4777,7 @@ err_ib:
err:
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
+ unregister_pernet_subsys(&cma_pernet_operations);
err_wq:
destroy_workqueue(cma_wq);
return ret;
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 3ec2c415bb70..6b3f5b25b6ff 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -322,8 +322,21 @@ fail:
return ERR_PTR(err);
}
+static void drop_cma_dev(struct config_group *cgroup, struct config_item *item)
+{
+ struct config_group *group =
+ container_of(item, struct config_group, cg_item);
+ struct cma_dev_group *cma_dev_group =
+ container_of(group, struct cma_dev_group, device_group);
+
+ configfs_remove_default_groups(&cma_dev_group->ports_group);
+ configfs_remove_default_groups(&cma_dev_group->device_group);
+ config_item_put(item);
+}
+
static struct configfs_group_operations cma_subsys_group_ops = {
.make_group = make_cma_dev,
+ .drop_item = drop_cma_dev,
};
static const struct config_item_type cma_subsys_type = {
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index ff9e0d7fb4f3..4c9193d2f894 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -119,6 +119,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+void ib_dispatch_event_clients(struct ib_event *event);
#ifdef CONFIG_CGROUP_RDMA
void ib_device_register_rdmacg(struct ib_device *device);
@@ -307,6 +308,21 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
qp->pd = pd;
qp->uobject = uobj;
qp->real_qp = qp;
+
+ qp->qp_type = attr->qp_type;
+ qp->rwq_ind_tbl = attr->rwq_ind_tbl;
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
+ qp->srq = attr->srq;
+ qp->rwq_ind_tbl = attr->rwq_ind_tbl;
+ qp->event_handler = attr->event_handler;
+ qp->qp_context = attr->qp_context;
+
+ atomic_set(&qp->usecnt, 0);
+ spin_lock_init(&qp->mr_lock);
+ INIT_LIST_HEAD(&qp->rdma_mrs);
+ INIT_LIST_HEAD(&qp->sig_mrs);
+
/*
* We don't track XRC QPs for now, because they don't have PD
* and more importantly they are created internally by driver,
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a24c900fbdf6..d17e7af352fd 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -113,7 +113,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
}
/**
- * __ib_alloc_cq - allocate a completion queue
+ * __ib_alloc_cq_user - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
@@ -193,7 +193,7 @@ out_destroy_cq:
EXPORT_SYMBOL(__ib_alloc_cq_user);
/**
- * ib_free_cq - free a completion queue
+ * ib_free_cq_user - free a completion queue
* @cq: completion queue to free.
* @udata: User data or NULL for kernel object
*/
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 156d210de195..80a1e20a95ae 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -589,6 +589,7 @@ struct ib_device *_ib_alloc_device(size_t size)
INIT_LIST_HEAD(&device->event_handler_list);
spin_lock_init(&device->event_handler_lock);
+ init_rwsem(&device->event_handler_rwsem);
mutex_init(&device->unregistration_lock);
/*
* client_data needs to be alloc because we don't want our mark to be
@@ -896,7 +897,9 @@ static int add_one_compat_dev(struct ib_device *device,
cdev->dev.parent = device->dev.parent;
rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
cdev->dev.release = compatdev_release;
- dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ if (ret)
+ goto add_err;
ret = device_add(&cdev->dev);
if (ret)
@@ -1190,9 +1193,21 @@ static void setup_dma_device(struct ib_device *device)
WARN_ON_ONCE(!parent);
device->dma_device = parent;
}
- /* Setup default max segment size for all IB devices */
- dma_set_max_seg_size(device->dma_device, SZ_2G);
+ if (!device->dev.dma_parms) {
+ if (parent) {
+ /*
+ * The caller did not provide DMA parameters, so
+ * 'parent' probably represents a PCI device. The PCI
+ * core sets the maximum segment size to 64
+ * KB. Increase this parameter to 2 GB.
+ */
+ device->dev.dma_parms = parent->dma_parms;
+ dma_set_max_seg_size(device->dma_device, SZ_2G);
+ } else {
+ WARN_ON_ONCE(true);
+ }
+ }
}
/*
@@ -1300,6 +1315,10 @@ out:
return ret;
}
+static void prevent_dealloc_device(struct ib_device *ib_dev)
+{
+}
+
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
@@ -1365,11 +1384,11 @@ int ib_register_device(struct ib_device *device, const char *name)
* possibility for a parallel unregistration along with this
* error flow. Since we have a refcount here we know any
* parallel flow is stopped in disable_device and will see the
- * NULL pointers, causing the responsibility to
+ * special dealloc_driver pointer, causing the responsibility to
* ib_dealloc_device() to revert back to this thread.
*/
dealloc_fn = device->ops.dealloc_driver;
- device->ops.dealloc_driver = NULL;
+ device->ops.dealloc_driver = prevent_dealloc_device;
ib_device_put(device);
__ib_unregister_device(device);
device->ops.dealloc_driver = dealloc_fn;
@@ -1417,7 +1436,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
* Drivers using the new flow may not call ib_dealloc_device except
* in error unwind prior to registration success.
*/
- if (ib_dev->ops.dealloc_driver) {
+ if (ib_dev->ops.dealloc_driver &&
+ ib_dev->ops.dealloc_driver != prevent_dealloc_device) {
WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
ib_dealloc_device(ib_dev);
}
@@ -1808,17 +1828,15 @@ EXPORT_SYMBOL(ib_set_client_data);
*
* ib_register_event_handler() registers an event handler that will be
* called back when asynchronous IB events occur (as defined in
- * chapter 11 of the InfiniBand Architecture Specification). This
- * callback may occur in interrupt context.
+ * chapter 11 of the InfiniBand Architecture Specification). This
+ * callback occurs in workqueue context.
*/
void ib_register_event_handler(struct ib_event_handler *event_handler)
{
- unsigned long flags;
-
- spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ down_write(&event_handler->device->event_handler_rwsem);
list_add_tail(&event_handler->list,
&event_handler->device->event_handler_list);
- spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+ up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_register_event_handler);
@@ -1831,35 +1849,23 @@ EXPORT_SYMBOL(ib_register_event_handler);
*/
void ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
- unsigned long flags;
-
- spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ down_write(&event_handler->device->event_handler_rwsem);
list_del(&event_handler->list);
- spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+ up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_unregister_event_handler);
-/**
- * ib_dispatch_event - Dispatch an asynchronous event
- * @event:Event to dispatch
- *
- * Low-level drivers must call ib_dispatch_event() to dispatch the
- * event to all registered event handlers when an asynchronous event
- * occurs.
- */
-void ib_dispatch_event(struct ib_event *event)
+void ib_dispatch_event_clients(struct ib_event *event)
{
- unsigned long flags;
struct ib_event_handler *handler;
- spin_lock_irqsave(&event->device->event_handler_lock, flags);
+ down_read(&event->device->event_handler_rwsem);
list_for_each_entry(handler, &event->device->event_handler_list, list)
handler->handler(handler, event);
- spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
+ up_read(&event->device->event_handler_rwsem);
}
-EXPORT_SYMBOL(ib_dispatch_event);
/**
* ib_query_port - Query IB port attributes
@@ -2229,8 +2235,12 @@ int ib_modify_port(struct ib_device *device,
rc = device->ops.modify_port(device, port_num,
port_modify_mask,
port_modify);
+ else if (rdma_protocol_roce(device, port_num) &&
+ ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 ||
+ (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0))
+ rc = 0;
else
- rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
+ rc = -EOPNOTSUPP;
return rc;
}
EXPORT_SYMBOL(ib_modify_port);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 72141c5b7c95..570dc526a942 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -159,8 +159,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
{
struct list_head *e, *tmp;
- list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
+ list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) {
+ list_del(e);
kfree(list_entry(e, struct iwcm_work, free_list));
+ }
}
static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 9947d16edef2..2284930b5f91 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -639,10 +639,10 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
flush_workqueue(port_priv->wq);
- ib_cancel_rmpp_recvs(mad_agent_priv);
deref_mad_agent(mad_agent_priv);
wait_for_completion(&mad_agent_priv->comp);
+ ib_cancel_rmpp_recvs(mad_agent_priv);
ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
@@ -2960,6 +2960,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
sg_list.addr))) {
+ kfree(mad_priv);
ret = -ENOMEM;
break;
}
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 69188cbbd99b..6123780f9602 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -692,6 +692,10 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
IB_DEVICE_NAME_MAX);
+ if (strlen(name) == 0) {
+ err = -EINVAL;
+ goto done;
+ }
err = ib_device_rename(device, name);
goto done;
}
@@ -1055,7 +1059,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto err;
+ goto err_get;
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
@@ -1069,10 +1073,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
- rdma_restrack_put(res);
if (ret)
goto err_free;
+ rdma_restrack_put(res);
nlmsg_end(msg, nlh);
ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
@@ -1292,7 +1296,7 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
sizeof(ibdev_name));
- if (strchr(ibdev_name, '%'))
+ if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
return -EINVAL;
nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index ccf4d069c25c..d0580eed3bcb 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -160,9 +160,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
uobj->context = NULL;
/*
- * For DESTROY the usecnt is held write locked, the caller is expected
- * to put it unlock and put the object when done with it. Only DESTROY
- * can remove the IDR handle.
+ * For DESTROY the usecnt is not changed, the caller is expected to
+ * manage it via uobj_put_destroy(). Only DESTROY can remove the IDR
+ * handle.
*/
if (reason != RDMA_REMOVE_DESTROY)
atomic_set(&uobj->usecnt, 0);
@@ -194,7 +194,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
/*
* This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
* sequence. It should only be used from command callbacks. On success the
- * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
+ * caller must pair this with uobj_put_destroy(). This
* version requires the caller to have already obtained an
* LOOKUP_DESTROY uobject kref.
*/
@@ -205,6 +205,13 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
down_read(&ufile->hw_destroy_rwsem);
+ /*
+ * Once the uobject is destroyed by RDMA_REMOVE_DESTROY then it is left
+ * write locked as the callers put it back with UVERBS_LOOKUP_DESTROY.
+ * This is because any other concurrent thread can still see the object
+ * in the xarray due to RCU. Leaving it locked ensures nothing else will
+ * touch it.
+ */
ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
if (ret)
goto out_unlock;
@@ -223,7 +230,7 @@ out_unlock:
/*
* uobj_get_destroy destroys the HW object and returns a handle to the uobj
* with a NULL object pointer. The caller must pair this with
- * uverbs_put_destroy.
+ * uobj_put_destroy().
*/
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
u32 id, struct uverbs_attr_bundle *attrs)
@@ -257,8 +264,7 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
uobj = __uobj_get_destroy(obj, id, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
-
- rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+ uobj_put_destroy(uobj);
return 0;
}
@@ -362,7 +368,7 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj,
* and the caller is expected to ensure that uverbs_close_fd is never
* done while a call to lookup is possible.
*/
- if (f->f_op != fd_type->fops) {
+ if (f->f_op != fd_type->fops || uobject->ufile != ufile) {
fput(f);
return ERR_PTR(-EBADF);
}
@@ -689,7 +695,6 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj,
enum rdma_lookup_mode mode)
{
assert_uverbs_usecnt(uobj, mode);
- uobj->uapi_object->type_class->lookup_put(uobj, mode);
/*
* In order to unlock an object, either decrease its usecnt for
* read access or zero it in case of exclusive access. See
@@ -706,6 +711,7 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj,
break;
}
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
/* Pairs with the kref obtained by type->lookup_get */
uverbs_uobject_put(uobj);
}
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 7d8071c7e564..e3058b9e814b 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -829,13 +829,20 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
return len;
}
-static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
+static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
void *data;
struct ib_sa_mad *mad;
int len;
+ unsigned long flags;
+ unsigned long delay;
+ gfp_t gfp_flag;
+ int ret;
+
+ INIT_LIST_HEAD(&query->list);
+ query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
mad = query->mad_buf->mad;
len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
@@ -860,36 +867,25 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
-}
+ gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
+ GFP_NOWAIT;
-static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
-{
- unsigned long flags;
- unsigned long delay;
- int ret;
+ spin_lock_irqsave(&ib_nl_request_lock, flags);
+ ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_flag);
- INIT_LIST_HEAD(&query->list);
- query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
+ if (ret)
+ goto out;
- /* Put the request on the list first.*/
- spin_lock_irqsave(&ib_nl_request_lock, flags);
+ /* Put the request on the list.*/
delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
query->timeout = delay + jiffies;
list_add_tail(&query->list, &ib_nl_request_list);
/* Start the timeout if this is the only request */
if (ib_nl_request_list.next == &query->list)
queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
- ret = ib_nl_send_msg(query, gfp_mask);
- if (ret) {
- ret = -EIO;
- /* Remove the request */
- spin_lock_irqsave(&ib_nl_request_lock, flags);
- list_del(&query->list);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
- }
+out:
+ spin_unlock_irqrestore(&ib_nl_request_lock, flags);
return ret;
}
@@ -1068,7 +1064,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
}
settimeout_out:
- return skb->len;
+ return 0;
}
static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
@@ -1139,7 +1135,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
}
resp_out:
- return skb->len;
+ return 0;
}
static void free_sm_ah(struct kref *kref)
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 6eb6d2717ca5..75e7ec017836 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -339,27 +339,20 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
if (!new_pps)
return NULL;
- if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) {
- if (!qp_pps) {
- new_pps->main.port_num = qp_attr->port_num;
- new_pps->main.pkey_index = qp_attr->pkey_index;
- } else {
- new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ?
- qp_attr->port_num :
- qp_pps->main.port_num;
-
- new_pps->main.pkey_index =
- (qp_attr_mask & IB_QP_PKEY_INDEX) ?
- qp_attr->pkey_index :
- qp_pps->main.pkey_index;
- }
- new_pps->main.state = IB_PORT_PKEY_VALID;
- } else if (qp_pps) {
+ if (qp_attr_mask & IB_QP_PORT)
+ new_pps->main.port_num = qp_attr->port_num;
+ else if (qp_pps)
new_pps->main.port_num = qp_pps->main.port_num;
+
+ if (qp_attr_mask & IB_QP_PKEY_INDEX)
+ new_pps->main.pkey_index = qp_attr->pkey_index;
+ else if (qp_pps)
new_pps->main.pkey_index = qp_pps->main.pkey_index;
- if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
- new_pps->main.state = IB_PORT_PKEY_VALID;
- }
+
+ if (((qp_attr_mask & IB_QP_PKEY_INDEX) &&
+ (qp_attr_mask & IB_QP_PORT)) ||
+ (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID))
+ new_pps->main.state = IB_PORT_PKEY_VALID;
if (qp_attr_mask & IB_QP_ALT_PATH) {
new_pps->alt.port_num = qp_attr->alt_port_num;
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c78d0c9646ae..2b9991b37bc8 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1044,8 +1044,7 @@ static int add_port(struct ib_core_device *coredev, int port_num)
coredev->ports_kobj,
"%d", port_num);
if (ret) {
- kfree(p);
- return ret;
+ goto err_put;
}
p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
@@ -1058,8 +1057,7 @@ static int add_port(struct ib_core_device *coredev, int port_num)
ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
&p->kobj, "gid_attrs");
if (ret) {
- kfree(p->gid_attr_group);
- goto err_put;
+ goto err_put_gid_attrs;
}
if (device->ops.process_mad && is_full_dev) {
@@ -1386,8 +1384,10 @@ int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s",
name);
- if (ret)
+ if (ret) {
+ kobject_put(kobj);
return ret;
+ }
}
return 0;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 140a338a135f..176738311020 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -89,6 +89,7 @@ struct ucma_context {
struct ucma_file *file;
struct rdma_cm_id *cm_id;
+ struct mutex mutex;
u64 uid;
struct list_head list;
@@ -215,6 +216,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
init_completion(&ctx->comp);
INIT_LIST_HEAD(&ctx->mc_list);
ctx->file = file;
+ mutex_init(&ctx->mutex);
mutex_lock(&mut);
ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
@@ -596,6 +598,7 @@ static int ucma_free_ctx(struct ucma_context *ctx)
}
events_reported = ctx->events_reported;
+ mutex_destroy(&ctx->mutex);
kfree(ctx);
return events_reported;
}
@@ -665,7 +668,10 @@ static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+ mutex_unlock(&ctx->mutex);
+
ucma_put_ctx(ctx);
return ret;
}
@@ -688,7 +694,9 @@ static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -712,8 +720,10 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -738,8 +748,10 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -759,7 +771,9 @@ static ssize_t ucma_resolve_route(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -848,6 +862,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
memset(&resp, 0, sizeof resp);
addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
@@ -871,6 +886,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
ucma_copy_iw_route(&resp, &ctx->cm_id->route);
out:
+ mutex_unlock(&ctx->mutex);
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -1022,6 +1038,7 @@ static ssize_t ucma_query(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
switch (cmd.option) {
case RDMA_USER_CM_QUERY_ADDR:
ret = ucma_query_addr(ctx, response, out_len);
@@ -1036,6 +1053,7 @@ static ssize_t ucma_query(struct ucma_file *file,
ret = -ENOSYS;
break;
}
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
@@ -1076,7 +1094,9 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
return PTR_ERR(ctx);
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
+ mutex_lock(&ctx->mutex);
ret = rdma_connect(ctx->cm_id, &conn_param);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1097,7 +1117,9 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
cmd.backlog : max_backlog;
+ mutex_lock(&ctx->mutex);
ret = rdma_listen(ctx->cm_id, ctx->backlog);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1120,13 +1142,17 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
mutex_lock(&file->mut);
+ mutex_lock(&ctx->mutex);
ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
+ mutex_unlock(&ctx->mutex);
if (!ret)
ctx->uid = cmd.uid;
mutex_unlock(&file->mut);
- } else
+ } else {
+ mutex_lock(&ctx->mutex);
ret = __rdma_accept(ctx->cm_id, NULL, NULL);
-
+ mutex_unlock(&ctx->mutex);
+ }
ucma_put_ctx(ctx);
return ret;
}
@@ -1145,7 +1171,9 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1164,7 +1192,9 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_disconnect(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1195,7 +1225,9 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
resp.qp_attr_mask = 0;
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.qp_state = cmd.qp_state;
+ mutex_lock(&ctx->mutex);
ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
+ mutex_unlock(&ctx->mutex);
if (ret)
goto out;
@@ -1281,9 +1313,13 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
struct sa_path_rec opa;
sa_convert_path_ib_to_opa(&opa, &sa_path);
+ mutex_lock(&ctx->mutex);
ret = rdma_set_ib_path(ctx->cm_id, &opa);
+ mutex_unlock(&ctx->mutex);
} else {
+ mutex_lock(&ctx->mutex);
ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
+ mutex_unlock(&ctx->mutex);
}
if (ret)
return ret;
@@ -1316,7 +1352,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
switch (level) {
case RDMA_OPTION_ID:
+ mutex_lock(&ctx->mutex);
ret = ucma_set_option_id(ctx, optname, optval, optlen);
+ mutex_unlock(&ctx->mutex);
break;
case RDMA_OPTION_IB:
ret = ucma_set_option_ib(ctx, optname, optval, optlen);
@@ -1376,8 +1414,10 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
if (ctx->cm_id->device)
ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
@@ -1420,8 +1460,10 @@ static ssize_t ucma_process_join(struct ucma_file *file,
mc->join_state = join_state;
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
+ mutex_lock(&ctx->mutex);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
join_state, mc);
+ mutex_unlock(&ctx->mutex);
if (ret)
goto err2;
@@ -1525,7 +1567,10 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
goto out;
}
+ mutex_lock(&mc->ctx->mutex);
rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
+ mutex_unlock(&mc->ctx->mutex);
+
mutex_lock(&mc->ctx->file->mut);
ucma_cleanup_mc_events(mc);
list_del(&mc->list);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index e7ea819fcb11..b9f1dfb2a37a 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -168,10 +168,13 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
* for any address.
*/
mask |= (sg_dma_address(sg) + pgoff) ^ va;
- if (i && i != (umem->nmap - 1))
- /* restrict by length as well for interior SGEs */
- mask |= sg_dma_len(sg);
va += sg_dma_len(sg) - pgoff;
+ /* Except for the last entry, the ending iova alignment sets
+ * the maximum possible page size as the low bits of the iova
+ * must be zero when starting the next chunk.
+ */
+ if (i != (umem->nmap - 1))
+ mask |= va;
pgoff = 0;
}
best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 5e5f7dd82c50..bab412516b07 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -626,7 +626,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
while (bcnt > 0) {
const size_t gup_num_pages = min_t(size_t,
- (bcnt + BIT(page_shift) - 1) >> page_shift,
+ ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
PAGE_SIZE / sizeof(struct page *));
down_read(&owning_mm->mmap_sem);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 025b6d86a61f..9b9fa953595b 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1274,6 +1274,9 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
struct ib_umad_file *file;
int id;
+ cdev_device_del(&port->sm_cdev, &port->sm_dev);
+ cdev_device_del(&port->cdev, &port->dev);
+
mutex_lock(&port->file_mutex);
/* Mark ib_dev NULL and block ioctl or other file ops to progress
@@ -1293,8 +1296,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
mutex_unlock(&port->file_mutex);
- cdev_device_del(&port->sm_cdev, &port->sm_dev);
- cdev_device_del(&port->cdev, &port->dev);
ida_free(&umad_ida, port->dev_num);
/* balances device_initialize() */
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 1e5aeb39f774..63f7f7db5902 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -98,7 +98,7 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata,
struct ib_uverbs_device {
atomic_t refcount;
- int num_comp_vectors;
+ u32 num_comp_vectors;
struct completion comp;
struct device dev;
/* First group for device attributes, NULL terminated array */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index df8e8ac2c16b..6273d2430736 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -760,6 +760,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
mr->res.type = RDMA_RESTRACK_MR;
+ mr->iova = cmd.hca_va;
rdma_restrack_uadd(&mr->res);
uobj->object = mr;
@@ -850,6 +851,9 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
atomic_dec(&old_pd->usecnt);
}
+ if (cmd.flags & IB_MR_REREG_TRANS)
+ mr->iova = cmd.hca_va;
+
memset(&resp, 0, sizeof(resp));
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
@@ -1429,17 +1433,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
if (ret)
goto err_cb;
- qp->pd = pd;
- qp->send_cq = attr.send_cq;
- qp->recv_cq = attr.recv_cq;
- qp->srq = attr.srq;
- qp->rwq_ind_tbl = ind_tbl;
- qp->event_handler = attr.event_handler;
- qp->qp_context = attr.qp_context;
- qp->qp_type = attr.qp_type;
- atomic_set(&qp->usecnt, 0);
atomic_inc(&pd->usecnt);
- qp->port = 0;
if (attr.send_cq)
atomic_inc(&attr.send_cq->usecnt);
if (attr.recv_cq)
@@ -2716,12 +2710,6 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
return 0;
}
-static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec)
-{
- /* Returns user space filter size, includes padding */
- return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
-}
-
static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size,
u16 ib_real_filter_sz)
{
@@ -2865,11 +2853,16 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec)
{
- ssize_t kern_filter_sz;
+ size_t kern_filter_sz;
void *kern_spec_mask;
void *kern_spec_val;
- kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
+ if (check_sub_overflow((size_t)kern_spec->hdr.size,
+ sizeof(struct ib_uverbs_flow_spec_hdr),
+ &kern_filter_sz))
+ return -EINVAL;
+
+ kern_filter_sz /= 2;
kern_spec_val = (void *)kern_spec +
sizeof(struct ib_uverbs_flow_spec_hdr);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 84a5e9a6d483..5b7f9d33dd80 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -215,7 +215,6 @@ void ib_uverbs_release_file(struct kref *ref)
}
static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
- struct ib_uverbs_file *uverbs_file,
struct file *filp, char __user *buf,
size_t count, loff_t *pos,
size_t eventsz)
@@ -233,19 +232,16 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
if (wait_event_interruptible(ev_queue->poll_wait,
(!list_empty(&ev_queue->event_list) ||
- /* The barriers built into wait_event_interruptible()
- * and wake_up() guarentee this will see the null set
- * without using RCU
- */
- !uverbs_file->device->ib_dev)))
+ ev_queue->is_closed)))
return -ERESTARTSYS;
+ spin_lock_irq(&ev_queue->lock);
+
/* If device was disassociated and no event exists set an error */
- if (list_empty(&ev_queue->event_list) &&
- !uverbs_file->device->ib_dev)
+ if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
+ spin_unlock_irq(&ev_queue->lock);
return -EIO;
-
- spin_lock_irq(&ev_queue->lock);
+ }
}
event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
@@ -280,8 +276,7 @@ static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
{
struct ib_uverbs_async_event_file *file = filp->private_data;
- return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
- buf, count, pos,
+ return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos,
sizeof(struct ib_uverbs_async_event_desc));
}
@@ -291,9 +286,8 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
struct ib_uverbs_completion_event_file *comp_ev_file =
filp->private_data;
- return ib_uverbs_event_read(&comp_ev_file->ev_queue,
- comp_ev_file->uobj.ufile, filp,
- buf, count, pos,
+ return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count,
+ pos,
sizeof(struct ib_uverbs_comp_event_desc));
}
@@ -308,6 +302,8 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
if (!list_empty(&ev_queue->event_list))
pollflags = EPOLLIN | EPOLLRDNORM;
+ else if (ev_queue->is_closed)
+ pollflags = EPOLLERR;
spin_unlock_irq(&ev_queue->lock);
return pollflags;
@@ -316,7 +312,9 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
static __poll_t ib_uverbs_async_event_poll(struct file *filp,
struct poll_table_struct *wait)
{
- return ib_uverbs_event_poll(filp->private_data, filp, wait);
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return ib_uverbs_event_poll(&file->ev_queue, filp, wait);
}
static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
@@ -330,9 +328,9 @@ static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
- struct ib_uverbs_event_queue *ev_queue = filp->private_data;
+ struct ib_uverbs_async_event_file *file = filp->private_data;
- return fasync_helper(fd, filp, on, &ev_queue->async_queue);
+ return fasync_helper(fd, filp, on, &file->ev_queue.async_queue);
}
static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e666a1f7608d..41caf36575df 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -316,7 +316,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
EXPORT_SYMBOL(__ib_alloc_pd);
/**
- * ib_dealloc_pd - Deallocates a protection domain.
+ * ib_dealloc_pd_user - Deallocates a protection domain.
* @pd: The protection domain to deallocate.
* @udata: Valid user data or NULL for kernel object
*
@@ -661,16 +661,17 @@ static bool find_gid_index(const union ib_gid *gid,
void *context)
{
struct find_gid_index_context *ctx = context;
+ u16 vlan_id = 0xffff;
+ int ret;
if (ctx->gid_type != gid_attr->gid_type)
return false;
- if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
- (is_vlan_dev(gid_attr->ndev) &&
- vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
+ if (ret)
return false;
- return true;
+ return ctx->vlan_id == vlan_id;
}
static const struct ib_gid_attr *
@@ -1174,16 +1175,6 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
if (ret)
goto err;
- qp->qp_type = qp_init_attr->qp_type;
- qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
-
- atomic_set(&qp->usecnt, 0);
- qp->mrs_used = 0;
- spin_lock_init(&qp->mr_lock);
- INIT_LIST_HEAD(&qp->rdma_mrs);
- INIT_LIST_HEAD(&qp->sig_mrs);
- qp->port = 0;
-
if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
struct ib_qp *xrc_qp =
create_xrc_qp_user(qp, qp_init_attr, udata);
@@ -1650,7 +1641,7 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
if (!(rdma_protocol_ib(qp->device,
attr->alt_ah_attr.port_num) &&
rdma_protocol_ib(qp->device, port))) {
- ret = EINVAL;
+ ret = -EINVAL;
goto out;
}
}
@@ -1981,7 +1972,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
EXPORT_SYMBOL(ib_dereg_mr_user);
/**
- * ib_alloc_mr() - Allocates a memory region
+ * ib_alloc_mr_user() - Allocates a memory region
* @pd: protection domain associated with the region
* @mr_type: memory region type
* @max_num_sg: maximum sg entries available for registration.
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 94559169924f..7d9f44434562 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -3346,8 +3346,10 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
int rc;
rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (rc)
+ if (rc) {
dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
+ return rc;
+ }
if (mr->pages) {
rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 814f959c7db9..5ef9d5252358 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1268,10 +1268,10 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
return;
}
rdev->qplib_ctx.hwrm_intf_ver =
- (u64)resp.hwrm_intf_major << 48 |
- (u64)resp.hwrm_intf_minor << 32 |
- (u64)resp.hwrm_intf_build << 16 |
- resp.hwrm_intf_patch;
+ (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
+ (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
+ (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
+ le16_to_cpu(resp.hwrm_intf_patch);
}
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
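
Note: the firmware interface fields arrive as little-endian 16-bit values, so each must pass through le16_to_cpu() before being shifted into the 64-bit version word; on a big-endian host the unconverted shifts produced garbage. A userspace sketch of the packing, with le16toh() from endian.h standing in for le16_to_cpu():

#include <endian.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t pack_intf_ver(uint16_t major, uint16_t minor,
			      uint16_t build, uint16_t patch)
{
	/* each field is le16 on the wire; convert before shifting */
	return (uint64_t)le16toh(major) << 48 |
	       (uint64_t)le16toh(minor) << 32 |
	       (uint64_t)le16toh(build) << 16 |
	       le16toh(patch);
}

int main(void)
{
	/* htole16() models the device writing little-endian fields */
	uint64_t v = pack_intf_ver(htole16(1), htole16(10),
				   htole16(2), htole16(3));

	printf("0x%016" PRIx64 "\n", v); /* 0x0001000a00020003 */
	return 0;
}
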
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 958c1ff9c515..4d07d22bfa7b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -2283,13 +2283,13 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
/* Add qp to flush list of the CQ */
bnxt_qplib_add_flush_qp(qp);
} else {
+ /* Before we complete, do WA 9060 */
+ if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+ cqe_sq_cons)) {
+ *lib_qp = qp;
+ goto out;
+ }
if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
- /* Before we complete, do WA 9060 */
- if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
- cqe_sq_cons)) {
- *lib_qp = qp;
- goto out;
- }
cqe->status = CQ_REQ_STATUS_OK;
cqe++;
(*budget)--;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index fbda11a7ab1a..aaa76d792185 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -186,7 +186,9 @@ struct bnxt_qplib_chip_ctx {
u8 chip_metal;
};
-#define CHIP_NUM_57500 0x1750
+#define CHIP_NUM_57508 0x1750
+#define CHIP_NUM_57504 0x1751
+#define CHIP_NUM_57502 0x1752
struct bnxt_qplib_res {
struct pci_dev *pdev;
@@ -203,7 +205,9 @@ struct bnxt_qplib_res {
static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
{
- return (cctx->chip_num == CHIP_NUM_57500);
+ return (cctx->chip_num == CHIP_NUM_57508 ||
+ cctx->chip_num == CHIP_NUM_57504 ||
+ cctx->chip_num == CHIP_NUM_57502);
}
static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0f3b1193d5f8..69d5fc3eba45 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -495,7 +495,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
release_ep_resources(ep);
- kfree_skb(skb);
return 0;
}
@@ -506,7 +505,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
c4iw_put_ep(&ep->parent_ep->com);
release_ep_resources(ep);
- kfree_skb(skb);
return 0;
}
@@ -2421,20 +2419,6 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
-
- skb_get(skb);
- rpl = cplhdr(skb);
- if (!is_t4(adapter_type)) {
- skb_trim(skb, roundup(sizeof(*rpl5), 16));
- rpl5 = (void *)rpl;
- INIT_TP_WR(rpl5, ep->hwtid);
- } else {
- skb_trim(skb, sizeof(*rpl));
- INIT_TP_WR(rpl, ep->hwtid);
- }
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- ep->hwtid));
-
cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
enable_tcp_timestamps && req->tcpopt.tstamp,
(ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
@@ -2480,6 +2464,20 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
if (tcph->ece && tcph->cwr)
opt2 |= CCTRL_ECN_V(1);
}
+
+ skb_get(skb);
+ rpl = cplhdr(skb);
+ if (!is_t4(adapter_type)) {
+ skb_trim(skb, roundup(sizeof(*rpl5), 16));
+ rpl5 = (void *)rpl;
+ INIT_TP_WR(rpl5, ep->hwtid);
+ } else {
+ skb_trim(skb, sizeof(*rpl));
+ INIT_TP_WR(rpl, ep->hwtid);
+ }
+ OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+ ep->hwtid));
+
if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
u32 isn = (prandom_u32() & ~7UL) - 1;
opt2 |= T5_OPT_2_VALID_F;
@@ -2890,8 +2888,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
srqidx = ABORT_RSS_SRQIDX_G(
be32_to_cpu(req->srqidx_status));
if (srqidx) {
- complete_cached_srq_buffers(ep,
- req->srqidx_status);
+ complete_cached_srq_buffers(ep, srqidx);
} else {
/* Hold ep ref until finish_peer_abort() */
c4iw_get_ep(&ep->com);
@@ -3035,6 +3032,10 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
+ /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3,
+ * when entering the TERM state the RNIC MUST initiate a CLOSE.
+ */
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
} else
pr_warn("TERM received tid %u no ep/qp\n", tid);
@@ -3868,8 +3869,8 @@ static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
- ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W,
- TCB_RQ_START_S);
+ ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
+ TCB_RQ_START_S);
cleanup:
pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 4c0d925c5ff5..1e6f38dea488 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep,
}
}
-static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd)
+static int dump_qp(unsigned long id, struct c4iw_qp *qp,
+ struct c4iw_debugfs_data *qpd)
{
int space;
int cc;
+ if (id != qp->wq.sq.qid)
+ return 0;
space = qpd->bufsize - qpd->pos - 1;
if (space == 0)
@@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file)
xa_lock_irq(&qpd->devp->qps);
xa_for_each(&qpd->devp->qps, index, qp)
- dump_qp(qp, qpd);
+ dump_qp(index, qp, qpd);
xa_unlock_irq(&qpd->devp->qps);
qpd->buf[qpd->pos++] = 0;
@@ -950,6 +953,7 @@ void c4iw_dealloc(struct uld_ctx *ctx)
static void c4iw_remove(struct uld_ctx *ctx)
{
pr_debug("c4iw_dev %p\n", ctx->dev);
+ debugfs_remove_recursive(ctx->dev->debugfs_root);
c4iw_unregister_device(ctx->dev);
c4iw_dealloc(ctx);
}
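
Note: dump_qp() now receives the XArray index and returns early unless it equals the QP's SQ qid, so the debugfs walk emits one line per QP even when an entry is reachable under another index. The shape of that filter, over a plain table of (key, object) pairs:

#include <stdio.h>

struct qp { unsigned long sq_qid; const char *name; };

static void dump_qp(unsigned long id, const struct qp *qp)
{
	if (id != qp->sq_qid)
		return; /* skip aliases: only the SQ-qid entry is dumped */
	printf("qp %s (qid %lu)\n", qp->name, qp->sq_qid);
}

int main(void)
{
	struct qp a = { 4, "a" }, b = { 6, "b" };
	/* one object can sit under several keys in the table */
	struct { unsigned long key; const struct qp *qp; } tbl[] = {
		{ 4, &a }, { 5, &a }, { 6, &b },
	};

	for (unsigned int i = 0; i < 3; i++)
		dump_qp(tbl[i].key, tbl[i].qp);
	return 0;
}
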
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 69b287d025f7..7af958b3318e 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -398,7 +398,6 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
mhp->ibmr.length = mhp->attr.len;
- mhp->ibmr.iova = mhp->attr.va_fbo;
mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12);
pr_debug("mmid 0x%x mhp %p\n", mmid, mhp);
return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index e92b9544357a..8b587cb2aa55 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1976,10 +1976,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
qhp->attr.layer_etype = attrs->layer_etype;
qhp->attr.ecode = attrs->ecode;
ep = qhp->ep;
- c4iw_get_ep(&ep->com);
- disconnect = 1;
if (!internal) {
+ c4iw_get_ep(&ep->com);
terminate = 1;
+ disconnect = 1;
} else {
terminate = qhp->attr.send_term;
ret = rdma_fini(rhp, qhp, ep);
@@ -2754,15 +2754,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
srq->flags = T4_SRQ_LIMIT_SUPPORT;
- ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL);
- if (ret)
- goto err_free_queue;
-
if (udata) {
srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
if (!srq_key_mm) {
ret = -ENOMEM;
- goto err_remove_handle;
+ goto err_free_queue;
}
srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
if (!srq_db_key_mm) {
@@ -2806,8 +2802,6 @@ err_free_srq_db_key_mm:
kfree(srq_db_key_mm);
err_free_srq_key_mm:
kfree(srq_key_mm);
-err_remove_handle:
- xa_erase_irq(&rhp->qps, srq->wq.qid);
err_free_queue:
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
srq->wr_waitp);
@@ -2830,8 +2824,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
rhp = srq->rhp;
pr_debug("%s id %d\n", __func__, srq->wq.qid);
-
- xa_erase_irq(&rhp->qps, srq->wq.qid);
ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
ibucontext);
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index a5c788741a04..294733ebf754 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -318,6 +318,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
struct efa_admin_acq_entry *comp,
size_t comp_size_in_bytes)
{
+ struct efa_admin_aq_entry *aqe;
struct efa_comp_ctx *comp_ctx;
u16 queue_size_mask;
u16 ctx_id;
@@ -347,7 +348,9 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
reinit_completion(&comp_ctx->wait_event);
- memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes);
+ aqe = &aq->sq.entries[pi];
+ memset(aqe, 0, sizeof(*aqe));
+ memcpy(aqe, cmd, cmd_size_in_bytes);
aq->sq.pc++;
atomic64_inc(&aq->stats.submitted_cmd);
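
Note: admin submission queue slots are reused ring entries, so a command shorter than the slot would inherit stale bytes from the previous occupant; the slot is now zeroed before the copy. A self-contained demonstration of the difference:

#include <stdio.h>
#include <string.h>

#define SLOT_SIZE 16

static unsigned char slot[SLOT_SIZE];

static void submit(const void *cmd, size_t len, int zero_first)
{
	if (zero_first)
		memset(slot, 0, sizeof(slot)); /* clear stale bytes */
	memcpy(slot, cmd, len);
}

static void dump(const char *tag)
{
	printf("%s:", tag);
	for (size_t i = 0; i < SLOT_SIZE; i++)
		printf(" %02x", slot[i]);
	printf("\n");
}

int main(void)
{
	unsigned char big[SLOT_SIZE], small[4] = { 1, 2, 3, 4 };

	memset(big, 0xaa, sizeof(big));
	submit(big, sizeof(big), 0);
	submit(small, sizeof(small), 0);
	dump("no memset  "); /* tail still shows 0xaa from the big command */
	submit(big, sizeof(big), 0);
	submit(small, sizeof(small), 1);
	dump("with memset"); /* tail is zeroed */
	return 0;
}
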
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 4fe662c3bbc1..1aeea5d65c01 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -479,6 +479,8 @@ static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
}
+ free_cpumask_var(available_cpus);
+ free_cpumask_var(non_intr_cpus);
return 0;
fail:
@@ -1038,7 +1040,7 @@ int hfi1_get_proc_affinity(int node)
struct hfi1_affinity_node *entry;
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
const struct cpumask *node_mask,
- *proc_mask = &current->cpus_allowed;
+ *proc_mask = current->cpus_ptr;
struct hfi1_affinity_node_list *affinity = &node_affinity;
struct cpu_mask_set *set = &affinity->proc;
@@ -1046,7 +1048,7 @@ int hfi1_get_proc_affinity(int node)
* check whether process/context affinity has already
* been set
*/
- if (cpumask_weight(proc_mask) == 1) {
+ if (current->nr_cpus_allowed == 1) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
@@ -1057,7 +1059,7 @@ int hfi1_get_proc_affinity(int node)
cpu = cpumask_first(proc_mask);
cpumask_set_cpu(cpu, &set->used);
goto done;
- } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+ } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 67052dc3100c..e510f019d712 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1685,6 +1685,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry,
return dd->verbs_dev.n_piodrain;
}
+static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry,
+ void *context, int vl, int mode, u64 data)
+{
+ struct hfi1_devdata *dd = context;
+
+ return dd->ctx0_seq_drop;
+}
+
static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
void *context, int vl, int mode, u64 data)
{
@@ -4105,6 +4113,7 @@ def_access_ibp_counter(seq_naks);
static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
[C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
[C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH),
+[C_RX_SHORT_ERR] = RXE32_DEV_CNTR_ELEM(RxShrErr, RCV_SHORT_ERR_CNT, CNTR_SYNTH),
[C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH),
[C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH),
[C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
@@ -4248,6 +4257,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
access_sw_cpu_intr),
[C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
access_sw_cpu_rcv_limit),
+[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL,
+ access_sw_ctx0_seq_drop),
[C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
access_sw_vtx_wait),
[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index b76cf81f927f..39a57c35af22 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -859,6 +859,7 @@ static inline int idx_from_vl(int vl)
enum {
C_RCV_OVF = 0,
C_RX_LEN_ERR,
+ C_RX_SHORT_ERR,
C_RX_ICRC_ERR,
C_RX_EBP,
C_RX_TID_FULL,
@@ -926,6 +927,7 @@ enum {
C_DC_PG_STS_TX_MBE_CNT,
C_SW_CPU_INTR,
C_SW_CPU_RCV_LIM,
+ C_SW_CTX0_SEQ_DROP,
C_SW_VTX_WAIT,
C_SW_PIO_WAIT,
C_SW_PIO_DRAIN,
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index ab3589d17aee..fb3ec9bff7a2 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -381,6 +381,7 @@
#define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468)
#define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0)
#define RCV_LENGTH_ERR_CNT 0
+#define RCV_SHORT_ERR_CNT 2
#define RCV_ICRC_ERR_CNT 6
#define RCV_EBP_CNT 9
#define RCV_BUF_OVFL_CNT 10
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 01aa1f132f55..941b465244ab 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -734,6 +734,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
{
int ret;
+ packet->rcd->dd->ctx0_seq_drop++;
/* Set up for the next packet */
packet->rhqoff += packet->rsize;
if (packet->rhqoff >= packet->maxcnt)
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index f9a7e9d29c8b..89e1dfd07a1b 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -200,23 +200,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
fd = kzalloc(sizeof(*fd), GFP_KERNEL);
- if (fd) {
- fd->rec_cpu_num = -1; /* no cpu affinity by default */
- fd->mm = current->mm;
- mmgrab(fd->mm);
- fd->dd = dd;
- kobject_get(&fd->dd->kobj);
- fp->private_data = fd;
- } else {
- fp->private_data = NULL;
-
- if (atomic_dec_and_test(&dd->user_refcount))
- complete(&dd->user_comp);
-
- return -ENOMEM;
- }
-
+ if (!fd || init_srcu_struct(&fd->pq_srcu))
+ goto nomem;
+ spin_lock_init(&fd->pq_rcu_lock);
+ spin_lock_init(&fd->tid_lock);
+ spin_lock_init(&fd->invalid_lock);
+ fd->rec_cpu_num = -1; /* no cpu affinity by default */
+ fd->mm = current->mm;
+ mmgrab(fd->mm);
+ fd->dd = dd;
+ kobject_get(&fd->dd->kobj);
+ fp->private_data = fd;
return 0;
+nomem:
+ kfree(fd);
+ fp->private_data = NULL;
+ if (atomic_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
+ return -ENOMEM;
}
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -301,21 +302,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
+ struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
int done = 0, reqs = 0;
unsigned long dim = from->nr_segs;
+ int idx;
- if (!cq || !pq)
+ idx = srcu_read_lock(&fd->pq_srcu);
+ pq = srcu_dereference(fd->pq, &fd->pq_srcu);
+ if (!cq || !pq) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -EIO;
+ }
- if (!iter_is_iovec(from) || !dim)
+ if (!iter_is_iovec(from) || !dim) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -EINVAL;
+ }
trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);
- if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
+ if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -ENOSPC;
+ }
while (dim) {
int ret;
@@ -333,6 +343,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
reqs++;
}
+ srcu_read_unlock(&fd->pq_srcu, idx);
return reqs;
}
@@ -707,6 +718,7 @@ done:
if (atomic_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
+ cleanup_srcu_struct(&fdata->pq_srcu);
kfree(fdata);
return 0;
}
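
Note: the SRCU conversion above lets hfi1_write_iter() dereference fd->pq safely against a concurrent teardown: readers bracket the access with srcu_read_lock()/srcu_read_unlock(), while the free path publishes NULL with rcu_assign_pointer(), calls synchronize_srcu() to wait out every in-flight reader, and only then frees the queue. A toy userspace analogue of that "unpublish, wait for readers, free" ordering, using a C11 atomic reader count in place of SRCU (illustrative only; kernel SRCU read-side is far cheaper and the spin-wait below is not a real grace period):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct pkt_q { int n_reqs; };

static _Atomic(struct pkt_q *) pq;
static atomic_int readers;

static int write_iter(void)
{
	struct pkt_q *q;
	int ret = 0;

	atomic_fetch_add(&readers, 1);		/* srcu_read_lock() */
	q = atomic_load(&pq);			/* srcu_dereference() */
	if (q)
		q->n_reqs++;			/* safe: the freer waits for us */
	else
		ret = -5;			/* like -EIO: queue torn down */
	atomic_fetch_sub(&readers, 1);		/* srcu_read_unlock() */
	return ret;
}

static void free_queues(void)
{
	struct pkt_q *q = atomic_exchange(&pq, NULL);	/* unpublish */

	if (!q)
		return;
	while (atomic_load(&readers))		/* synchronize_srcu() */
		;				/* toy grace period */
	free(q);				/* no reader can still hold q */
}

int main(void)
{
	atomic_store(&pq, calloc(1, sizeof(struct pkt_q)));
	printf("%d\n", write_iter());	/* 0 */
	free_queues();
	printf("%d\n", write_iter());	/* -5 */
	return 0;
}
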
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index fa45350a9a1d..b79931cc74ab 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1153,6 +1153,8 @@ struct hfi1_devdata {
char *boardname; /* human readable board info */
+ u64 ctx0_seq_drop;
+
/* reset value */
u64 z_int_counter;
u64 z_rcv_limit;
@@ -1436,10 +1438,13 @@ struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
+ struct srcu_struct pq_srcu;
struct hfi1_devdata *dd;
struct hfi1_ctxtdata *uctxt;
struct hfi1_user_sdma_comp_q *cq;
- struct hfi1_user_sdma_pkt_q *pq;
+ /* update side lock for SRCU */
+ spinlock_t pq_rcu_lock;
+ struct hfi1_user_sdma_pkt_q __rcu *pq;
u16 subctxt;
/* for cpu affinity; -1 if none */
int rec_cpu_num;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 71cb9525c074..fbff6b2f00e7 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -845,6 +845,29 @@ wq_error:
}
/**
+ * destroy_workqueues - destroy per-port workqueues
+ * @dd: the hfi1_ib device
+ */
+static void destroy_workqueues(struct hfi1_devdata *dd)
+{
+ int pidx;
+ struct hfi1_pportdata *ppd;
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+
+ if (ppd->hfi1_wq) {
+ destroy_workqueue(ppd->hfi1_wq);
+ ppd->hfi1_wq = NULL;
+ }
+ if (ppd->link_wq) {
+ destroy_workqueue(ppd->link_wq);
+ ppd->link_wq = NULL;
+ }
+ }
+}
+
+/**
* enable_general_intr() - Enable the IRQs that will be handled by the
* general interrupt handler.
* @dd: valid devdata
@@ -1117,15 +1140,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
* We can't count on interrupts since we are stopping.
*/
hfi1_quiet_serdes(ppd);
-
- if (ppd->hfi1_wq) {
- destroy_workqueue(ppd->hfi1_wq);
- ppd->hfi1_wq = NULL;
- }
- if (ppd->link_wq) {
- destroy_workqueue(ppd->link_wq);
- ppd->link_wq = NULL;
- }
+ if (ppd->hfi1_wq)
+ flush_workqueue(ppd->hfi1_wq);
+ if (ppd->link_wq)
+ flush_workqueue(ppd->link_wq);
}
sdma_exit(dd);
}
@@ -1489,7 +1507,6 @@ static int __init hfi1_mod_init(void)
goto bail_dev;
}
- hfi1_compute_tid_rdma_flow_wt();
/*
* These must be called before the driver is registered with
* the PCI subsystem.
@@ -1815,6 +1832,7 @@ static void remove_one(struct pci_dev *pdev)
* clear dma engines, etc.
*/
shutdown_device(dd);
+ destroy_workqueues(dd);
stop_timers(dd);
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
index adb4a1ba921b..5836fe7b2817 100644
--- a/drivers/infiniband/hw/hfi1/iowait.c
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -81,7 +81,9 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
void iowait_cancel_work(struct iowait *w)
{
cancel_work_sync(&iowait_get_ib_work(w)->iowork);
- cancel_work_sync(&iowait_get_tid_work(w)->iowork);
+ /* Make sure that the iowork for TID RDMA is used */
+ if (iowait_get_tid_work(w)->iowork.func)
+ cancel_work_sync(&iowait_get_tid_work(w)->iowork);
}
/**
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index c96d193bb236..177dbeaa9d14 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
/*
* bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
*/
- if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+ if (parent &&
+ (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+ dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
dd->link_gen3_capable = 0;
}
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 4e5c2d1b8cfa..79126b2b14ab 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1594,9 +1594,8 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
else
sc_del_credit_return_intr(sc);
trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl);
- if (needint) {
+ if (needint)
sc_return_credits(sc);
- }
}
/**
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 4e0e9fc0a777..30ba09b510fa 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -381,7 +381,10 @@ bool _hfi1_schedule_send(struct rvt_qp *qp)
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct hfi1_devdata *dd = ppd->dd;
+
+ if (dd->flags & HFI1_SHUTDOWN)
+ return true;
return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ?
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 5c0d90418e8c..0d3e86da2784 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2210,15 +2210,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
if (qp->s_flags & RVT_S_WAIT_RNR)
goto bail_stop;
rdi = ib_to_rvt(qp->ibqp.device);
- if (qp->s_rnr_retry == 0 &&
- !((rdi->post_parms[wqe->wr.opcode].flags &
- RVT_OPERATION_IGN_RNR_CNT) &&
- qp->s_rnr_retry_cnt == 0)) {
- status = IB_WC_RNR_RETRY_EXC_ERR;
- goto class_b;
+ if (!(rdi->post_parms[wqe->wr.opcode].flags &
+ RVT_OPERATION_IGN_RNR_CNT)) {
+ if (qp->s_rnr_retry == 0) {
+ status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto class_b;
+ }
+ if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+ qp->s_rnr_retry--;
}
- if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
- qp->s_rnr_retry--;
/*
* The last valid PSN is the previous PSN. For TID RDMA WRITE
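
Note: the restructured RNR handling above makes the RVT_OPERATION_IGN_RNR_CNT opcode flag skip both the retry-exhaustion error and the retry decrement, where previously only the error was skipped, so an opcode that ignores the RNR count no longer consumes retries. A pure-function sketch of the new control flow (names illustrative):

#include <stdbool.h>
#include <stdio.h>

struct qp_state { int s_rnr_retry; int s_rnr_retry_cnt; };

/* returns true when the QP must be moved to the error state */
static bool rnr_nak(struct qp_state *qp, bool ignore_rnr_cnt)
{
	if (!ignore_rnr_cnt) {
		if (qp->s_rnr_retry == 0)
			return true; /* IB_WC_RNR_RETRY_EXC_ERR */
		/* 7 means "retry forever", so never decrement it */
		if (qp->s_rnr_retry_cnt > 0 && qp->s_rnr_retry_cnt < 7)
			qp->s_rnr_retry--;
	}
	return false;
}

int main(void)
{
	struct qp_state qp = { .s_rnr_retry = 1, .s_rnr_retry_cnt = 3 };

	printf("%d retry=%d\n", rnr_nak(&qp, false), qp.s_rnr_retry); /* 0 0 */
	printf("%d\n", rnr_nak(&qp, false));                          /* 1 */
	printf("%d retry=%d\n", rnr_nak(&qp, true), qp.s_rnr_retry);  /* 0 0 */
	return 0;
}
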
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index bc45b3b11f91..c61b6022575e 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -65,6 +65,7 @@
#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff
+#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32))
static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
@@ -869,14 +870,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
{
struct sdma_rht_node *rht_node;
struct sdma_engine *sde = NULL;
- const struct cpumask *current_mask = &current->cpus_allowed;
unsigned long cpu_id;
/*
* To ensure that the same sdma engine(s) will always be
* selected, make sure the process is pinned to this CPU only.
*/
- if (cpumask_weight(current_mask) != 1)
+ if (current->nr_cpus_allowed != 1)
goto out;
cpu_id = smp_processor_id();
@@ -1297,7 +1297,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
struct sdma_engine *sde;
if (dd->sdma_pad_dma) {
- dma_free_coherent(&dd->pcidev->dev, 4,
+ dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
(void *)dd->sdma_pad_dma,
dd->sdma_pad_phys);
dd->sdma_pad_dma = NULL;
@@ -1492,7 +1492,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
}
/* Allocate memory for pad */
- dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32),
+ dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD,
&dd->sdma_pad_phys, GFP_KERNEL);
if (!dd->sdma_pad_dma) {
dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 90f62c4bddba..074ec71772d2 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -674,7 +674,11 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
dd_dev_err(dd,
"Skipping sc2vl sysfs info, (err %d) port %u\n",
ret, port_num);
- goto bail;
+ /*
+ * Based on the documentation for kobject_init_and_add(), the
+ * caller should call kobject_put even if this call fails.
+ */
+ goto bail_sc2vl;
}
kobject_uevent(&ppd->sc2vl_kobj, KOBJ_ADD);
@@ -684,7 +688,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
dd_dev_err(dd,
"Skipping sl2sc sysfs info, (err %d) port %u\n",
ret, port_num);
- goto bail_sc2vl;
+ goto bail_sl2sc;
}
kobject_uevent(&ppd->sl2sc_kobj, KOBJ_ADD);
@@ -694,7 +698,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
dd_dev_err(dd,
"Skipping vl2mtu sysfs info, (err %d) port %u\n",
ret, port_num);
- goto bail_sl2sc;
+ goto bail_vl2mtu;
}
kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD);
@@ -704,7 +708,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
dd_dev_err(dd,
"Skipping Congestion Control sysfs info, (err %d) port %u\n",
ret, port_num);
- goto bail_vl2mtu;
+ goto bail_cc;
}
kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
@@ -742,7 +746,6 @@ bail_sl2sc:
kobject_put(&ppd->sl2sc_kobj);
bail_sc2vl:
kobject_put(&ppd->sc2vl_kobj);
-bail:
return ret;
}
@@ -853,8 +856,13 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
return 0;
bail:
- for (i = 0; i < dd->num_sdma; i++)
- kobject_del(&dd->per_sdma[i].kobj);
+ /*
+ * The function kobject_put() will call kobject_del() if the kobject
+ * has been added successfully. The sysfs files created under the
+ * kobject directory will also be removed during the process.
+ */
+ for (; i >= 0; i--)
+ kobject_put(&dd->per_sdma[i].kobj);
return ret;
}
@@ -867,6 +875,10 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd)
struct hfi1_pportdata *ppd;
int i;
+ /* Unwind operations in hfi1_verbs_register_sysfs() */
+ for (i = 0; i < dd->num_sdma; i++)
+ kobject_put(&dd->per_sdma[i].kobj);
+
for (i = 0; i < dd->num_pports; i++) {
ppd = &dd->pport[i];
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 7e9527ab6d64..b5bc41e10e22 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
* C - Capcode
*/
-static u32 tid_rdma_flow_wt;
-
static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
struct tid_rdma_flow *flow,
bool fecn);
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+ if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+ priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ priv->s_flags |= RVT_S_ACK_PENDING;
+ hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+ validate_r_tid_ack(qp->priv);
+ tid_rdma_schedule_ack(qp);
+}
+
static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
{
return
@@ -2730,11 +2748,6 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
diff = cmp_psn(psn,
flow->flow_state.r_next_psn);
if (diff > 0) {
- if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
- restart_tid_rdma_read_req(rcd,
- qp,
- wqe);
-
/* Drop the packet. */
goto s_unlock;
} else if (diff < 0) {
@@ -3004,10 +3017,7 @@ nak_psn:
qpriv->s_nak_state = IB_NAK_PSN_ERROR;
/* We are NAK'ing the next expected PSN */
qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
- qpriv->r_tid_ack = qpriv->r_tid_tail;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
}
goto unlock;
}
@@ -3370,18 +3380,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
}
-void hfi1_compute_tid_rdma_flow_wt(void)
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
{
/*
* Heuristic for computing the RNR timeout when waiting on the flow
* queue. Rather than a computationally expensive exact estimate of when
* a flow will be available, we assume that if a QP is at position N in
* the flow queue it has to wait approximately (N + 1) * (number of
- * segments between two sync points), assuming PMTU of 4K. The rationale
- * for this is that flows are released and recycled at each sync point.
+ * segments between two sync points). The rationale for this is that
+ * flows are released and recycled at each sync point.
*/
- tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
- TID_RDMA_MAX_SEGMENT_SIZE;
+ return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
}
static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
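
Note: the timeout heuristic is now scaled by the QP's actual PMTU instead of assuming 4 KiB, with the 256 KiB segment expressed as a shift (TID_RDMA_SEGMENT_SHIFT = 18, added in tid_rdma.h below). A worked example of the arithmetic, with MAX_TID_FLOW_PSN assumed to be 1024 purely for illustration:

#include <stdio.h>

#define SEGMENT_SHIFT 18	/* 256 KiB segments, per tid_rdma.h */
#define MAX_TID_FLOW_PSN 1024	/* assumed value, for illustration only */

static unsigned int flow_wt(unsigned int pmtu)
{
	return (MAX_TID_FLOW_PSN * pmtu) >> SEGMENT_SHIFT;
}

int main(void)
{
	/* queue position N waits roughly (N + 1) * flow_wt(pmtu) segments */
	printf("pmtu 2048 -> %u\n", flow_wt(2048)); /* 8 */
	printf("pmtu 4096 -> %u\n", flow_wt(4096)); /* 16 */
	return 0;
}
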
@@ -3504,7 +3513,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
if (ret) {
- to_seg = tid_rdma_flow_wt *
+ to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
position_in_queue(qpriv,
&rcd->flow_queue);
break;
@@ -3525,7 +3534,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
/*
* If overtaking req->acked_tail, send an RNR NAK. Because the
* QP is not queued in this case, and the issue can only be
- * caused due a delay in scheduling the second leg which we
+ * caused by a delay in scheduling the second leg which we
* cannot estimate, we use a rather arbitrary RNR timeout of
* (MAX_FLOWS / 2) segments
*/
@@ -3533,8 +3542,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
MAX_FLOWS)) {
ret = -EAGAIN;
to_seg = MAX_FLOWS >> 1;
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
break;
}
@@ -4334,8 +4342,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
req);
trace_hfi1_tid_write_rsp_rcv_data(qp);
- if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
- priv->r_tid_ack = priv->r_tid_tail;
+ validate_r_tid_ack(priv);
if (opcode == TID_OP(WRITE_DATA_LAST)) {
release_rdma_sge_mr(e);
@@ -4374,8 +4381,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
}
done:
- priv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_schedule_ack(qp);
exit:
priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
if (fecn)
@@ -4387,10 +4393,7 @@ send_nak:
if (!priv->s_nak_state) {
priv->s_nak_state = IB_NAK_PSN_ERROR;
priv->s_nak_psn = flow->flow_state.r_next_psn;
- priv->s_flags |= RVT_S_ACK_PENDING;
- if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
- priv->r_tid_ack = priv->r_tid_tail;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
}
goto done;
}
@@ -4624,6 +4627,15 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
*/
fpsn = full_flow_psn(flow, flow->flow_state.spsn);
req->r_ack_psn = psn;
+ /*
+ * If resync_psn points to the last flow PSN for a
+ * segment and the new segment (likely from a new
+ * request) starts with a new generation number, we
+ * need to adjust resync_psn accordingly.
+ */
+ if (flow->flow_state.generation !=
+ (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT))
+ resync_psn = mask_psn(fpsn - 1);
flow->resync_npkts +=
delta_psn(mask_psn(resync_psn + 1), fpsn);
/*
@@ -4938,8 +4950,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
qpriv->resync = true;
/* RESYNC request always gets a TID RDMA ACK. */
qpriv->s_nak_state = 0;
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
bail:
if (fecn)
qp->s_flags |= RVT_S_ECN;
@@ -5389,7 +5400,10 @@ static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct hfi1_devdata *dd = ppd->dd;
+
+ if ((dd->flags & HFI1_SHUTDOWN))
+ return true;
return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ?
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 1c536185261e..6e82df2190b7 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -17,6 +17,7 @@
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
+#define TID_RDMA_SEGMENT_SHIFT 18
/*
* Bit definitions for priv->s_flags.
@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
-void hfi1_compute_tid_rdma_flow_wt(void);
-
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 3592a9ec155e..4d732353379d 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -90,9 +90,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
- spin_lock_init(&fd->tid_lock);
- spin_lock_init(&fd->invalid_lock);
-
fd->entry_to_rb = kcalloc(uctxt->expected_count,
sizeof(struct rb_node *),
GFP_KERNEL);
@@ -165,10 +162,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
if (fd->handler) {
hfi1_mmu_rb_unregister(fd->handler);
} else {
+ mutex_lock(&uctxt->exp_mutex);
if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+ mutex_unlock(&uctxt->exp_mutex);
}
kfree(fd->invalid_tids);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index fd754a16475a..a92346e88628 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -141,6 +141,7 @@ static int defer_packet_queue(
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
if (list_empty(&pq->busy.list)) {
+ pq->busy.lock = &sde->waitlock;
iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
}
@@ -155,6 +156,7 @@ static void activate_packet_queue(struct iowait *wait, int reason)
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+ pq->busy.lock = NULL;
xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
wake_up(&wait->wait_dma);
};
@@ -179,7 +181,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq = kzalloc(sizeof(*pq), GFP_KERNEL);
if (!pq)
return -ENOMEM;
-
pq->dd = dd;
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
@@ -236,7 +237,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
goto pq_mmu_fail;
}
- fd->pq = pq;
+ rcu_assign_pointer(fd->pq, pq);
fd->cq = cq;
return 0;
@@ -257,6 +258,21 @@ pq_reqs_nomem:
return ret;
}
+static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
+{
+ unsigned long flags;
+ seqlock_t *lock = pq->busy.lock;
+
+ if (!lock)
+ return;
+ write_seqlock_irqsave(lock, flags);
+ if (!list_empty(&pq->busy.list)) {
+ list_del_init(&pq->busy.list);
+ pq->busy.lock = NULL;
+ }
+ write_sequnlock_irqrestore(lock, flags);
+}
+
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
@@ -264,8 +280,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
- pq = fd->pq;
+ spin_lock(&fd->pq_rcu_lock);
+ pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
+ lockdep_is_held(&fd->pq_rcu_lock));
if (pq) {
+ rcu_assign_pointer(fd->pq, NULL);
+ spin_unlock(&fd->pq_rcu_lock);
+ synchronize_srcu(&fd->pq_srcu);
+ /* at this point there can be no more new requests */
if (pq->handler)
hfi1_mmu_rb_unregister(pq->handler);
iowait_sdma_drain(&pq->busy);
@@ -276,8 +298,10 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
kfree(pq->reqs);
kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
+ flush_pq_iowait(pq);
kfree(pq);
- fd->pq = NULL;
+ } else {
+ spin_unlock(&fd->pq_rcu_lock);
}
if (fd->cq) {
vfree(fd->cq->comps);
@@ -321,7 +345,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
{
int ret = 0, i;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
+ struct hfi1_user_sdma_pkt_q *pq =
+ srcu_dereference(fd->pq, &fd->pq_srcu);
struct hfi1_user_sdma_comp_q *cq = fd->cq;
struct hfi1_devdata *dd = pq->dd;
unsigned long idx = 0;
@@ -564,10 +589,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
pq->state = SDMA_PKT_Q_ACTIVE;
- /* Send the first N packets in the request to buy us some time */
- ret = user_sdma_send_pkts(req, pcount);
- if (unlikely(ret < 0 && ret != -EBUSY))
- goto free_req;
/*
* This is a somewhat blocking send implementation.
@@ -580,11 +601,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
if (ret < 0) {
if (ret != -EBUSY)
goto free_req;
- wait_event_interruptible_timeout(
+ if (wait_event_interruptible_timeout(
pq->busy.wait_dma,
- (pq->state == SDMA_PKT_Q_ACTIVE),
+ pq->state == SDMA_PKT_Q_ACTIVE,
msecs_to_jiffies(
- SDMA_IOWAIT_TIMEOUT));
+ SDMA_IOWAIT_TIMEOUT)) <= 0)
+ flush_pq_iowait(pq);
}
}
*count += idx;
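
Note: the write path now checks the return of wait_event_interruptible_timeout() and treats a timeout or signal as a cue to pull the packet queue off the SDMA wait list (flush_pq_iowait), so a process that gives up waiting cannot be left enqueued and later woken through a stale iowait entry. The "wait with timeout, clean up on timeout" shape in portable C:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int active;		/* stands in for pq->state == SDMA_PKT_Q_ACTIVE */
static int enqueued = 1;	/* stands in for pq->busy being on a wait list */

int main(void)
{
	struct timespec deadline;
	int rc = 0;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 1; /* SDMA_IOWAIT_TIMEOUT analogue */

	pthread_mutex_lock(&lock);
	while (!active && rc == 0)
		rc = pthread_cond_timedwait(&cond, &lock, &deadline);
	if (rc == ETIMEDOUT && enqueued) {
		enqueued = 0; /* flush_pq_iowait(): leave the wait list */
		printf("timed out, dequeued\n");
	}
	pthread_mutex_unlock(&lock);
	return 0;
}
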
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 4d8510b0fc38..9972e0e6545e 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -110,12 +110,6 @@ enum pkt_q_sdma_state {
SDMA_PKT_Q_DEFERRED,
};
-/*
- * Maximum retry attempts to submit a TX request
- * before putting the process to sleep.
- */
-#define MAX_DEFER_RETRY_COUNT 1
-
#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
#define SDMA_DBG(req, fmt, ...) \
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 7f9c23450579..53dd30fd2d56 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -147,9 +147,6 @@ static int pio_wait(struct rvt_qp *qp,
/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24
-/* 16B trailing buffer */
-static const u8 trail_buf[MAX_16B_PADDING];
-
static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
@@ -518,10 +515,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
opa_get_lid(packet->dlid, 9B));
if (!mcast)
goto drop;
+ rcu_read_lock();
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
if (hfi1_do_pkey_check(packet))
- goto drop;
+ goto unlock_drop;
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(packet);
if (likely(packet_handler))
@@ -530,6 +528,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
ibp->rvp.n_pkt_drops++;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
}
+ rcu_read_unlock();
/*
* Notify rvt_multicast_detach() if it is waiting for us
* to finish.
@@ -820,8 +819,8 @@ static int build_verbs_tx_desc(
/* add icrc, lt byte, and padding to flit */
if (extra_bytes)
- ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
- (void *)trail_buf, extra_bytes);
+ ret = sdma_txadd_daddr(sde->dd, &tx->txreq,
+ sde->dd->sdma_pad_phys, extra_bytes);
bail_txadd:
return ret;
@@ -1089,7 +1088,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
}
/* add icrc, lt byte, and padding to flit */
if (extra_bytes)
- seg_pio_copy_mid(pbuf, trail_buf, extra_bytes);
+ seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma,
+ extra_bytes);
seg_pio_copy_end(pbuf);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 563cf39df6d5..9cc2e72093be 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -381,7 +381,7 @@ struct hns_roce_mr_table {
struct hns_roce_wq {
u64 *wrid; /* Work request ID */
spinlock_t lock;
- int wqe_cnt; /* WQE num */
+ u32 wqe_cnt; /* WQE num */
u32 max_post;
int max_gs;
int offset;
@@ -613,7 +613,6 @@ struct hns_roce_qp {
u8 sdb_en;
u32 doorbell_qpn;
__le32 sq_signal_bits;
- u32 sq_next_wqe;
int sq_max_wqes_per_wr;
int sq_spare_wqes;
struct hns_roce_wq sq;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index d9d668992e49..3ca163c7f513 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -59,7 +59,7 @@ enum {
#define HNS_ROCE_HEM_CHUNK_LEN \
((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
- (sizeof(struct scatterlist)))
+ (sizeof(struct scatterlist) + sizeof(void *)))
#define check_whether_bt_num_3(type, hop_num) \
(type < HEM_TYPE_MTT && hop_num == 2)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 9496c69fff3a..902e74555654 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -74,8 +74,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
unsigned long flags = 0;
void *wqe = NULL;
u32 doorbell[2];
+ u32 wqe_idx = 0;
int nreq = 0;
- u32 ind = 0;
int ret = 0;
u8 *smac;
int loopback;
@@ -88,7 +88,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
}
spin_lock_irqsave(&qp->sq.lock, flags);
- ind = qp->sq_next_wqe;
+
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
ret = -ENOMEM;
@@ -96,6 +96,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
goto out;
}
+ wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
+
if (unlikely(wr->num_sge > qp->sq.max_gs)) {
dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
wr->num_sge, qp->sq.max_gs);
@@ -104,9 +106,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
goto out;
}
- wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
- qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
- wr->wr_id;
+ wqe = get_send_wqe(qp, wqe_idx);
+ qp->sq.wrid[wqe_idx] = wr->wr_id;
/* Process RC and RD type WQEs separately */
if (ibqp->qp_type == IB_QPT_GSI) {
@@ -213,7 +214,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
cpu_to_le32((wr->sg_list[1].addr) >> 32);
ud_sq_wqe->l_key1 =
cpu_to_le32(wr->sg_list[1].lkey);
- ind++;
} else if (ibqp->qp_type == IB_QPT_RC) {
u32 tmp_len = 0;
@@ -311,7 +311,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp,
ctrl->flag |= cpu_to_le32(wr->num_sge <<
HNS_ROCE_WQE_SGE_NUM_BIT);
}
- ind++;
}
}
@@ -339,7 +338,6 @@ out:
doorbell[1] = le32_to_cpu(sq_db.u32_8);
hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l);
- qp->sq_next_wqe = ind;
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -351,12 +349,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
- int ret = 0;
- int nreq = 0;
- int ind = 0;
- int i = 0;
- u32 reg_val;
- unsigned long flags = 0;
struct hns_roce_rq_wqe_ctrl *ctrl = NULL;
struct hns_roce_wqe_data_seg *scat = NULL;
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -364,9 +356,14 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_rq_db rq_db;
uint32_t doorbell[2] = {0};
+ unsigned long flags = 0;
+ unsigned int wqe_idx;
+ int ret = 0;
+ int nreq = 0;
+ int i = 0;
+ u32 reg_val;
spin_lock_irqsave(&hr_qp->rq.lock, flags);
- ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (hns_roce_wq_overflow(&hr_qp->rq, nreq,
@@ -376,6 +373,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
goto out;
}
+ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+
if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
wr->num_sge, hr_qp->rq.max_gs);
@@ -384,7 +383,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
goto out;
}
- ctrl = get_recv_wqe(hr_qp, ind);
+ ctrl = get_recv_wqe(hr_qp, wqe_idx);
roce_set_field(ctrl->rwqe_byte_12,
RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M,
@@ -396,9 +395,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
for (i = 0; i < wr->num_sge; i++)
set_data_seg(scat + i, wr->sg_list + i);
- hr_qp->rq.wrid[ind] = wr->wr_id;
-
- ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
+ hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
}
out:
@@ -2701,7 +2698,6 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
hr_qp->rq.tail = 0;
hr_qp->sq.head = 0;
hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
}
kfree(context);
@@ -3315,7 +3311,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
hr_qp->rq.tail = 0;
hr_qp->sq.head = 0;
hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
}
out:
kfree(context);
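
Note: post_send and post_recv now compute the slot once as (head + nreq) & (wqe_cnt - 1) and use it for the WQE, the wrid array, and the inline bookkeeping, instead of carrying a separate ind counter that could drift from head when a request was rejected mid-loop; the v2 file below gets the same treatment. The mask trick only works because wqe_cnt is a power of two:

#include <stdio.h>

#define WQE_CNT 8 /* ring size must be a power of two for the mask trick */

int main(void)
{
	unsigned int head = 6, nreq;

	for (nreq = 0; nreq < 4; nreq++) {
		unsigned int wqe_idx = (head + nreq) & (WQE_CNT - 1);

		/* wraps 6, 7, 0, 1 without a division */
		printf("wqe_idx=%u\n", wqe_idx);
	}
	return 0;
}
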
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index b5392cb5b20f..ff0e5a3df724 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -110,7 +110,7 @@ static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
}
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
- unsigned int *sge_ind)
+ unsigned int *sge_ind, int valid_num_sge)
{
struct hns_roce_v2_wqe_data_seg *dseg;
struct ib_sge *sg;
@@ -123,7 +123,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
- extend_sge_num = wr->num_sge - num_in_wqe;
+ extend_sge_num = valid_num_sge - num_in_wqe;
sg = wr->sg_list + num_in_wqe;
shift = qp->hr_buf.page_shift;
@@ -159,14 +159,16 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
void *wqe, unsigned int *sge_ind,
+ int valid_num_sge,
const struct ib_send_wr **bad_wr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_v2_wqe_data_seg *dseg = wqe;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ int j = 0;
int i;
- if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
+ if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) {
if (le32_to_cpu(rc_sq_wqe->msg_len) >
hr_dev->caps.max_sq_inline) {
*bad_wr = wr;
@@ -190,7 +192,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
1);
} else {
- if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
+ if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
for (i = 0; i < wr->num_sge; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
@@ -203,19 +205,21 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
(*sge_ind) & (qp->sge.sge_cnt - 1));
- for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
+ for (i = 0; i < wr->num_sge &&
+ j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
dseg++;
+ j++;
}
}
- set_extend_sge(qp, wr, sge_ind);
+ set_extend_sge(qp, wr, sge_ind, valid_num_sge);
}
roce_set_field(rc_sq_wqe->byte_16,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge);
+ V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
}
return 0;
@@ -239,10 +243,11 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
struct device *dev = hr_dev->dev;
struct hns_roce_v2_db sq_db;
struct ib_qp_attr attr;
- unsigned int sge_ind = 0;
unsigned int owner_bit;
+ unsigned int sge_idx;
+ unsigned int wqe_idx;
unsigned long flags;
- unsigned int ind;
+ int valid_num_sge;
void *wqe = NULL;
bool loopback;
int attr_mask;
@@ -269,8 +274,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
}
spin_lock_irqsave(&qp->sq.lock, flags);
- ind = qp->sq_next_wqe;
- sge_ind = qp->next_sge;
+ sge_idx = qp->next_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
@@ -279,6 +283,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
goto out;
}
+ wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
+
if (unlikely(wr->num_sge > qp->sq.max_gs)) {
dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
wr->num_sge, qp->sq.max_gs);
@@ -287,14 +293,20 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
goto out;
}
- wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
- qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
- wr->wr_id;
-
+ wqe = get_send_wqe(qp, wqe_idx);
+ qp->sq.wrid[wqe_idx] = wr->wr_id;
owner_bit =
~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
+ valid_num_sge = 0;
tmp_len = 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ tmp_len += wr->sg_list[i].length;
+ valid_num_sge++;
+ }
+ }
+
/* Process the WQE separately according to the QP type */
if (ibqp->qp_type == IB_QPT_GSI) {
ud_sq_wqe = wqe;
@@ -330,9 +342,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_SEND);
- for (i = 0; i < wr->num_sge; i++)
- tmp_len += wr->sg_list[i].length;
-
ud_sq_wqe->msg_len =
cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len);
@@ -368,12 +377,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_field(ud_sq_wqe->byte_16,
V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
- wr->num_sge);
+ valid_num_sge);
roce_set_field(ud_sq_wqe->byte_20,
V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
- sge_ind & (qp->sge.sge_cnt - 1));
+ sge_idx & (qp->sge.sge_cnt - 1));
roce_set_field(ud_sq_wqe->byte_24,
V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
@@ -426,13 +435,10 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
GID_LEN_V2);
- set_extend_sge(qp, wr, &sge_ind);
- ind++;
+ set_extend_sge(qp, wr, &sge_idx, valid_num_sge);
} else if (ibqp->qp_type == IB_QPT_RC) {
rc_sq_wqe = wqe;
memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
- for (i = 0; i < wr->num_sge; i++)
- tmp_len += wr->sg_list[i].length;
rc_sq_wqe->msg_len =
cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len);
@@ -553,15 +559,14 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_field(rc_sq_wqe->byte_16,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
- wr->num_sge);
+ valid_num_sge);
} else if (wr->opcode != IB_WR_REG_MR) {
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
- wqe, &sge_ind, bad_wr);
+ wqe, &sge_idx,
+ valid_num_sge, bad_wr);
if (ret)
goto out;
}
-
- ind++;
} else {
dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -591,8 +596,7 @@ out:
hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l);
- qp->sq_next_wqe = ind;
- qp->next_sge = sge_ind;
+ qp->next_sge = sge_idx;
if (qp->state == IB_QPS_ERR) {
attr_mask = IB_QP_STATE;
@@ -626,13 +630,12 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
unsigned long flags;
void *wqe = NULL;
int attr_mask;
+ u32 wqe_idx;
int ret = 0;
int nreq;
- int ind;
int i;
spin_lock_irqsave(&hr_qp->rq.lock, flags);
- ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
if (hr_qp->state == IB_QPS_RESET) {
spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
@@ -648,6 +651,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
goto out;
}
+ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+
if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
wr->num_sge, hr_qp->rq.max_gs);
@@ -656,7 +661,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
goto out;
}
- wqe = get_recv_wqe(hr_qp, ind);
+ wqe = get_recv_wqe(hr_qp, wqe_idx);
dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
for (i = 0; i < wr->num_sge; i++) {
if (!wr->sg_list[i].length)
@@ -672,8 +677,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
/* the RQ supports inline data */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
- sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
- hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt =
+ sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
+ hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt =
(u32)wr->num_sge;
for (i = 0; i < wr->num_sge; i++) {
sge_list[i].addr =
@@ -682,9 +687,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
}
}
- hr_qp->rq.wrid[ind] = wr->wr_id;
-
- ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
+ hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
}
out:
@@ -1218,34 +1221,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev)
{
struct hns_roce_pf_timer_res_a *req_a;
- struct hns_roce_cmq_desc desc[2];
- int ret, i;
+ struct hns_roce_cmq_desc desc;
+ int ret;
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i],
- HNS_ROCE_OPC_QUERY_PF_TIMER_RES,
- true);
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_PF_TIMER_RES,
+ true);
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- }
-
- ret = hns_roce_cmq_send(hr_dev, desc, 2);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret)
return ret;
- req_a = (struct hns_roce_pf_timer_res_a *)desc[0].data;
+ req_a = (struct hns_roce_pf_timer_res_a *)desc.data;
hr_dev->caps.qpc_timer_bt_num =
- roce_get_field(req_a->qpc_timer_bt_idx_num,
- PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M,
- PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S);
+ roce_get_field(req_a->qpc_timer_bt_idx_num,
+ PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M,
+ PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S);
hr_dev->caps.cqc_timer_bt_num =
- roce_get_field(req_a->cqc_timer_bt_idx_num,
- PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M,
- PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S);
+ roce_get_field(req_a->cqc_timer_bt_idx_num,
+ PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M,
+ PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S);
return 0;
}
@@ -4260,7 +4255,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
hr_qp->rq.tail = 0;
hr_qp->sq.head = 0;
hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
hr_qp->next_sge = 0;
if (hr_qp->rq.wqe_cnt)
*hr_qp->rdb.db_record = 0;
@@ -4362,7 +4356,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->path_mig_state = IB_MIG_ARMED;
qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
if (hr_qp->ibqp.qp_type == IB_QPT_UD)
- qp_attr->qkey = V2_QKEY_VAL;
+ qp_attr->qkey = le32_to_cpu(context->qkey_xrcd);
qp_attr->rq_psn = roce_get_field(context->byte_108_rx_reqepsn,
V2_QPC_BYTE_108_RX_REQ_EPSN_M,
@@ -5191,9 +5185,9 @@ static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
return;
}
- if (eq->buf_list)
- dma_free_coherent(hr_dev->dev, buf_chk_sz,
- eq->buf_list->buf, eq->buf_list->map);
+ dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
+ eq->buf_list->map);
+ kfree(eq->buf_list);
}
static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
@@ -5860,11 +5854,11 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
- hr_dev->caps.idx_ba_pg_sz);
+ hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
- hr_dev->caps.idx_buf_pg_sz);
+ hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET);
srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT);
srq_context->idx_nxt_blk_addr =
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index edfdbe2ce0db..4211a982a8e0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -87,8 +87,8 @@
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32
#define HNS_ROCE_V2_SCCC_ENTRY_SZ 32
-#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096
-#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096
+#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
+#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
#define HNS_ROCE_INVALID_LKEY 0x100
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 6110ec408626..c68527f77418 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -1018,8 +1018,8 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
if (page_addr & ((1 << mtt->page_shift) - 1)) {
dev_err(dev,
- "page_addr 0x%llx is not page_shift %d alignment!\n",
- page_addr, mtt->page_shift);
+ "page_addr is not page_shift %d alignment!\n",
+ mtt->page_shift);
ret = -EINVAL;
goto out;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 0a31d0a3d657..06871731ac43 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -98,11 +98,15 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
goto err;
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
- if (!table_attr)
+ if (!table_attr) {
+ ret = -EMSGSIZE;
goto err;
+ }
- if (hns_roce_fill_cq(msg, context))
+ if (hns_roce_fill_cq(msg, context)) {
+ ret = -EMSGSIZE;
goto err_cancel_table;
+ }
nla_nest_end(msg, table_attr);
kfree(context);
@@ -113,7 +117,7 @@ err_cancel_table:
nla_nest_cancel(msg, table_attr);
err:
kfree(context);
- return -EMSGSIZE;
+ return ret;
}
int hns_roce_fill_res_entry(struct sk_buff *msg,
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index b3421b1f21e0..1dfd3b649d3c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -231,7 +231,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
srq->max_gs = srq_init_attr->attr.max_sge;
- srq_desc_size = max(16, 16 * srq->max_gs);
+ srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
srq->wqe_shift = ilog2(srq_desc_size);
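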
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 8233f5a4e623..47d7be972d34 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1984,7 +1984,6 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev,
struct rtable *rt;
struct neighbour *neigh;
int rc = arpindex;
- struct net_device *netdev = iwdev->netdev;
__be32 dst_ipaddr = htonl(dst_ip);
__be32 src_ipaddr = htonl(src_ip);
@@ -1994,9 +1993,6 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev,
return rc;
}
- if (netif_is_bond_slave(netdev))
- netdev = netdev_master_upper_dev_get(netdev);
-
neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
rcu_read_lock();
@@ -2062,7 +2058,6 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
{
struct neighbour *neigh;
int rc = arpindex;
- struct net_device *netdev = iwdev->netdev;
struct dst_entry *dst;
struct sockaddr_in6 dst_addr;
struct sockaddr_in6 src_addr;
@@ -2083,9 +2078,6 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
return rc;
}
- if (netif_is_bond_slave(netdev))
- netdev = netdev_master_upper_dev_get(netdev);
-
neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32);
rcu_read_lock();
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 55a1fbf0e670..ae8b97c30665 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -534,7 +534,7 @@ void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
int arp_index;
arp_index = i40iw_arp_table(iwdev, ip_addr, ipv4, mac_addr, action);
- if (arp_index == -1)
+ if (arp_index < 0)
return;
cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
if (!cqp_request)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 10932baee279..7749d680017a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1223,6 +1223,8 @@ static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev)
(rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
(dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
idev = in_dev_get(dev);
+ if (!idev)
+ continue;
for_ifa(idev) {
i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
"IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address,
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index ecd6cadd529a..b591861934b3 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -186,23 +186,6 @@ out:
kfree(ent);
}
-static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
-{
- struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
- struct rb_root *sl_id_map = &sriov->sl_id_map;
- struct id_map_entry *ent, *found_ent;
-
- spin_lock(&sriov->id_map_lock);
- ent = xa_erase(&sriov->pv_id_table, pv_cm_id);
- if (!ent)
- goto out;
- found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
- if (found_ent && found_ent == ent)
- rb_erase(&found_ent->node, sl_id_map);
-out:
- spin_unlock(&sriov->id_map_lock);
-}
-
static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
{
struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
@@ -294,7 +277,7 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
spin_lock(&sriov->id_map_lock);
spin_lock_irqsave(&sriov->going_down_lock, flags);
/* Make sure that there is no schedule inside the scheduled work. */
- if (!sriov->is_going_down) {
+ if (!sriov->is_going_down && !id->scheduled_delete) {
id->scheduled_delete = 1;
schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
@@ -341,9 +324,6 @@ cont:
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
schedule_delayed(ibdev, id);
- else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
- id_map_find_del(ibdev, pv_cm_id);
-
return 0;
}
@@ -382,12 +362,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
*slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id);
- if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_REJ_ATTR_ID)
schedule_delayed(ibdev, id);
- else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
- id_map_find_del(ibdev, (int) pv_cm_id);
- }
return 0;
}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 25d09d53b51c..4aa439a7ce61 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -246,6 +246,13 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
}
+static void free_gid_entry(struct gid_entry *entry)
+{
+ memset(&entry->gid, 0, sizeof(entry->gid));
+ kfree(entry->ctx);
+ entry->ctx = NULL;
+}
+
static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
{
struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
@@ -306,6 +313,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
GFP_ATOMIC);
if (!gids) {
ret = -ENOMEM;
+ *context = NULL;
+ free_gid_entry(&port_gid_table->gids[free]);
} else {
for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
@@ -317,6 +326,12 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
if (!ret && hw_update) {
ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
+ if (ret) {
+ spin_lock_bh(&iboe->lock);
+ *context = NULL;
+ free_gid_entry(&port_gid_table->gids[free]);
+ spin_unlock_bh(&iboe->lock);
+ }
kfree(gids);
}
@@ -346,10 +361,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
if (!ctx->refcount) {
unsigned int real_index = ctx->real_index;
- memset(&port_gid_table->gids[real_index].gid, 0,
- sizeof(port_gid_table->gids[real_index].gid));
- kfree(port_gid_table->gids[real_index].ctx);
- port_gid_table->gids[real_index].ctx = NULL;
+ free_gid_entry(&port_gid_table->gids[real_index]);
hw_update = 1;
}
}
@@ -768,7 +780,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->ip_gids = true;
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
- props->pkey_tbl_len = 1;
+ if (mdev->dev->caps.pkey_table_len[port])
+ props->pkey_tbl_len = 1;
props->max_mtu = IB_MTU_4096;
props->max_vl_num = 2;
props->state = IB_PORT_DOWN;
@@ -1478,8 +1491,9 @@ static int __mlx4_ib_create_default_rules(
int i;
for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
+ union ib_flow_spec ib_spec = {};
int ret;
- union ib_flow_spec ib_spec;
+
switch (pdefault_rules->rules_create_list[i]) {
case 0:
/* no rule */
@@ -3004,16 +3018,17 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
ibdev->ib_active = false;
flush_workqueue(wq);
- mlx4_ib_close_sriov(ibdev);
- mlx4_ib_mad_cleanup(ibdev);
- ib_unregister_device(&ibdev->ib_dev);
- mlx4_ib_diag_cleanup(ibdev);
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
+ mlx4_ib_close_sriov(ibdev);
+ mlx4_ib_mad_cleanup(ibdev);
+ ib_unregister_device(&ibdev->ib_dev);
+ mlx4_ib_diag_cleanup(ibdev);
+
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
kfree(ibdev->ib_uc_qpns_bitmap);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 355205a28544..6fd661076ade 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -440,7 +440,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
mr->ibmr.length = length;
- mr->ibmr.iova = virt_addr;
mr->ibmr.page_size = 1U << shift;
return &mr->ibmr;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5221c0794d1d..89c2038f50da 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2817,6 +2817,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
int send_size;
int header_size;
int spc;
+ int err;
int i;
if (wr->wr.opcode != IB_WR_SEND)
@@ -2851,7 +2852,9 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
sqp->ud_header.lrh.virtual_lane = 0;
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+ err = ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+ if (err)
+ return err;
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
@@ -3138,9 +3141,14 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
}
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
if (!sqp->qp.ibqp.qp_num)
- ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
+ err = ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index,
+ &pkey);
else
- ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey);
+ err = ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index,
+ &pkey);
+ if (err)
+ return err;
+
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 2e2e65f00257..347a563736e3 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -330,6 +330,22 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
dump_cqe(dev, cqe);
}
+static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+ u16 tail, u16 head)
+{
+ u16 idx;
+
+ do {
+ idx = tail & (qp->sq.wqe_cnt - 1);
+ if (idx == head)
+ break;
+
+ tail = qp->sq.w_list[idx].next;
+ } while (1);
+ tail = qp->sq.w_list[idx].next;
+ qp->sq.last_poll = tail;
+}
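
handle_atomics() walks the send queue's w_list from the last polled entry toward the just-completed one, relying on free-running 16-bit counters that map onto ring slots by masking with wqe_cnt - 1 (wqe_cnt is a power of two). A minimal sketch of that masking, with an arbitrary ring size:

    #include <stdint.h>
    #include <stdio.h>

    #define WQE_CNT 8u /* must be a power of two, as in the driver */

    int main(void)
    {
        /* Free-running 16-bit counter; masking with (cnt - 1) maps it
         * onto ring slots, so wraparound needs no special casing. */
        uint16_t tail = 0xfffe;

        for (int i = 0; i < 4; i++, tail++)
            printf("counter %#06x -> slot %u\n",
                   (unsigned int)tail, tail & (WQE_CNT - 1));
        return 0;
    }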
+
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
@@ -368,7 +384,7 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
}
static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
- int *npolled, int is_send)
+ int *npolled, bool is_send)
{
struct mlx5_ib_wq *wq;
unsigned int cur;
@@ -383,10 +399,16 @@ static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
return;
for (i = 0; i < cur && np < num_entries; i++) {
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ unsigned int idx;
+
+ idx = (is_send) ? wq->last_poll : wq->tail;
+ idx &= (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[idx];
wc->status = IB_WC_WR_FLUSH_ERR;
wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
wq->tail++;
+ if (is_send)
+ wq->last_poll = wq->w_list[idx].next;
np++;
wc->qp = &qp->ibqp;
wc++;
@@ -476,6 +498,7 @@ repoll:
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
idx = wqe_ctr & (wq->wqe_cnt - 1);
handle_good_req(wc, cqe64, wq, idx);
+ handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
wc->wr_id = wq->wrid[idx];
wq->tail = wq->wqe_head[idx] + 1;
wc->status = IB_WC_SUCCESS;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 80b42d069328..4c1f4fe5eb02 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -306,6 +306,10 @@ static u64 devx_get_obj_id(const void *in)
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
MLX5_GET(rst2init_qp_in, in, qpn));
break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(init2init_qp_in, in, qpn));
+ break;
case MLX5_CMD_OP_INIT2RTR_QP:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
MLX5_GET(init2rtr_qp_in, in, qpn));
@@ -621,6 +625,7 @@ static bool devx_is_obj_modify_cmd(const void *in)
case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
case MLX5_CMD_OP_RST2INIT_QP:
case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_INIT2INIT_QP:
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
case MLX5_CMD_OP_SQERR2RTS_QP:
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 4950df3f71b6..5c73c0a790fa 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
if (ret) {
/* Undo the effect of adding the outstanding wr */
- gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
- gsi->cap.max_send_wr;
+ gsi->outstanding_pi--;
goto err;
}
spin_unlock_irqrestore(&gsi->lock, flags);
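
The fix drops the modulo because outstanding_pi is meant to be a free-running producer counter paired with a free-running consumer index; reducing only one of them modulo the queue depth corrupts the in-flight arithmetic. A toy illustration of the breakage, assuming a hypothetical depth of 6:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_WR 6u /* hypothetical max_send_wr */

    int main(void)
    {
        uint32_t pi = 7, ci = 5; /* free-running producer/consumer */

        printf("in flight: %u\n", pi - ci);   /* 2, correct */
        pi = pi % MAX_WR;                     /* pi becomes 1 */
        printf("after modulo: %u\n", pi - ci);/* huge bogus value */
        return 0;
    }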
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ff6a2b1fa8b6..660310d54ff6 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -516,7 +516,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
mdev_port_num);
if (err)
goto out;
- ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet);
+ ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability);
eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
props->active_width = IB_WIDTH_4X;
@@ -828,6 +828,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
{
+ size_t uhw_outlen = (uhw) ? uhw->outlen : 0;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
int err = -ENOMEM;
@@ -841,12 +842,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
u64 max_tso;
resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
- if (uhw->outlen && uhw->outlen < resp_len)
+ if (uhw_outlen && uhw_outlen < resp_len)
return -EINVAL;
else
resp.response_length = resp_len;
- if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
+ if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
return -EINVAL;
memset(props, 0, sizeof(*props));
@@ -910,7 +911,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->raw_packet_caps |=
IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
- if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
+ if (field_avail(typeof(resp), tso_caps, uhw_outlen)) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
if (max_tso) {
resp.tso_caps.max_tso = 1 << max_tso;
@@ -920,7 +921,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
+ if (field_avail(typeof(resp), rss_caps, uhw_outlen)) {
resp.rss_caps.rx_hash_function =
MLX5_RX_HASH_FUNC_TOEPLITZ;
resp.rss_caps.rx_hash_fields_mask =
@@ -940,9 +941,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
resp.response_length += sizeof(resp.rss_caps);
}
} else {
- if (field_avail(typeof(resp), tso_caps, uhw->outlen))
+ if (field_avail(typeof(resp), tso_caps, uhw_outlen))
resp.response_length += sizeof(resp.tso_caps);
- if (field_avail(typeof(resp), rss_caps, uhw->outlen))
+ if (field_avail(typeof(resp), rss_caps, uhw_outlen))
resp.response_length += sizeof(resp.rss_caps);
}
@@ -1063,7 +1064,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_MAX_CQ_PERIOD;
}
- if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
+ if (field_avail(typeof(resp), cqe_comp_caps, uhw_outlen)) {
resp.response_length += sizeof(resp.cqe_comp_caps);
if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
@@ -1081,7 +1082,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
+ if (field_avail(typeof(resp), packet_pacing_caps, uhw_outlen) &&
raw_support) {
if (MLX5_CAP_QOS(mdev, packet_pacing) &&
MLX5_CAP_GEN(mdev, qos)) {
@@ -1100,7 +1101,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
- uhw->outlen)) {
+ uhw_outlen)) {
if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
resp.mlx5_ib_support_multi_pkt_send_wqes =
MLX5_IB_ALLOW_MPW;
@@ -1113,7 +1114,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
}
- if (field_avail(typeof(resp), flags, uhw->outlen)) {
+ if (field_avail(typeof(resp), flags, uhw_outlen)) {
resp.response_length += sizeof(resp.flags);
if (MLX5_CAP_GEN(mdev, cqe_compression_128))
@@ -1129,8 +1130,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
}
- if (field_avail(typeof(resp), sw_parsing_caps,
- uhw->outlen)) {
+ if (field_avail(typeof(resp), sw_parsing_caps, uhw_outlen)) {
resp.response_length += sizeof(resp.sw_parsing_caps);
if (MLX5_CAP_ETH(mdev, swp)) {
resp.sw_parsing_caps.sw_parsing_offloads |=
@@ -1150,7 +1150,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) &&
+ if (field_avail(typeof(resp), striding_rq_caps, uhw_outlen) &&
raw_support) {
resp.response_length += sizeof(resp.striding_rq_caps);
if (MLX5_CAP_GEN(mdev, striding_rq)) {
@@ -1167,8 +1167,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), tunnel_offloads_caps,
- uhw->outlen)) {
+ if (field_avail(typeof(resp), tunnel_offloads_caps, uhw_outlen)) {
resp.response_length += sizeof(resp.tunnel_offloads_caps);
if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan))
resp.tunnel_offloads_caps |=
@@ -1179,17 +1178,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
resp.tunnel_offloads_caps |=
MLX5_IB_TUNNELED_OFFLOADS_GRE;
- if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
- MLX5_FLEX_PROTO_CW_MPLS_GRE)
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre))
resp.tunnel_offloads_caps |=
MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE;
- if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
- MLX5_FLEX_PROTO_CW_MPLS_UDP)
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_udp))
resp.tunnel_offloads_caps |=
MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
}
- if (uhw->outlen) {
+ if (uhw_outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
@@ -3502,10 +3499,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
INIT_LIST_HEAD(&handler->list);
- if (dst) {
- memcpy(&dest_arr[0], dst, sizeof(*dst));
- dest_num++;
- }
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec->match_criteria,
@@ -3519,6 +3512,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
}
+ if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
+ memcpy(&dest_arr[0], dst, sizeof(*dst));
+ dest_num++;
+ }
+
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
@@ -3563,10 +3561,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
+ if (!dest_num)
rule_dst = NULL;
- dest_num = 0;
- }
} else {
if (is_egress)
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
@@ -4678,7 +4674,6 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
struct ib_device_attr *dprops = NULL;
struct ib_port_attr *pprops = NULL;
int err = -ENOMEM;
- struct ib_udata uhw = {.inlen = 0, .outlen = 0};
pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
if (!pprops)
@@ -4688,7 +4683,7 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
if (!dprops)
goto out;
- err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
+ err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL);
if (err) {
mlx5_ib_warn(dev, "query_device failed %d\n", err);
goto out;
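
With outlen folded into a local, kernel-internal callers such as __get_port_caps() can simply pass NULL instead of carrying a zeroed dummy struct ib_udata. A minimal sketch of the NULL-tolerant parameter pattern; the struct here is a simplified stand-in:

    #include <stddef.h>
    #include <stdio.h>

    struct udata { size_t inlen, outlen; };

    /* Folding the length into a local keeps every later check
     * NULL-safe without sprinkling "if (uhw)" everywhere. */
    static int query_device_demo(const struct udata *uhw)
    {
        size_t uhw_outlen = uhw ? uhw->outlen : 0;

        if (uhw_outlen)
            printf("would copy %zu bytes to user space\n", uhw_outlen);
        else
            printf("kernel-internal query, no copy-out\n");
        return 0;
    }

    int main(void)
    {
        struct udata u = { .inlen = 0, .outlen = 64 };

        query_device_demo(&u);
        query_device_demo(NULL);
        return 0;
    }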
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index f52b845f2f7b..3b11ed0d95ad 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -282,6 +282,7 @@ struct mlx5_ib_wq {
unsigned head;
unsigned tail;
u16 cur_post;
+ u16 last_poll;
void *cur_edge;
};
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index d239fc58c002..eaab3e5b785b 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -454,7 +454,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
- return NULL;
+ return ERR_PTR(-EINVAL);
}
ent = &cache->ent[entry];
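
Returning ERR_PTR(-EINVAL) instead of NULL matters because callers of mlx5_mr_cache_alloc() check the result with IS_ERR(); a bare NULL would slip past that check and be dereferenced. A user-space re-creation of the convention, for illustration only:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Simplified copy of the kernel's ERR_PTR idiom: the error code
     * travels inside the pointer value itself. */
    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long err) { return (void *)err; }
    static inline long PTR_ERR(const void *p) { return (long)p; }
    static inline int IS_ERR(const void *p)
    {
        return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
    }

    static int dummy_obj;

    static void *cache_alloc(int entry)
    {
        if (entry < 0)
            return ERR_PTR(-EINVAL); /* error in the pointer, not NULL */
        return &dummy_obj;
    }

    int main(void)
    {
        void *mr = cache_alloc(-1);

        if (IS_ERR(mr))
            printf("alloc failed: %ld\n", PTR_ERR(mr));
        return 0;
    }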
@@ -1425,6 +1425,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
if (!mr->umem)
return -EINVAL;
+ if (is_odp_mr(mr))
+ return -EOPNOTSUPP;
+
if (flags & IB_MR_REREG_TRANS) {
addr = virt_addr;
len = length;
@@ -1469,8 +1472,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
}
mr->allocated_from_cache = 0;
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- mr->live = 1;
} else {
/*
* Send a UMR WQE
@@ -1499,7 +1500,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
set_mr_fields(dev, mr, npages, len, access_flags);
- update_odp_mr(mr);
return 0;
err:
@@ -1593,13 +1593,14 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
*/
mr->live = 0;
+ /* Wait for all running page-fault handlers to finish. */
+ synchronize_srcu(&dev->mr_srcu);
+
/* dequeue pending prefetch requests for the mr */
if (atomic_read(&mr->num_pending_prefetch))
flush_workqueue(system_unbound_wq);
WARN_ON(atomic_read(&mr->num_pending_prefetch));
- /* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
if (umem_odp->page_list)
mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index fda3dfd6f87b..e0ecb540c67e 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1205,7 +1205,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
{
bool sq = pfault->type & MLX5_PFAULT_REQUESTOR;
u16 wqe_index = pfault->wqe.wqe_index;
- void *wqe = NULL, *wqe_end = NULL;
+ void *wqe, *wqe_start = NULL, *wqe_end = NULL;
u32 bytes_mapped, total_wqe_bytes;
struct mlx5_core_rsc_common *res;
int resume_with_error = 1;
@@ -1226,12 +1226,13 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
goto resolve_page_fault;
}
- wqe = (void *)__get_free_page(GFP_KERNEL);
- if (!wqe) {
+ wqe_start = (void *)__get_free_page(GFP_KERNEL);
+ if (!wqe_start) {
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
goto resolve_page_fault;
}
+ wqe = wqe_start;
qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL;
if (qp && sq) {
ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE,
@@ -1286,7 +1287,7 @@ resolve_page_fault:
pfault->wqe.wq_num, resume_with_error,
pfault->type);
mlx5_core_res_put(res);
- free_page((unsigned long)wqe);
+ free_page((unsigned long)wqe_start);
}
static int pages_in_range(u64 address, u32 length)
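
The fix keeps the pointer returned by __get_free_page() in wqe_start because the parsing code advances wqe; freeing the advanced cursor would pass a bogus address to free_page(). The same rule in miniature, using plain malloc()/free():

    #include <stdlib.h>

    int main(void)
    {
        char *start = malloc(64); /* keep the pointer the allocator gave us */
        char *cur = start;

        if (!start)
            return 1;
        cur += 16;    /* parsing advances the cursor... */
        free(start);  /* ...but only the original pointer is freeable */
        (void)cur;
        return 0;
    }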
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6dbca72a73b1..b19e580939ac 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1468,6 +1468,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u16 uid = to_mpd(pd)->uid;
u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
+ if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt)
+ return -EINVAL;
if (qp->sq.wqe_cnt) {
err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
if (err)
@@ -3254,10 +3256,12 @@ static int modify_raw_packet_qp_sq(
}
/* Only remove the old rate after new rate was set */
- if ((old_rl.rate &&
- !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
- (new_state != MLX5_SQC_STATE_RDY))
+ if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
+ (new_state != MLX5_SQC_STATE_RDY)) {
mlx5_rl_remove_rate(dev, &old_rl);
+ if (new_state != MLX5_SQC_STATE_RDY)
+ memset(&new_rl, 0, sizeof(new_rl));
+ }
ibqp->rl = new_rl;
sq->state = new_state;
@@ -3702,6 +3706,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->sq.cur_post = 0;
if (qp->sq.wqe_cnt)
qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
+ qp->sq.last_poll = 0;
qp->db.db[MLX5_RCV_DBR] = 0;
qp->db.db[MLX5_SND_DBR] = 0;
}
@@ -5372,7 +5377,9 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
rdma_ah_set_path_bits(ah_attr, path->grh_mlid & 0x7f);
rdma_ah_set_static_rate(ah_attr,
path->static_rate ? path->static_rate - 5 : 0);
- if (path->grh_mlid & (1 << 7)) {
+
+ if (path->grh_mlid & (1 << 7) ||
+ ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
u32 tc_fl = be32_to_cpu(path->tclass_flowlabel);
rdma_ah_set_grh(ah_attr, NULL,
@@ -6008,6 +6015,10 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
if (udata->outlen && udata->outlen < min_resp_len)
return ERR_PTR(-EINVAL);
+ if (!capable(CAP_SYS_RAWIO) &&
+ init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP)
+ return ERR_PTR(-EPERM);
+
dev = to_mdev(pd->device);
switch (init_attr->wq_type) {
case IB_WQT_RQ:
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4e7fde86c96b..c29c1f7da4a1 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -310,12 +310,18 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
srq->msrq.event = mlx5_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (udata)
- if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
+ if (udata) {
+ struct mlx5_ib_create_srq_resp resp = {
+ .srqn = srq->msrq.srqn,
+ };
+
+ if (ib_copy_to_udata(udata, &resp, min(udata->outlen,
+ sizeof(resp)))) {
mlx5_ib_dbg(dev, "copy to user failed\n");
err = -EFAULT;
goto err_core;
}
+ }
init_attr->attr.max_wr = srq->msrq.max - 1;
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index b0d0687c7a68..2e8fa8767fd7 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -83,11 +83,11 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
struct mlx5_srq_table *table = &dev->srq_table;
struct mlx5_core_srq *srq;
- xa_lock(&table->array);
+ xa_lock_irq(&table->array);
srq = xa_load(&table->array, srqn);
if (srq)
atomic_inc(&srq->common.refcount);
- xa_unlock(&table->array);
+ xa_unlock_irq(&table->array);
return srq;
}
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index dfdd1e16de7f..a37fb659c54e 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -76,7 +76,7 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str)
struct qedr_dev *qedr = get_qedr_dev(ibdev);
u32 fw_ver = (u32)qedr->attr.fw_ver;
- snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d",
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d",
(fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF,
(fw_ver >> 8) & 0xFF, fw_ver & 0xFF);
}
@@ -354,9 +354,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
return -ENOMEM;
spin_lock_init(&dev->sgid_lock);
+ xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ);
if (IS_IWARP(dev)) {
- xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ);
+ xa_init(&dev->qps);
dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq");
}
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 6175d1e98717..bbf72944b720 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -40,6 +40,7 @@
#include <linux/qed/qed_rdma_if.h>
#include <linux/qed/qede_rdma.h>
#include <linux/qed/roce_common.h>
+#include <linux/completion.h>
#include "qedr_hsi_rdma.h"
#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
@@ -346,10 +347,10 @@ struct qedr_srq_hwq_info {
u32 wqe_prod;
u32 sge_prod;
u32 wr_prod_cnt;
- u32 wr_cons_cnt;
+ atomic_t wr_cons_cnt;
u32 num_elems;
- u32 *virt_prod_pair_addr;
+ struct rdma_srq_producers *virt_prod_pair_addr;
dma_addr_t phy_prod_pair_addr;
};
@@ -375,10 +376,20 @@ enum qedr_qp_err_bitmap {
QEDR_QP_ERR_RQ_PBL_FULL = 32,
};
+enum qedr_qp_create_type {
+ QEDR_QP_CREATE_NONE,
+ QEDR_QP_CREATE_USER,
+ QEDR_QP_CREATE_KERNEL,
+};
+
+enum qedr_iwarp_cm_flags {
+ QEDR_IWARP_CM_WAIT_FOR_CONNECT = BIT(0),
+ QEDR_IWARP_CM_WAIT_FOR_DISCONNECT = BIT(1),
+};
+
struct qedr_qp {
struct ib_qp ibqp; /* must be first */
struct qedr_dev *dev;
- struct qedr_iw_ep *ep;
struct qedr_qp_hwq_info sq;
struct qedr_qp_hwq_info rq;
@@ -393,6 +404,7 @@ struct qedr_qp {
u32 id;
struct qedr_pd *pd;
enum ib_qp_type qp_type;
+ enum qedr_qp_create_type create_type;
struct qed_rdma_qp *qed_qp;
u32 qp_id;
u16 icid;
@@ -435,8 +447,11 @@ struct qedr_qp {
/* Relevant to qps created from user space only (applications) */
struct qedr_userq usq;
struct qedr_userq urq;
- atomic_t refcnt;
- bool destroyed;
+
+ /* synchronization objects used with iwarp ep */
+ struct kref refcnt;
+ struct completion iwarp_cm_comp;
+ unsigned long iwarp_cm_flags; /* enum qedr_iwarp_cm_flags */
};
struct qedr_ah {
@@ -529,7 +544,7 @@ struct qedr_iw_ep {
struct iw_cm_id *cm_id;
struct qedr_qp *qp;
void *qed_context;
- u8 during_connect;
+ struct kref refcnt;
};
static inline
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
index 22881d4442b9..180f26794f28 100644
--- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -79,6 +79,27 @@ qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info,
}
}
+static void qedr_iw_free_qp(struct kref *ref)
+{
+ struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt);
+
+ kfree(qp);
+}
+
+static void
+qedr_iw_free_ep(struct kref *ref)
+{
+ struct qedr_iw_ep *ep = container_of(ref, struct qedr_iw_ep, refcnt);
+
+ if (ep->qp)
+ kref_put(&ep->qp->refcnt, qedr_iw_free_qp);
+
+ if (ep->cm_id)
+ ep->cm_id->rem_ref(ep->cm_id);
+
+ kfree(ep);
+}
+
static void
qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params)
{
@@ -93,6 +114,7 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params)
ep->dev = dev;
ep->qed_context = params->ep_context;
+ kref_init(&ep->refcnt);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
@@ -128,8 +150,17 @@ qedr_iw_issue_event(void *context,
if (params->cm_info) {
event.ird = params->cm_info->ird;
event.ord = params->cm_info->ord;
- event.private_data_len = params->cm_info->private_data_len;
- event.private_data = (void *)params->cm_info->private_data;
+ /* Only connect_request and reply have valid private data;
+ * for the rest of the events it may be left over from
+ * connection establishment. CONNECT_REQUEST is issued via
+ * qedr_iw_mpa_request.
+ */
+ if (event_type == IW_CM_EVENT_CONNECT_REPLY) {
+ event.private_data_len =
+ params->cm_info->private_data_len;
+ event.private_data =
+ (void *)params->cm_info->private_data;
+ }
}
if (ep->cm_id)
@@ -141,12 +172,10 @@ qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params)
{
struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
- if (ep->cm_id) {
+ if (ep->cm_id)
qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE);
- ep->cm_id->rem_ref(ep->cm_id);
- ep->cm_id = NULL;
- }
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
}
static void
@@ -186,11 +215,13 @@ static void qedr_iw_disconnect_worker(struct work_struct *work)
struct qedr_qp *qp = ep->qp;
struct iw_cm_event event;
- if (qp->destroyed) {
- kfree(dwork);
- qedr_iw_qp_rem_ref(&qp->ibqp);
- return;
- }
+ /* The qp won't be released until we release the ep;
+ * the ep's refcnt was increased before calling this
+ * function, therefore it is safe to access the qp.
+ */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
+ &qp->iwarp_cm_flags))
+ goto out;
memset(&event, 0, sizeof(event));
event.status = dwork->status;
@@ -204,7 +235,6 @@ static void qedr_iw_disconnect_worker(struct work_struct *work)
else
qp_params.new_state = QED_ROCE_QP_STATE_SQD;
- kfree(dwork);
if (ep->cm_id)
ep->cm_id->event_handler(ep->cm_id, &event);
@@ -214,7 +244,10 @@ static void qedr_iw_disconnect_worker(struct work_struct *work)
dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params);
- qedr_iw_qp_rem_ref(&qp->ibqp);
+ complete(&ep->qp->iwarp_cm_comp);
+out:
+ kfree(dwork);
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
}
static void
@@ -224,13 +257,17 @@ qedr_iw_disconnect_event(void *context,
struct qedr_discon_work *work;
struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
struct qedr_dev *dev = ep->dev;
- struct qedr_qp *qp = ep->qp;
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return;
- qedr_iw_qp_add_ref(&qp->ibqp);
+ /* We can't get a close event before disconnect, but since
+ * we're deferring to a work queue we need to make sure close
+ * won't delete the ep, so we increase the refcnt
+ */
+ kref_get(&ep->refcnt);
+
work->ep = ep;
work->event = params->event;
work->status = params->status;
@@ -252,16 +289,30 @@ qedr_iw_passive_complete(void *context,
if ((params->status == -ECONNREFUSED) && (!ep->qp)) {
DP_DEBUG(dev, QEDR_MSG_IWARP,
"PASSIVE connection refused releasing ep...\n");
- kfree(ep);
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
return;
}
+ complete(&ep->qp->iwarp_cm_comp);
qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED);
if (params->status < 0)
qedr_iw_close_event(context, params);
}
+static void
+qedr_iw_active_complete(void *context,
+ struct qed_iwarp_cm_event_params *params)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+
+ complete(&ep->qp->iwarp_cm_comp);
+ qedr_iw_issue_event(context, params, IW_CM_EVENT_CONNECT_REPLY);
+
+ if (params->status < 0)
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
+}
+
static int
qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params)
{
@@ -288,27 +339,15 @@ qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params)
qedr_iw_mpa_reply(context, params);
break;
case QED_IWARP_EVENT_PASSIVE_COMPLETE:
- ep->during_connect = 0;
qedr_iw_passive_complete(context, params);
break;
-
case QED_IWARP_EVENT_ACTIVE_COMPLETE:
- ep->during_connect = 0;
- qedr_iw_issue_event(context,
- params,
- IW_CM_EVENT_CONNECT_REPLY);
- if (params->status < 0) {
- struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
-
- ep->cm_id->rem_ref(ep->cm_id);
- ep->cm_id = NULL;
- }
+ qedr_iw_active_complete(context, params);
break;
case QED_IWARP_EVENT_DISCONNECT:
qedr_iw_disconnect_event(context, params);
break;
case QED_IWARP_EVENT_CLOSE:
- ep->during_connect = 0;
qedr_iw_close_event(context, params);
break;
case QED_IWARP_EVENT_RQ_EMPTY:
@@ -476,6 +515,19 @@ qedr_addr6_resolve(struct qedr_dev *dev,
return rc;
}
+static struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn)
+{
+ struct qedr_qp *qp;
+
+ xa_lock(&dev->qps);
+ qp = xa_load(&dev->qps, qpn);
+ if (qp)
+ kref_get(&qp->refcnt);
+ xa_unlock(&dev->qps);
+
+ return qp;
+}
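
qedr_iw_load_qp() bundles the xarray lookup and the kref_get() under one lock, so the qp cannot be freed between being found and being pinned. A reduced sketch of the pattern, with a pthread mutex and a plain counter standing in for the xarray and kref:

    #include <pthread.h>
    #include <stdio.h>

    struct obj { int refcnt; }; /* kref stand-in, protected by the lock */

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct obj entry = { .refcnt = 1 };
    static struct obj *slot = &entry; /* xarray slot stand-in */

    /* Lookup and reference bump happen under one lock, so the object
     * cannot vanish between "found" and "pinned". */
    static struct obj *load_obj(void)
    {
        struct obj *o;

        pthread_mutex_lock(&table_lock);
        o = slot;
        if (o)
            o->refcnt++;
        pthread_mutex_unlock(&table_lock);
        return o;
    }

    int main(void)
    {
        struct obj *o = load_obj();

        printf("refcnt after lookup: %d\n", o ? o->refcnt : 0);
        return 0;
    }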
+
int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
struct qedr_dev *dev = get_qedr_dev(cm_id->device);
@@ -491,10 +543,6 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
int rc = 0;
int i;
- qp = xa_load(&dev->qps, conn_param->qpn);
- if (unlikely(!qp))
- return -EINVAL;
-
laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
@@ -516,8 +564,15 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
return -ENOMEM;
ep->dev = dev;
+ kref_init(&ep->refcnt);
+
+ qp = qedr_iw_load_qp(dev, conn_param->qpn);
+ if (!qp) {
+ rc = -EINVAL;
+ goto err;
+ }
+
ep->qp = qp;
- qp->ep = ep;
cm_id->add_ref(cm_id);
ep->cm_id = cm_id;
@@ -580,16 +635,20 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
in_params.qp = qp->qed_qp;
memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN);
- ep->during_connect = 1;
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+ &qp->iwarp_cm_flags))
+ goto err; /* QP already being destroyed */
+
rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params);
- if (rc)
+ if (rc) {
+ complete(&qp->iwarp_cm_comp);
goto err;
+ }
return rc;
err:
- cm_id->rem_ref(cm_id);
- kfree(ep);
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
return rc;
}
@@ -677,18 +736,17 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct qedr_dev *dev = ep->dev;
struct qedr_qp *qp;
struct qed_iwarp_accept_in params;
- int rc;
+ int rc = 0;
DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn);
- qp = xa_load(&dev->qps, conn_param->qpn);
+ qp = qedr_iw_load_qp(dev, conn_param->qpn);
if (!qp) {
DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn);
return -EINVAL;
}
ep->qp = qp;
- qp->ep = ep;
cm_id->add_ref(cm_id);
ep->cm_id = cm_id;
@@ -700,15 +758,21 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
params.ird = conn_param->ird;
params.ord = conn_param->ord;
- ep->during_connect = 1;
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+ &qp->iwarp_cm_flags))
+ goto err; /* QP already destroyed */
+
rc = dev->ops->iwarp_accept(dev->rdma_ctx, &params);
- if (rc)
+ if (rc) {
+ complete(&qp->iwarp_cm_comp);
goto err;
+ }
return rc;
+
err:
- ep->during_connect = 0;
- cm_id->rem_ref(cm_id);
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
+
return rc;
}
@@ -731,17 +795,14 @@ void qedr_iw_qp_add_ref(struct ib_qp *ibqp)
{
struct qedr_qp *qp = get_qedr_qp(ibqp);
- atomic_inc(&qp->refcnt);
+ kref_get(&qp->refcnt);
}
void qedr_iw_qp_rem_ref(struct ib_qp *ibqp)
{
struct qedr_qp *qp = get_qedr_qp(ibqp);
- if (atomic_dec_and_test(&qp->refcnt)) {
- xa_erase_irq(&qp->dev->qps, qp->qp_id);
- kfree(qp);
- }
+ kref_put(&qp->refcnt, qedr_iw_free_qp);
}
struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn)
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 3d7bde19838e..9d3204410ead 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -51,6 +51,7 @@
#include "verbs.h"
#include <rdma/qedr-abi.h>
#include "qedr_roce_cm.h"
+#include "qedr_iw_cm.h"
#define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm)
#define RDMA_MAX_SGE_PER_SRQ (4)
@@ -1226,7 +1227,10 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
struct ib_qp_init_attr *attrs)
{
spin_lock_init(&qp->q_lock);
- atomic_set(&qp->refcnt, 1);
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ kref_init(&qp->refcnt);
+ init_completion(&qp->iwarp_cm_comp);
+ }
qp->pd = pd;
qp->qp_type = attrs->qp_type;
qp->max_inline_data = attrs->cap.max_inline_data;
@@ -1612,6 +1616,14 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
if (qp->urq.umem)
ib_umem_release(qp->urq.umem);
qp->urq.umem = NULL;
+
+ if (rdma_protocol_roce(&dev->ibdev, 1)) {
+ qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
+ qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
+ } else {
+ kfree(qp->usq.pbl_tbl);
+ kfree(qp->urq.pbl_tbl);
+ }
}
static int qedr_create_user_qp(struct qedr_dev *dev,
@@ -1627,6 +1639,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
int rc = -EINVAL;
+ qp->create_type = QEDR_QP_CREATE_USER;
memset(&ureq, 0, sizeof(ureq));
rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
if (rc) {
@@ -1840,6 +1853,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev,
u32 n_sq_entries;
memset(&in_params, 0, sizeof(in_params));
+ qp->create_type = QEDR_QP_CREATE_KERNEL;
/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
* the ring. The ring should allow at least a single WR, even if the
@@ -1953,7 +1967,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
qp->ibqp.qp_num = qp->qp_id;
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
- rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
+ rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
if (rc)
goto err;
}
@@ -2472,7 +2486,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
return rc;
}
- if (udata)
+ if (qp->create_type == QEDR_QP_CREATE_USER)
qedr_cleanup_user(dev, qp);
else
qedr_cleanup_kernel(dev, qp);
@@ -2503,34 +2517,44 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
}
} else {
- /* Wait for the connect/accept to complete */
- if (qp->ep) {
- int wait_count = 1;
-
- while (qp->ep->during_connect) {
- DP_DEBUG(dev, QEDR_MSG_QP,
- "Still in during connect/accept\n");
-
- msleep(100);
- if (wait_count++ > 200) {
- DP_NOTICE(dev,
- "during connect timeout\n");
- break;
- }
- }
- }
+ /* If connection establishment started, the WAIT_FOR_CONNECT
+ * bit will be on and we need to wait for the establishment
+ * to complete before destroying the qp.
+ */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+ &qp->iwarp_cm_flags))
+ wait_for_completion(&qp->iwarp_cm_comp);
+
+ /* If graceful disconnect started, the WAIT_FOR_DISCONNECT
+ * bit will be on, and we need to wait for the disconnect to
+ * complete before continuing. We can use the same completion,
+ * iwarp_cm_comp, since this is the only place that waits for
+ * this completion and it is sequential. In addition,
+ * disconnect can't occur before the connection is fully
+ * established, therefore if WAIT_FOR_DISCONNECT is on it
+ * means WAIT_FOR_CONNECT is also on and the completion for
+ * CONNECT already occurred.
+ */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
+ &qp->iwarp_cm_flags))
+ wait_for_completion(&qp->iwarp_cm_comp);
}
if (qp->qp_type == IB_QPT_GSI)
qedr_destroy_gsi_qp(dev);
+ /* We need to remove the entry from the xarray before we release the
+ * qp_id to avoid a race where the qp_id is reallocated and a later
+ * xa_insert fails.
+ */
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ xa_erase(&dev->qps, qp->qp_id);
+
qedr_free_qp_resources(dev, qp, udata);
- if (atomic_dec_and_test(&qp->refcnt) &&
- rdma_protocol_iwarp(&dev->ibdev, 1)) {
- xa_erase_irq(&dev->qps, qp->qp_id);
- kfree(qp);
- }
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ qedr_iw_qp_rem_ref(&qp->ibqp);
+
return rc;
}
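
Connect/accept and destroy now race for the same WAIT_FOR_CONNECT bit: whichever path sets it second learns the other side is active and either aborts or waits on iwarp_cm_comp. The core "second setter yields" idea, reduced to a single C11 atomic flag:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Illustration only: the test_and_set_bit() handshake behind
     * QEDR_IWARP_CM_WAIT_FOR_CONNECT, shrunk to one flag. */
    static atomic_bool cm_active;

    static void connect_path(void)
    {
        if (atomic_exchange(&cm_active, true))
            printf("connect: destroy already ran, abort connect\n");
        else
            printf("connect: claimed the flag, proceed\n");
    }

    static void destroy_path(void)
    {
        if (atomic_exchange(&cm_active, true))
            printf("destroy: connect in flight, wait for completion\n");
        else
            printf("destroy: nothing in flight, tear down now\n");
    }

    int main(void)
    {
        connect_path();
        destroy_path();
        return 0;
    }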
@@ -2709,8 +2733,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
- if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
- qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
+ if (mr->type != QEDR_MR_DMA)
+ free_mr_info(dev, &mr->info);
/* it could be user registered memory. */
if (mr->umem)
@@ -3473,7 +3497,7 @@ static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
* count and consumer count and subtract it from max
* work requests supported so that we get elements left.
*/
- used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
+ used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
return hw_srq->max_wr - used;
}
@@ -3488,7 +3512,6 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
unsigned long flags;
int status = 0;
u32 num_sge;
- u32 offset;
spin_lock_irqsave(&srq->lock, flags);
@@ -3501,7 +3524,8 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
if (!qedr_srq_elem_left(hw_srq) ||
wr->num_sge > srq->hw_srq.max_sges) {
DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
- hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
+ hw_srq->wr_prod_cnt,
+ atomic_read(&hw_srq->wr_cons_cnt),
wr->num_sge, srq->hw_srq.max_sges);
status = -ENOMEM;
*bad_wr = wr;
@@ -3535,22 +3559,20 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
hw_srq->sge_prod++;
}
- /* Flush WQE and SGE information before
+ /* Update WQE and SGE information before
* updating producer.
*/
- wmb();
+ dma_wmb();
/* SRQ producer is 8 bytes. Need to update SGE producer index
* in first 4 bytes and need to update WQE producer in
* next 4 bytes.
*/
- *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
- offset = offsetof(struct rdma_srq_producers, wqe_prod);
- *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
- hw_srq->wqe_prod;
+ srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod;
+ /* Make sure sge producer is updated first */
+ dma_wmb();
+ srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod;
- /* Flush producer after updating it. */
- wmb();
wr = wr->next;
}
@@ -3969,7 +3991,7 @@ static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
} else {
__process_resp_one(dev, qp, cq, wc, resp, wr_id);
}
- srq->hw_srq.wr_cons_cnt++;
+ atomic_inc(&srq->hw_srq.wr_cons_cnt);
return 1;
}
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 78fa634de98a..27b6e664e59d 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt)
static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
{
struct qib_filedata *fd = fp->private_data;
- const unsigned int weight = cpumask_weight(&current->cpus_allowed);
+ const unsigned int weight = current->nr_cpus_allowed;
const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
int local_cpu;
@@ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
ret = find_free_ctxt(i_minor - 1, fp, uinfo);
else {
int unit;
- const unsigned int cpu = cpumask_first(&current->cpus_allowed);
- const unsigned int weight =
- cpumask_weight(&current->cpus_allowed);
+ const unsigned int cpu = cpumask_first(current->cpus_ptr);
+ const unsigned int weight = current->nr_cpus_allowed;
if (weight == 1 && !test_bit(cpu, qib_cpulist))
if (!find_hca(cpu, &unit) && unit >= 0)
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 905206a0c2d5..d7d74dda3aee 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -301,6 +301,9 @@ static ssize_t qib_portattr_show(struct kobject *kobj,
struct qib_pportdata *ppd =
container_of(kobj, struct qib_pportdata, pport_kobj);
+ if (!pattr->show)
+ return -EIO;
+
return pattr->show(ppd, buf);
}
@@ -312,6 +315,9 @@ static ssize_t qib_portattr_store(struct kobject *kobj,
struct qib_pportdata *ppd =
container_of(kobj, struct qib_pportdata, pport_kobj);
+ if (!pattr->store)
+ return -EIO;
+
return pattr->store(ppd, buf, len);
}
@@ -752,7 +758,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd,
"Skipping linkcontrol sysfs info, (err %d) port %u\n",
ret, port_num);
- goto bail;
+ goto bail_link;
}
kobject_uevent(&ppd->pport_kobj, KOBJ_ADD);
@@ -762,7 +768,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd,
"Skipping sl2vl sysfs info, (err %d) port %u\n",
ret, port_num);
- goto bail_link;
+ goto bail_sl;
}
kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
@@ -772,7 +778,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd,
"Skipping diag_counters sysfs info, (err %d) port %u\n",
ret, port_num);
- goto bail_sl;
+ goto bail_diagc;
}
kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
@@ -785,7 +791,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd,
"Skipping Congestion Control sysfs info, (err %d) port %u\n",
ret, port_num);
- goto bail_diagc;
+ goto bail_cc;
}
kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
@@ -846,6 +852,7 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
&cc_table_bin_attr);
kobject_put(&ppd->pport_cc_kobj);
}
+ kobject_put(&ppd->diagc_kobj);
kobject_put(&ppd->sl2vl_kobj);
kobject_put(&ppd->pport_kobj);
}
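
The relabeled gotos restore the usual error-unwind ladder, where each label undoes exactly what succeeded before the failure, and the unregister path gains the previously missing kobject_put() for diagc. A minimal sketch of the ladder idiom; the names are invented for the demo:

    #include <stdio.h>

    static int init_a(void) { return 0; }
    static int init_b(void) { return -1; } /* pretend this step fails */
    static void undo_a(void) { puts("undo a"); }

    /* Each label undoes only the steps that actually succeeded. */
    static int setup(void)
    {
        int ret;

        ret = init_a();
        if (ret)
            goto bail;
        ret = init_b();
        if (ret)
            goto bail_a; /* undo a, not b: b never succeeded */
        return 0;

    bail_a:
        undo_a();
    bail:
        return ret;
    }

    int main(void)
    {
        printf("setup() = %d\n", setup());
        return 0;
    }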
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 2c4e569ce438..12c8ec59e657 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -329,8 +329,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
if (mcast == NULL)
goto drop;
this_cpu_inc(ibp->pmastats->n_multicast_rcv);
+ rcu_read_lock();
list_for_each_entry_rcu(p, &mcast->qp_list, list)
qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
+ rcu_read_unlock();
/*
* Notify rvt_multicast_detach() if it is waiting for us
* to finish.
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 40182297f87f..6f38dc7c6a07 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -827,7 +827,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
ret = -ENOMEM;
- goto err_free_device;
+ goto err_disable_pdev;
}
ret = pci_request_regions(pdev, DRV_NAME);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index cb9e171d7e7b..78809766fc5f 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -59,6 +59,8 @@
#include "trace.h"
static void rvt_rc_timeout(struct timer_list *t);
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type);
/*
* Convert the AETH RNR timeout code into the number of microseconds.
@@ -450,40 +452,41 @@ no_qp_table:
}
/**
- * free_all_qps - check for QPs still in use
+ * rvt_free_qp_cb - callback function to reset a qp
+ * @qp: the qp to reset
+ * @v: a 64-bit value
+ *
+ * This function resets the qp and removes it from the
+ * qp hash table.
+ */
+static void rvt_free_qp_cb(struct rvt_qp *qp, u64 v)
+{
+ unsigned int *qp_inuse = (unsigned int *)v;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ /* Reset the qp and remove it from the qp hash list */
+ rvt_reset_qp(rdi, qp, qp->ibqp.qp_type);
+
+ /* Increment the qp_inuse count */
+ (*qp_inuse)++;
+}
+
+/**
+ * rvt_free_all_qps - check for QPs still in use
* @rdi: rvt device info structure
*
* There should not be any QPs still in use.
* Free memory for table.
+ * Return the number of QPs still in use.
*/
static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
{
- unsigned long flags;
- struct rvt_qp *qp;
- unsigned n, qp_inuse = 0;
- spinlock_t *ql; /* work around too long line below */
-
- if (rdi->driver_f.free_all_qps)
- qp_inuse = rdi->driver_f.free_all_qps(rdi);
+ unsigned int qp_inuse = 0;
qp_inuse += rvt_mcast_tree_empty(rdi);
- if (!rdi->qp_dev)
- return qp_inuse;
+ rvt_qp_iter(rdi, (u64)&qp_inuse, rvt_free_qp_cb);
- ql = &rdi->qp_dev->qpt_lock;
- spin_lock_irqsave(ql, flags);
- for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
- qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
- lockdep_is_held(ql));
- RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
-
- for (; qp; qp = rcu_dereference_protected(qp->next,
- lockdep_is_held(ql)))
- qp_inuse++;
- }
- spin_unlock_irqrestore(ql, flags);
- synchronize_rcu();
return qp_inuse;
}
@@ -861,14 +864,14 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
}
/**
- * rvt_reset_qp - initialize the QP state to the reset state
+ * _rvt_reset_qp - initialize the QP state to the reset state
* @qp: the QP to reset
* @type: the QP type
*
* r_lock, s_hlock, and s_lock are required to be held by the caller
*/
-static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
- enum ib_qp_type type)
+static void _rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
__must_hold(&qp->s_lock)
__must_hold(&qp->s_hlock)
__must_hold(&qp->r_lock)
@@ -914,6 +917,27 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
lockdep_assert_held(&qp->s_lock);
}
+/**
+ * rvt_reset_qp - initialize the QP state to the reset state
+ * @rdi: the device info
+ * @qp: the QP to reset
+ * @type: the QP type
+ *
+ * This is the wrapper function to acquire the r_lock, s_hlock, and s_lock
+ * before calling _rvt_reset_qp().
+ */
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
+{
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+ _rvt_reset_qp(rdi, qp, type);
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+}
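
Wrapping the triple lock acquisition in rvt_reset_qp() keeps the r_lock -> s_hlock -> s_lock ordering in one place, so callers such as rvt_free_qp_cb() and rvt_destroy_qp() cannot get it wrong. A compilable sketch of the wrapper idea using pthread mutexes (build with -pthread):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t r_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t s_hlock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t s_lock = PTHREAD_MUTEX_INITIALIZER;

    static void _reset(void) /* caller holds all three locks */
    {
        puts("qp reset under r_lock -> s_hlock -> s_lock");
    }

    /* One function owns the acquisition order, every caller goes
     * through it, so the order cannot drift between call sites. */
    static void reset(void)
    {
        pthread_mutex_lock(&r_lock);
        pthread_mutex_lock(&s_hlock);
        pthread_mutex_lock(&s_lock);
        _reset();
        pthread_mutex_unlock(&s_lock);
        pthread_mutex_unlock(&s_hlock);
        pthread_mutex_unlock(&r_lock);
    }

    int main(void)
    {
        reset();
        return 0;
    }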
+
/** rvt_free_qpn - Free a qpn from the bit map
* @qpt: QP table
* @qpn: queue pair number to free
@@ -1465,7 +1489,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
switch (new_state) {
case IB_QPS_RESET:
if (qp->state != IB_QPS_RESET)
- rvt_reset_qp(rdi, qp, ibqp->qp_type);
+ _rvt_reset_qp(rdi, qp, ibqp->qp_type);
break;
case IB_QPS_RTR:
@@ -1614,13 +1638,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_hlock);
- spin_lock(&qp->s_lock);
rvt_reset_qp(rdi, qp, ibqp->qp_type);
- spin_unlock(&qp->s_lock);
- spin_unlock(&qp->s_hlock);
- spin_unlock_irq(&qp->r_lock);
wait_event(qp->wait, !atomic_read(&qp->refcount));
/* qpn is now available for use again */
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 00eb99d3df86..7c5999ada61d 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -329,7 +329,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
}
return COMPST_ERROR_RETRY;
@@ -463,7 +463,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
}
@@ -479,7 +479,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
if (qp->req.need_rd_atomic) {
qp->comp.timeout_retry = 0;
qp->req.need_rd_atomic = 0;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
}
@@ -725,7 +725,7 @@ int rxe_completer(void *arg)
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
- rxe_run_task(&qp->req.task, 1);
+ rxe_run_task(&qp->req.task, 0);
}
if (pkt) {
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 5a3474f9351b..312c2fc961c0 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -117,10 +117,12 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
memcpy(&fl6.daddr, daddr, sizeof(*daddr));
fl6.flowi6_proto = IPPROTO_UDP;
- if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk),
- recv_sockets.sk6->sk, &ndst, &fl6))) {
+ ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk),
+ recv_sockets.sk6->sk, &fl6,
+ NULL);
+ if (unlikely(IS_ERR(ndst))) {
pr_err_ratelimited("no route to %pI6\n", daddr);
- goto put;
+ return NULL;
}
if (unlikely(ndst->error)) {
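This hunk tracks the ipv6_stub API change: ipv6_dst_lookup() returned an
int and filled a dst_entry out-parameter, while ipv6_dst_lookup_flow()
returns the dst_entry directly (ERR_PTR-encoded on failure) and takes
the final destination address, NULL here, as a fourth argument. A sketch
of the new calling convention, with net/sk standing in for the
socket-derived values used above:

struct dst_entry *ndst;

ndst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL);
if (IS_ERR(ndst))
	return NULL;		/* lookup failed, nothing to release */
if (ndst->error) {
	dst_release(ndst);	/* a real dst was returned, so drop it */
	return NULL;
}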
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index f9a492ed900b..46e111c218fd 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -330,10 +330,14 @@ err1:
static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
{
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
const struct ib_gid_attr *gid_attr;
union ib_gid dgid;
union ib_gid *pdgid;
+ if (pkt->mask & RXE_LOOPBACK_MASK)
+ return 0;
+
if (skb->protocol == htons(ETH_P_IP)) {
ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
(struct in6_addr *)&dgid);
@@ -366,7 +370,7 @@ void rxe_rcv(struct sk_buff *skb)
if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))
goto drop;
- if (unlikely(rxe_match_dgid(rxe, skb) < 0)) {
+ if (rxe_match_dgid(rxe, skb) < 0) {
pr_warn_ratelimited("failed matching dgid\n");
goto drop;
}
@@ -389,7 +393,7 @@ void rxe_rcv(struct sk_buff *skb)
calc_icrc = rxe_icrc_hdr(pkt, skb);
calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt),
- payload_size(pkt));
+ payload_size(pkt) + bth_pad(pkt));
calc_icrc = (__force u32)cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
if (skb->protocol == htons(ETH_P_IPV6))
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index c5d9b558fa90..e5031172c019 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -500,6 +500,12 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (err)
return err;
}
+ if (bth_pad(pkt)) {
+ u8 *pad = payload_addr(pkt) + paylen;
+
+ memset(pad, 0, bth_pad(pkt));
+ crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt));
+ }
}
p = payload_addr(pkt) + paylen + bth_pad(pkt);
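This hunk, together with the matching one in rxe_resp.c below and the
rxe_rcv() change above, makes the ICRC cover the BTH pad bytes: the BTH
PadCnt field pads the payload to a 4-byte boundary, and those pad bytes
are part of the ICRC input, so they must be zeroed and folded into the
running CRC on send and included on receive. For illustration only, a
hypothetical helper (not part of this patch) computing the pad count:

/* Bytes of padding needed to reach a 4-byte boundary, i.e. the value
 * carried in the BTH PadCnt field. Example: 7 payload bytes -> 1 pad. */
static inline u8 icrc_pad_count(u32 payload_len)
{
	return (-payload_len) & 3;
}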
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 1cbfbd98eb22..c4a8195bf670 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -732,6 +732,13 @@ static enum resp_states read_reply(struct rxe_qp *qp,
if (err)
pr_err("Failed copying memory\n");
+ if (bth_pad(&ack_pkt)) {
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ u8 *pad = payload_addr(&ack_pkt) + payload;
+
+ memset(pad, 0, bth_pad(&ack_pkt));
+ icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt));
+ }
p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
*p = ~icrc;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 8c3e2a18cfe4..75a41f99a23a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -679,6 +679,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
unsigned int mask;
unsigned int length = 0;
int i;
+ struct ib_send_wr *next;
while (wr) {
mask = wr_opcode_mask(wr->opcode, qp);
@@ -695,6 +696,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
break;
}
+ next = wr->next;
+
length = 0;
for (i = 0; i < wr->num_sge; i++)
length += wr->sg_list[i].length;
@@ -705,7 +708,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
*bad_wr = wr;
break;
}
- wr = wr->next;
+ wr = next;
}
rxe_run_task(&qp->req.task, 1);
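The rxe_post_send_kernel() change is the usual safe-iteration fix: once
post_one_send() queues the WQE, the work request may complete and be
reclaimed by the caller, so wr->next has to be captured before posting.
The resulting loop shape, condensed from the hunks above (mask/length
computation elided):

while (wr) {
	struct ib_send_wr *next = wr->next;	/* wr may be reclaimed once posted */

	err = post_one_send(qp, wr, mask, length);
	if (err) {
		*bad_wr = wr;
		break;
	}
	wr = next;	/* never dereference wr after a successful post */
}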
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 28bfb3ece104..3536ab43eed3 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -407,7 +407,7 @@ struct rxe_dev {
struct list_head pending_mmaps;
spinlock_t mmap_offset_lock; /* guard mmap_offset */
- int mmap_offset;
+ u64 mmap_offset;
atomic64_t stats_counters[RXE_NUM_OF_COUNTERS];
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 2aa3457a30ce..50a355738609 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -377,8 +377,12 @@ struct ipoib_dev_priv {
struct ipoib_rx_buf *rx_ring;
struct ipoib_tx_buf *tx_ring;
+ /* cyclic ring variables for managing tx_ring, for UD only */
unsigned int tx_head;
unsigned int tx_tail;
+ /* cyclic ring variables for counting overall outstanding send WRs */
+ unsigned int global_tx_head;
+ unsigned int global_tx_tail;
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_ud_wr tx_wr;
struct ib_wc send_wc[MAX_SEND_CQE];
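These head/tail pairs are free-running unsigned counters rather than
wrapped indices: unsigned subtraction is modulo 2^32, so head - tail
yields the number of outstanding send WRs even after the counters wrap,
which is what the stop/wake checks in the hunks below rely on. A sketch
with a hypothetical helper name:

/* Outstanding entries in a free-running ring; correct across
 * wraparound because unsigned subtraction is modular. */
static inline unsigned int ipoib_tx_outstanding(const struct ipoib_dev_priv *priv)
{
	return priv->global_tx_head - priv->global_tx_tail;
}

/* e.g. stop the net queue one slot before the ring fills */
if (ipoib_tx_outstanding(priv) == ipoib_sendq_size - 1)
	netif_stop_queue(dev);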
@@ -511,7 +515,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open_default(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
-int ipoib_ib_dev_stop(struct net_device *dev);
+void ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_ib_dev_up(struct net_device *dev);
void ipoib_ib_dev_down(struct net_device *dev);
int ipoib_ib_dev_stop_default(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index aa9dcfc36cd3..196f1e6b5396 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -756,7 +756,8 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
return;
}
- if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size - 1) {
+ if ((priv->global_tx_head - priv->global_tx_tail) ==
+ ipoib_sendq_size - 1) {
ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
tx->qp->qp_num);
netif_stop_queue(dev);
@@ -786,7 +787,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
} else {
netif_trans_update(dev);
++tx->tx_head;
- ++priv->tx_head;
+ ++priv->global_tx_head;
}
}
@@ -820,10 +821,11 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
netif_tx_lock(dev);
++tx->tx_tail;
- ++priv->tx_tail;
+ ++priv->global_tx_tail;
if (unlikely(netif_queue_stopped(dev) &&
- (priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1 &&
+ ((priv->global_tx_head - priv->global_tx_tail) <=
+ ipoib_sendq_size >> 1) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
netif_wake_queue(dev);
@@ -1233,8 +1235,9 @@ timeout:
dev_kfree_skb_any(tx_req->skb);
netif_tx_lock_bh(p->dev);
++p->tx_tail;
- ++priv->tx_tail;
- if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) &&
+ ++priv->global_tx_tail;
+ if (unlikely((priv->global_tx_head - priv->global_tx_tail) <=
+ ipoib_sendq_size >> 1) &&
netif_queue_stopped(p->dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(p->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 78fa777c87b1..7b598b66f235 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -406,9 +406,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail;
+ ++priv->global_tx_tail;
if (unlikely(netif_queue_stopped(dev) &&
- ((priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1) &&
+ ((priv->global_tx_head - priv->global_tx_tail) <=
+ ipoib_sendq_size >> 1) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
netif_wake_queue(dev);
@@ -633,7 +635,8 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
else
priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
/* increase the tx_head after send success, but use it for queue state */
- if (priv->tx_head - priv->tx_tail == ipoib_sendq_size - 1) {
+ if ((priv->global_tx_head - priv->global_tx_tail) ==
+ ipoib_sendq_size - 1) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
netif_stop_queue(dev);
}
@@ -661,17 +664,17 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
rc = priv->tx_head;
++priv->tx_head;
+ ++priv->global_tx_head;
}
return rc;
}
-static void __ipoib_reap_ah(struct net_device *dev)
+static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv)
{
- struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah, *tah;
unsigned long flags;
- netif_tx_lock_bh(dev);
+ netif_tx_lock_bh(priv->dev);
spin_lock_irqsave(&priv->lock, flags);
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
@@ -682,37 +685,37 @@ static void __ipoib_reap_ah(struct net_device *dev)
}
spin_unlock_irqrestore(&priv->lock, flags);
- netif_tx_unlock_bh(dev);
+ netif_tx_unlock_bh(priv->dev);
}
void ipoib_reap_ah(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, ah_reap_task.work);
- struct net_device *dev = priv->dev;
- __ipoib_reap_ah(dev);
+ ipoib_reap_dead_ahs(priv);
if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
}
-static void ipoib_flush_ah(struct net_device *dev)
+static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv)
{
- struct ipoib_dev_priv *priv = ipoib_priv(dev);
-
- cancel_delayed_work(&priv->ah_reap_task);
- flush_workqueue(priv->wq);
- ipoib_reap_ah(&priv->ah_reap_task.work);
+ clear_bit(IPOIB_STOP_REAPER, &priv->flags);
+ queue_delayed_work(priv->wq, &priv->ah_reap_task,
+ round_jiffies_relative(HZ));
}
-static void ipoib_stop_ah(struct net_device *dev)
+static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv)
{
- struct ipoib_dev_priv *priv = ipoib_priv(dev);
-
set_bit(IPOIB_STOP_REAPER, &priv->flags);
- ipoib_flush_ah(dev);
+ cancel_delayed_work(&priv->ah_reap_task);
+ /*
+	 * After ipoib_stop_ah_reaper() callers always go through
+	 * ipoib_reap_dead_ahs(), which ensures the work is really stopped and
+	 * does a final flush of the dead_ahs list.
+ */
}
static int recvs_pending(struct net_device *dev)
@@ -806,6 +809,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev)
ipoib_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail;
+ ++priv->global_tx_tail;
}
for (i = 0; i < ipoib_recvq_size; ++i) {
@@ -840,18 +844,6 @@ timeout:
return 0;
}
-int ipoib_ib_dev_stop(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = ipoib_priv(dev);
-
- priv->rn_ops->ndo_stop(dev);
-
- clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
- ipoib_flush_ah(dev);
-
- return 0;
-}
-
int ipoib_ib_dev_open_default(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -895,10 +887,7 @@ int ipoib_ib_dev_open(struct net_device *dev)
return -1;
}
- clear_bit(IPOIB_STOP_REAPER, &priv->flags);
- queue_delayed_work(priv->wq, &priv->ah_reap_task,
- round_jiffies_relative(HZ));
-
+ ipoib_start_ah_reaper(priv);
if (priv->rn_ops->ndo_open(dev)) {
pr_warn("%s: Failed to open dev\n", dev->name);
goto dev_stop;
@@ -909,13 +898,20 @@ int ipoib_ib_dev_open(struct net_device *dev)
return 0;
dev_stop:
- set_bit(IPOIB_STOP_REAPER, &priv->flags);
- cancel_delayed_work(&priv->ah_reap_task);
- set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
- ipoib_ib_dev_stop(dev);
+ ipoib_stop_ah_reaper(priv);
return -1;
}
+void ipoib_ib_dev_stop(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ priv->rn_ops->ndo_stop(dev);
+
+ clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ ipoib_stop_ah_reaper(priv);
+}
+
void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -1226,7 +1222,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
ipoib_mcast_dev_flush(dev);
if (oper_up)
set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
- ipoib_flush_ah(dev);
+ ipoib_reap_dead_ahs(priv);
}
if (level >= IPOIB_FLUSH_NORMAL)
@@ -1301,7 +1297,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
* the neighbor garbage collection is stopped and reaped.
* That should all be done now, so make a final ah flush.
*/
- ipoib_stop_ah(dev);
+ ipoib_reap_dead_ahs(priv);
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ac0583ff280d..044bcacad6e4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1188,9 +1188,11 @@ static void ipoib_timeout(struct net_device *dev)
ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
jiffies_to_msecs(jiffies - dev_trans_start(dev)));
- ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
- netif_queue_stopped(dev),
- priv->tx_head, priv->tx_tail);
+ ipoib_warn(priv,
+ "queue stopped %d, tx_head %u, tx_tail %u, global_tx_head %u, global_tx_tail %u\n",
+ netif_queue_stopped(dev), priv->tx_head, priv->tx_tail,
+ priv->global_tx_head, priv->global_tx_tail);
+
/* XXX reset QP, etc. */
}
@@ -1705,7 +1707,7 @@ static int ipoib_dev_init_default(struct net_device *dev)
goto out_rx_ring_cleanup;
}
- /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
+	/* priv->tx_head, tx_tail and global_tx_head/global_tx_tail are already 0 */
if (ipoib_transport_dev_init(dev, priv->ca)) {
pr_warn("%s: ipoib_transport_dev_init failed\n",
@@ -1977,6 +1979,8 @@ static void ipoib_ndo_uninit(struct net_device *dev)
/* no more works over the priv->wq */
if (priv->wq) {
+ /* See ipoib_mcast_carrier_on_task() */
+ WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags));
flush_workqueue(priv->wq);
destroy_workqueue(priv->wq);
priv->wq = NULL;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 9c185a8dabd3..695df55d1b2f 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -646,6 +646,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
if (ib_conn->pi_support) {
u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
+ shost->sg_prot_tablesize = shost->sg_tablesize;
scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
SHOST_DIX_GUARD_CRC);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 0b09d0cd9b3c..19b6cebfed19 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -2553,7 +2553,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
if (lrsp->opcode == SRP_LOGIN_RSP) {
ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
- ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP;
+ ch->use_imm_data = srp_use_imm_data &&
+ (lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP);
ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
ch->use_imm_data);
WARN_ON_ONCE(ch->max_it_iu_len >
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 1a039f16d315..7b9ff07d9118 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1364,9 +1364,11 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
struct srpt_send_ioctx *ioctx, u64 tag,
int status)
{
+ struct se_cmd *cmd = &ioctx->cmd;
struct srp_rsp *srp_rsp;
const u8 *sense_data;
int sense_data_len, max_sense_len;
+ u32 resid = cmd->residual_count;
/*
* The lowest bit of all SAM-3 status codes is zero (see also
@@ -1388,6 +1390,28 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
srp_rsp->tag = tag;
srp_rsp->status = status;
+ if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
+ if (cmd->data_direction == DMA_TO_DEVICE) {
+ /* residual data from an underflow write */
+ srp_rsp->flags = SRP_RSP_FLAG_DOUNDER;
+ srp_rsp->data_out_res_cnt = cpu_to_be32(resid);
+ } else if (cmd->data_direction == DMA_FROM_DEVICE) {
+ /* residual data from an underflow read */
+ srp_rsp->flags = SRP_RSP_FLAG_DIUNDER;
+ srp_rsp->data_in_res_cnt = cpu_to_be32(resid);
+ }
+ } else if (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) {
+ if (cmd->data_direction == DMA_TO_DEVICE) {
+ /* residual data from an overflow write */
+ srp_rsp->flags = SRP_RSP_FLAG_DOOVER;
+ srp_rsp->data_out_res_cnt = cpu_to_be32(resid);
+ } else if (cmd->data_direction == DMA_FROM_DEVICE) {
+ /* residual data from an overflow read */
+ srp_rsp->flags = SRP_RSP_FLAG_DIOVER;
+ srp_rsp->data_in_res_cnt = cpu_to_be32(resid);
+ }
+ }
+
if (sense_data_len) {
BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
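The block above reports SCSI residuals to the initiator: when the target
core flags an under- or overflow, cmd->residual_count is carried in the
SRP response's Data-Out counter for writes (DMA_TO_DEVICE) and Data-In
counter for reads (DMA_FROM_DEVICE). A hypothetical condensation of the
same logic, assuming exactly one flag is set and a plain read or write:

bool under = cmd->se_cmd_flags & SCF_UNDERFLOW_BIT;
bool out = cmd->data_direction == DMA_TO_DEVICE;

srp_rsp->flags = under ? (out ? SRP_RSP_FLAG_DOUNDER : SRP_RSP_FLAG_DIUNDER)
		       : (out ? SRP_RSP_FLAG_DOOVER : SRP_RSP_FLAG_DIOVER);
if (out)
	srp_rsp->data_out_res_cnt = cpu_to_be32(resid);
else
	srp_rsp->data_in_res_cnt = cpu_to_be32(resid);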