Diffstat (limited to 'drivers/infiniband')
100 files changed, 1243 insertions, 786 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index bf539c34ccd3..152b3069588a 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -139,7 +139,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb, if (ib_nl_is_good_ip_resp(nlh)) ib_nl_process_good_ip_rsep(nlh); - return skb->len; + return 0; } static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, @@ -421,16 +421,15 @@ static int addr6_resolve(struct sockaddr *src_sock, (const struct sockaddr_in6 *)dst_sock; struct flowi6 fl6; struct dst_entry *dst; - int ret; memset(&fl6, 0, sizeof fl6); fl6.daddr = dst_in->sin6_addr; fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; - ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6); - if (ret < 0) - return ret; + dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL); + if (IS_ERR(dst)) + return PTR_ERR(dst); if (ipv6_addr_any(&src_in->sin6_addr)) src_in->sin6_addr = fl6.saddr; diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 18e476b3ced0..bac02a44cc23 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -51,9 +51,8 @@ struct ib_pkey_cache { struct ib_update_work { struct work_struct work; - struct ib_device *device; - u8 port_num; - bool enforce_security; + struct ib_event event; + bool enforce_security; }; union ib_gid zgid; @@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) event.element.port_num = port; event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + ib_dispatch_event_clients(&event); } static const char * const gid_type_str[] = { @@ -1386,9 +1385,8 @@ err: return ret; } -static void ib_cache_update(struct ib_device *device, - u8 port, - bool enforce_security) +static int +ib_cache_update(struct ib_device *device, u8 port, bool enforce_security) { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; @@ -1396,11 +1394,11 @@ static void ib_cache_update(struct ib_device *device, int ret; if (!rdma_is_port_valid(device, port)) - return; + return -EINVAL; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) - return; + return -ENOMEM; ret = ib_query_port(device, port, tprops); if (ret) { @@ -1418,8 +1416,10 @@ static void ib_cache_update(struct ib_device *device, pkey_cache = kmalloc(struct_size(pkey_cache, table, tprops->pkey_tbl_len), GFP_KERNEL); - if (!pkey_cache) + if (!pkey_cache) { + ret = -ENOMEM; goto err; + } pkey_cache->table_len = tprops->pkey_tbl_len; @@ -1451,50 +1451,84 @@ static void ib_cache_update(struct ib_device *device, kfree(old_pkey_cache); kfree(tprops); - return; + return 0; err: kfree(pkey_cache); kfree(tprops); + return ret; +} + +static void ib_cache_event_task(struct work_struct *_work) +{ + struct ib_update_work *work = + container_of(_work, struct ib_update_work, work); + int ret; + + /* Before distributing the cache update event, first sync + * the cache. + */ + ret = ib_cache_update(work->event.device, work->event.element.port_num, + work->enforce_security); + + /* GID event is notified already for individual GID entries by + * dispatch_gid_change_event(). Hence, notifiy for rest of the + * events. 
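The cache.c change here is the heart of this series: ib_dispatch_event() no longer invokes handlers directly from (possibly atomic) driver context, but copies the ib_event into a heap-allocated work item and replays it from the ib_wq workqueue. A minimal sketch of that copy-into-work/container_of pattern, using hypothetical names rather than the kernel's own:

#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_event {			/* hypothetical event payload */
	int type;
};

struct my_event_work {
	struct work_struct work;
	struct my_event event;		/* copied, so the caller's storage may vanish */
};

static void my_event_task(struct work_struct *_work)
{
	struct my_event_work *w =
		container_of(_work, struct my_event_work, work);

	/* Process context: sleeping locks such as rwsems are now legal. */
	pr_info("deferred event %d\n", w->event.type);
	kfree(w);
}

/* Callable from atomic context, hence GFP_ATOMIC and a best-effort drop. */
static void my_dispatch_event(const struct my_event *event)
{
	struct my_event_work *w = kzalloc(sizeof(*w), GFP_ATOMIC);

	if (!w)
		return;
	INIT_WORK(&w->work, my_event_task);
	w->event = *event;
	queue_work(system_wq, &w->work);
}

Deferring dispatch to a workqueue is what lets the later device.c hunks replace the event_handler spinlock with an rwsem: handlers may now sleep.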
+ */ + if (!ret && work->event.event != IB_EVENT_GID_CHANGE) + ib_dispatch_event_clients(&work->event); + + kfree(work); } -static void ib_cache_task(struct work_struct *_work) +static void ib_generic_event_task(struct work_struct *_work) { struct ib_update_work *work = container_of(_work, struct ib_update_work, work); - ib_cache_update(work->device, - work->port_num, - work->enforce_security); + ib_dispatch_event_clients(&work->event); kfree(work); } -static void ib_cache_event(struct ib_event_handler *handler, - struct ib_event *event) +static bool is_cache_update_event(const struct ib_event *event) +{ + return (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_GID_CHANGE); +} + +/** + * ib_dispatch_event - Dispatch an asynchronous event + * @event:Event to dispatch + * + * Low-level drivers must call ib_dispatch_event() to dispatch the + * event to all registered event handlers when an asynchronous event + * occurs. + */ +void ib_dispatch_event(const struct ib_event *event) { struct ib_update_work *work; - if (event->event == IB_EVENT_PORT_ERR || - event->event == IB_EVENT_PORT_ACTIVE || - event->event == IB_EVENT_LID_CHANGE || - event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_CLIENT_REREGISTER || - event->event == IB_EVENT_GID_CHANGE) { - work = kmalloc(sizeof *work, GFP_ATOMIC); - if (work) { - INIT_WORK(&work->work, ib_cache_task); - work->device = event->device; - work->port_num = event->element.port_num; - if (event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_GID_CHANGE) - work->enforce_security = true; - else - work->enforce_security = false; - - queue_work(ib_wq, &work->work); - } - } + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + if (is_cache_update_event(event)) + INIT_WORK(&work->work, ib_cache_event_task); + else + INIT_WORK(&work->work, ib_generic_event_task); + + work->event = *event; + if (event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_GID_CHANGE) + work->enforce_security = true; + + queue_work(ib_wq, &work->work); } +EXPORT_SYMBOL(ib_dispatch_event); int ib_cache_setup_one(struct ib_device *device) { @@ -1507,12 +1541,12 @@ int ib_cache_setup_one(struct ib_device *device) if (err) return err; - rdma_for_each_port (device, p) - ib_cache_update(device, p, true); + rdma_for_each_port (device, p) { + err = ib_cache_update(device, p, true); + if (err) + return err; + } - INIT_IB_EVENT_HANDLER(&device->cache.event_handler, - device, ib_cache_event); - ib_register_event_handler(&device->cache.event_handler); return 0; } @@ -1534,14 +1568,12 @@ void ib_cache_release_one(struct ib_device *device) void ib_cache_cleanup_one(struct ib_device *device) { - /* The cleanup function unregisters the event handler, - * waits for all in-progress workqueue elements and cleans - * up the GID cache. This function should be called after - * the device was removed from the devices list and all - * clients were removed, so the cache exists but is + /* The cleanup function waits for all in-progress workqueue + * elements and cleans up the GID cache. This function should be + * called after the device was removed from the devices list and + * all clients were removed, so the cache exists but is * non-functional and shouldn't be updated anymore. 
*/ - ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); gid_table_cleanup_one(device); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index da10e6ccb43c..09af96ec41dd 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -597,18 +597,6 @@ static int cm_init_av_by_path(struct sa_path_rec *path, return 0; } -static int cm_alloc_id(struct cm_id_private *cm_id_priv) -{ - int err; - u32 id; - - err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv, - xa_limit_32b, &cm.local_id_next, GFP_KERNEL); - - cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; - return err; -} - static u32 cm_local_id(__be32 local_id) { return (__force u32) (local_id ^ cm.random_id_operand); @@ -862,6 +850,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, void *context) { struct cm_id_private *cm_id_priv; + u32 id; int ret; cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL); @@ -873,9 +862,6 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.remote_cm_qpn = 1; - ret = cm_alloc_id(cm_id_priv); - if (ret) - goto error; spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); @@ -884,11 +870,20 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); atomic_set(&cm_id_priv->refcount, 1); + + ret = xa_alloc_cyclic_irq(&cm.local_id_table, &id, NULL, xa_limit_32b, + &cm.local_id_next, GFP_KERNEL); + if (ret < 0) + goto error; + cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; + xa_store_irq(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id), + cm_id_priv, GFP_KERNEL); + return &cm_id_priv->id; error: kfree(cm_id_priv); - return ERR_PTR(-ENOMEM); + return ERR_PTR(ret); } EXPORT_SYMBOL(ib_create_cm_id); @@ -1228,6 +1223,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, /* Sharing an ib_cm_id with different handlers is not * supported */ spin_unlock_irqrestore(&cm.lock, flags); + ib_destroy_cm_id(cm_id); return ERR_PTR(-EINVAL); } atomic_inc(&cm_id_priv->refcount); @@ -4399,6 +4395,7 @@ error2: error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; + kfree(port); while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4407,6 +4404,7 @@ error1: ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); + kfree(port); } free: kfree(cm_dev); @@ -4460,6 +4458,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) spin_unlock_irq(&cm.state_lock); ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); + kfree(port); } kfree(cm_dev); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a68d0ccf67a4..08d6d7b2d635 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1631,6 +1631,8 @@ static struct rdma_id_private *cma_find_listener( { struct rdma_id_private *id_priv, *id_priv_dev; + lockdep_assert_held(&lock); + if (!bind_list) return ERR_PTR(-EINVAL); @@ -1677,6 +1679,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, } } + mutex_lock(&lock); /* * Net namespace might be getting deleted while route lookup, * cm_id lookup is in progress. 
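This series also sprinkles lockdep_assert_held(&lock) through cma.c (cma_find_listener here, the listen and port-allocation helpers further down), turning a comment-only contract ("caller holds the global cma mutex") into something CONFIG_PROVE_LOCKING kernels verify at runtime. The idiom, sketched with a hypothetical subsystem lock:

#include <linux/lockdep.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(subsys_lock);	/* hypothetical global lock */
static int subsys_count;		/* protected by subsys_lock */

/* Caller must hold subsys_lock; lockdep complains loudly if not. */
static void subsys_add_locked(void)
{
	lockdep_assert_held(&subsys_lock);
	subsys_count++;
}

static void subsys_add(void)
{
	mutex_lock(&subsys_lock);
	subsys_add_locked();
	mutex_unlock(&subsys_lock);
}

The same series primes lockdep in cma_init() by taking rtnl and the cma mutex once in the required order, so the rare bonding-only inversion is reported without ever testing with bonding.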
Therefore, perform netdevice @@ -1718,6 +1721,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev); err: rcu_read_unlock(); + mutex_unlock(&lock); if (IS_ERR(id_priv) && *net_dev) { dev_put(*net_dev); *net_dev = NULL; @@ -2396,9 +2400,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); - goto out; + return ret; } mutex_unlock(&conn_id->handler_mutex); @@ -2472,6 +2477,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct net *net = id_priv->id.route.addr.dev_addr.net; int ret; + lockdep_assert_held(&lock); + if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; @@ -2910,6 +2917,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) err2: kfree(route->path_rec); route->path_rec = NULL; + route->num_paths = 0; err1: kfree(work); return ret; @@ -3090,6 +3098,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + atomic_inc(&id_priv->refcount); cma_init_resolve_addr_work(work, id_priv); queue_work(cma_wq, &work->work); return 0; @@ -3116,6 +3125,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); + atomic_inc(&id_priv->refcount); cma_init_resolve_addr_work(work, id_priv); queue_work(cma_wq, &work->work); return 0; @@ -3152,19 +3162,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) + if (ret) { + memset(cma_dst_addr(id_priv), 0, + rdma_addr_size(dst_addr)); return ret; + } } - if (cma_family(id_priv) != dst_addr->sa_family) + if (cma_family(id_priv) != dst_addr->sa_family) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); } else { @@ -3234,6 +3251,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list, u64 sid, mask; __be16 port; + lockdep_assert_held(&lock); + addr = cma_src_addr(id_priv); port = htons(bind_list->port); @@ -3262,6 +3281,8 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps, struct rdma_bind_list *bind_list; int ret; + lockdep_assert_held(&lock); + bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; @@ -3288,6 +3309,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list, struct sockaddr *saddr = cma_src_addr(id_priv); __be16 dport = cma_port(daddr); + lockdep_assert_held(&lock); + hlist_for_each_entry(cur_id, &bind_list->owners, node) { struct sockaddr *cur_daddr = cma_dst_addr(cur_id); struct sockaddr *cur_saddr = cma_src_addr(cur_id); @@ -3327,6 
+3350,8 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps, unsigned int rover; struct net *net = id_priv->id.route.addr.dev_addr.net; + lockdep_assert_held(&lock); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; @@ -3374,6 +3399,8 @@ static int cma_check_port(struct rdma_bind_list *bind_list, struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; + lockdep_assert_held(&lock); + addr = cma_src_addr(id_priv); hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) @@ -3404,6 +3431,8 @@ static int cma_use_port(enum rdma_ucm_port_space ps, unsigned short snum; int ret; + lockdep_assert_held(&lock); + snum = ntohs(cma_port(cma_src_addr(id_priv))); if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; @@ -4709,6 +4738,19 @@ static int __init cma_init(void) { int ret; + /* + * There is a rare lock ordering dependency in cma_netdev_callback() + * that only happens when bonding is enabled. Teach lockdep that rtnl + * must never be nested under lock so it can find these without having + * to test with bonding. + */ + if (IS_ENABLED(CONFIG_LOCKDEP)) { + rtnl_lock(); + mutex_lock(&lock); + mutex_unlock(&lock); + rtnl_unlock(); + } + cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); if (!cma_wq) return -ENOMEM; @@ -4735,6 +4777,7 @@ err_ib: err: unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); err_wq: destroy_workqueue(cma_wq); return ret; diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 3ec2c415bb70..6b3f5b25b6ff 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -322,8 +322,21 @@ fail: return ERR_PTR(err); } +static void drop_cma_dev(struct config_group *cgroup, struct config_item *item) +{ + struct config_group *group = + container_of(item, struct config_group, cg_item); + struct cma_dev_group *cma_dev_group = + container_of(group, struct cma_dev_group, device_group); + + configfs_remove_default_groups(&cma_dev_group->ports_group); + configfs_remove_default_groups(&cma_dev_group->device_group); + config_item_put(item); +} + static struct configfs_group_operations cma_subsys_group_ops = { .make_group = make_cma_dev, + .drop_item = drop_cma_dev, }; static const struct config_item_type cma_subsys_type = { diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index ff9e0d7fb4f3..4c9193d2f894 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -119,6 +119,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); int ib_cache_setup_one(struct ib_device *device); void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); +void ib_dispatch_event_clients(struct ib_event *event); #ifdef CONFIG_CGROUP_RDMA void ib_device_register_rdmacg(struct ib_device *device); @@ -307,6 +308,21 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->pd = pd; qp->uobject = uobj; qp->real_qp = qp; + + qp->qp_type = attr->qp_type; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->send_cq = attr->send_cq; + qp->recv_cq = attr->recv_cq; + qp->srq = attr->srq; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->event_handler = attr->event_handler; + qp->qp_context = attr->qp_context; + + atomic_set(&qp->usecnt, 0); + spin_lock_init(&qp->mr_lock); + 
INIT_LIST_HEAD(&qp->rdma_mrs); + INIT_LIST_HEAD(&qp->sig_mrs); + /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index a24c900fbdf6..d17e7af352fd 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -113,7 +113,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) } /** - * __ib_alloc_cq - allocate a completion queue + * __ib_alloc_cq_user - allocate a completion queue * @dev: device to allocate the CQ for * @private: driver private data, accessible from cq->cq_context * @nr_cqe: number of CQEs to allocate @@ -193,7 +193,7 @@ out_destroy_cq: EXPORT_SYMBOL(__ib_alloc_cq_user); /** - * ib_free_cq - free a completion queue + * ib_free_cq_user - free a completion queue * @cq: completion queue to free. * @udata: User data or NULL for kernel object */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 156d210de195..80a1e20a95ae 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -589,6 +589,7 @@ struct ib_device *_ib_alloc_device(size_t size) INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); + init_rwsem(&device->event_handler_rwsem); mutex_init(&device->unregistration_lock); /* * client_data needs to be alloc because we don't want our mark to be @@ -896,7 +897,9 @@ static int add_one_compat_dev(struct ib_device *device, cdev->dev.parent = device->dev.parent; rdma_init_coredev(cdev, device, read_pnet(&rnet->net)); cdev->dev.release = compatdev_release; - dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); + ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev)); + if (ret) + goto add_err; ret = device_add(&cdev->dev); if (ret) @@ -1190,9 +1193,21 @@ static void setup_dma_device(struct ib_device *device) WARN_ON_ONCE(!parent); device->dma_device = parent; } - /* Setup default max segment size for all IB devices */ - dma_set_max_seg_size(device->dma_device, SZ_2G); + if (!device->dev.dma_parms) { + if (parent) { + /* + * The caller did not provide DMA parameters, so + * 'parent' probably represents a PCI device. The PCI + * core sets the maximum segment size to 64 + * KB. Increase this parameter to 2 GB. + */ + device->dev.dma_parms = parent->dma_parms; + dma_set_max_seg_size(device->dma_device, SZ_2G); + } else { + WARN_ON_ONCE(true); + } + } } /* @@ -1300,6 +1315,10 @@ out: return ret; } +static void prevent_dealloc_device(struct ib_device *ib_dev) +{ +} + /** * ib_register_device - Register an IB device with IB core * @device:Device to register @@ -1365,11 +1384,11 @@ int ib_register_device(struct ib_device *device, const char *name) * possibility for a parallel unregistration along with this * error flow. Since we have a refcount here we know any * parallel flow is stopped in disable_device and will see the - * NULL pointers, causing the responsibility to + * special dealloc_driver pointer, causing the responsibility to * ib_dealloc_device() to revert back to this thread. 
*/ dealloc_fn = device->ops.dealloc_driver; - device->ops.dealloc_driver = NULL; + device->ops.dealloc_driver = prevent_dealloc_device; ib_device_put(device); __ib_unregister_device(device); device->ops.dealloc_driver = dealloc_fn; @@ -1417,7 +1436,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev) * Drivers using the new flow may not call ib_dealloc_device except * in error unwind prior to registration success. */ - if (ib_dev->ops.dealloc_driver) { + if (ib_dev->ops.dealloc_driver && + ib_dev->ops.dealloc_driver != prevent_dealloc_device) { WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); ib_dealloc_device(ib_dev); } @@ -1808,17 +1828,15 @@ EXPORT_SYMBOL(ib_set_client_data); * * ib_register_event_handler() registers an event handler that will be * called back when asynchronous IB events occur (as defined in - * chapter 11 of the InfiniBand Architecture Specification). This - * callback may occur in interrupt context. + * chapter 11 of the InfiniBand Architecture Specification). This + * callback occurs in workqueue context. */ void ib_register_event_handler(struct ib_event_handler *event_handler) { - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + down_write(&event_handler->device->event_handler_rwsem); list_add_tail(&event_handler->list, &event_handler->device->event_handler_list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + up_write(&event_handler->device->event_handler_rwsem); } EXPORT_SYMBOL(ib_register_event_handler); @@ -1831,35 +1849,23 @@ EXPORT_SYMBOL(ib_register_event_handler); */ void ib_unregister_event_handler(struct ib_event_handler *event_handler) { - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + down_write(&event_handler->device->event_handler_rwsem); list_del(&event_handler->list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + up_write(&event_handler->device->event_handler_rwsem); } EXPORT_SYMBOL(ib_unregister_event_handler); -/** - * ib_dispatch_event - Dispatch an asynchronous event - * @event:Event to dispatch - * - * Low-level drivers must call ib_dispatch_event() to dispatch the - * event to all registered event handlers when an asynchronous event - * occurs. - */ -void ib_dispatch_event(struct ib_event *event) +void ib_dispatch_event_clients(struct ib_event *event) { - unsigned long flags; struct ib_event_handler *handler; - spin_lock_irqsave(&event->device->event_handler_lock, flags); + down_read(&event->device->event_handler_rwsem); list_for_each_entry(handler, &event->device->event_handler_list, list) handler->handler(handler, event); - spin_unlock_irqrestore(&event->device->event_handler_lock, flags); + up_read(&event->device->event_handler_rwsem); } -EXPORT_SYMBOL(ib_dispatch_event); /** * ib_query_port - Query IB port attributes @@ -2229,8 +2235,12 @@ int ib_modify_port(struct ib_device *device, rc = device->ops.modify_port(device, port_num, port_modify_mask, port_modify); + else if (rdma_protocol_roce(device, port_num) && + ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 || + (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0)) + rc = 0; else - rc = rdma_protocol_roce(device, port_num) ? 
0 : -ENOSYS; + rc = -EOPNOTSUPP; return rc; } EXPORT_SYMBOL(ib_modify_port); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 72141c5b7c95..570dc526a942 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -159,8 +159,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; - list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { + list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); + } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 9947d16edef2..2284930b5f91 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -639,10 +639,10 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid); flush_workqueue(port_priv->wq); - ib_cancel_rmpp_recvs(mad_agent_priv); deref_mad_agent(mad_agent_priv); wait_for_completion(&mad_agent_priv->comp); + ib_cancel_rmpp_recvs(mad_agent_priv); ib_mad_agent_security_cleanup(&mad_agent_priv->agent); @@ -2960,6 +2960,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, sg_list.addr))) { + kfree(mad_priv); ret = -ENOMEM; break; } diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 69188cbbd99b..6123780f9602 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -692,6 +692,10 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], IB_DEVICE_NAME_MAX); + if (strlen(name) == 0) { + err = -EINVAL; + goto done; + } err = ib_device_rename(device, name); goto done; } @@ -1055,7 +1059,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_get; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, @@ -1069,10 +1073,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); ret = fe->fill_res_func(msg, has_cap_net_admin, res, port); - rdma_restrack_put(res); if (ret) goto err_free; + rdma_restrack_put(res); nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); @@ -1292,7 +1296,7 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(ibdev_name)); - if (strchr(ibdev_name, '%')) + if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0) return -EINVAL; nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index ccf4d069c25c..d0580eed3bcb 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -160,9 +160,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, uobj->context = NULL; /* - * For DESTROY the usecnt is held write locked, the caller is expected - * to put it unlock and put the object when done with it. Only DESTROY - * can remove the IDR handle. + * For DESTROY the usecnt is not changed, the caller is expected to + * manage it via uobj_put_destroy(). 
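The rdma_core.c comments in this and the next hunk describe uobjects being "write locked" through destruction, but the lock is only an atomic use count: 0 means free, positive means readers, -1 means exclusively owned. A simplified sketch of that scheme (not the kernel's exact uverbs_try_lock_object()):

#include <linux/atomic.h>
#include <linux/types.h>

static bool try_read_lock(atomic_t *usecnt)
{
	/* take a reader slot unless the object is exclusively held (-1) */
	return atomic_add_unless(usecnt, 1, -1);
}

static bool try_write_lock(atomic_t *usecnt)
{
	/* claim exclusive ownership only when nobody is using it */
	return atomic_cmpxchg(usecnt, 0, -1) == 0;
}

static void read_unlock(atomic_t *usecnt)
{
	atomic_dec(usecnt);
}

static void write_unlock(atomic_t *usecnt)
{
	atomic_set(usecnt, 0);
}

A uobject destroyed via RDMA_REMOVE_DESTROY simply never performs the write_unlock() step: concurrent RCU readers can still find the xarray entry, but can no longer lock it.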
Only DESTROY can remove the IDR + * handle. */ if (reason != RDMA_REMOVE_DESTROY) atomic_set(&uobj->usecnt, 0); @@ -194,7 +194,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, /* * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY * sequence. It should only be used from command callbacks. On success the - * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This + * caller must pair this with uobj_put_destroy(). This * version requires the caller to have already obtained an * LOOKUP_DESTROY uobject kref. */ @@ -205,6 +205,13 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) down_read(&ufile->hw_destroy_rwsem); + /* + * Once the uobject is destroyed by RDMA_REMOVE_DESTROY then it is left + * write locked as the callers put it back with UVERBS_LOOKUP_DESTROY. + * This is because any other concurrent thread can still see the object + * in the xarray due to RCU. Leaving it locked ensures nothing else will + * touch it. + */ ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE); if (ret) goto out_unlock; @@ -223,7 +230,7 @@ out_unlock: /* * uobj_get_destroy destroys the HW object and returns a handle to the uobj * with a NULL object pointer. The caller must pair this with - * uverbs_put_destroy. + * uobj_put_destroy(). */ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, u32 id, struct uverbs_attr_bundle *attrs) @@ -257,8 +264,7 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, uobj = __uobj_get_destroy(obj, id, attrs); if (IS_ERR(uobj)) return PTR_ERR(uobj); - - rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); + uobj_put_destroy(uobj); return 0; } @@ -362,7 +368,7 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj, * and the caller is expected to ensure that uverbs_close_fd is never * done while a call top lookup is possible. */ - if (f->f_op != fd_type->fops) { + if (f->f_op != fd_type->fops || uobject->ufile != ufile) { fput(f); return ERR_PTR(-EBADF); } @@ -689,7 +695,6 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, enum rdma_lookup_mode mode) { assert_uverbs_usecnt(uobj, mode); - uobj->uapi_object->type_class->lookup_put(uobj, mode); /* * In order to unlock an object, either decrease its usecnt for * read access or zero it in case of exclusive access. 
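The reordering in rdma_lookup_put_uobject() below pins down a rule for paired references: the kref taken at lookup_get must be dropped last, because every earlier step still dereferences the object. A sketch with hypothetical types:

#include <linux/kref.h>
#include <linux/slab.h>

struct my_obj {
	struct kref ref;
	atomic_t usecnt;
};

static void my_obj_release(struct kref *ref)
{
	kfree(container_of(ref, struct my_obj, ref));
}

static void my_obj_lookup_put(struct my_obj *obj)
{
	atomic_dec(&obj->usecnt);		/* 1: drop the read lock */
	/* 2: type-specific lookup_put work would run here ... */
	kref_put(&obj->ref, my_obj_release);	/* 3: last -- may free obj */
}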
See @@ -706,6 +711,7 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, break; } + uobj->uapi_object->type_class->lookup_put(uobj, mode); /* Pairs with the kref obtained by type->lookup_get */ uverbs_uobject_put(uobj); } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 7d8071c7e564..e3058b9e814b 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -829,13 +829,20 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask) return len; } -static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) +static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; void *data; struct ib_sa_mad *mad; int len; + unsigned long flags; + unsigned long delay; + gfp_t gfp_flag; + int ret; + + INIT_LIST_HEAD(&query->list); + query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); mad = query->mad_buf->mad; len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask); @@ -860,36 +867,25 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask); -} + gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC : + GFP_NOWAIT; -static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) -{ - unsigned long flags; - unsigned long delay; - int ret; + spin_lock_irqsave(&ib_nl_request_lock, flags); + ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_flag); - INIT_LIST_HEAD(&query->list); - query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); + if (ret) + goto out; - /* Put the request on the list first.*/ - spin_lock_irqsave(&ib_nl_request_lock, flags); + /* Put the request on the list.*/ delay = msecs_to_jiffies(sa_local_svc_timeout_ms); query->timeout = delay + jiffies; list_add_tail(&query->list, &ib_nl_request_list); /* Start the timeout if this is the only request */ if (ib_nl_request_list.next == &query->list) queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); - spin_unlock_irqrestore(&ib_nl_request_lock, flags); - ret = ib_nl_send_msg(query, gfp_mask); - if (ret) { - ret = -EIO; - /* Remove the request */ - spin_lock_irqsave(&ib_nl_request_lock, flags); - list_del(&query->list); - spin_unlock_irqrestore(&ib_nl_request_lock, flags); - } +out: + spin_unlock_irqrestore(&ib_nl_request_lock, flags); return ret; } @@ -1068,7 +1064,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, } settimeout_out: - return skb->len; + return 0; } static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) @@ -1139,7 +1135,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, } resp_out: - return skb->len; + return 0; } static void free_sm_ah(struct kref *kref) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 6eb6d2717ca5..75e7ec017836 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -339,27 +339,20 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, if (!new_pps) return NULL; - if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) { - if (!qp_pps) { - new_pps->main.port_num = qp_attr->port_num; - new_pps->main.pkey_index = qp_attr->pkey_index; - } else { - new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ? - qp_attr->port_num : - qp_pps->main.port_num; - - new_pps->main.pkey_index = - (qp_attr_mask & IB_QP_PKEY_INDEX) ? 
- qp_attr->pkey_index : - qp_pps->main.pkey_index; - } - new_pps->main.state = IB_PORT_PKEY_VALID; - } else if (qp_pps) { + if (qp_attr_mask & IB_QP_PORT) + new_pps->main.port_num = qp_attr->port_num; + else if (qp_pps) new_pps->main.port_num = qp_pps->main.port_num; + + if (qp_attr_mask & IB_QP_PKEY_INDEX) + new_pps->main.pkey_index = qp_attr->pkey_index; + else if (qp_pps) new_pps->main.pkey_index = qp_pps->main.pkey_index; - if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) - new_pps->main.state = IB_PORT_PKEY_VALID; - } + + if (((qp_attr_mask & IB_QP_PKEY_INDEX) && + (qp_attr_mask & IB_QP_PORT)) || + (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)) + new_pps->main.state = IB_PORT_PKEY_VALID; if (qp_attr_mask & IB_QP_ALT_PATH) { new_pps->alt.port_num = qp_attr->alt_port_num; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index c78d0c9646ae..2b9991b37bc8 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1044,8 +1044,7 @@ static int add_port(struct ib_core_device *coredev, int port_num) coredev->ports_kobj, "%d", port_num); if (ret) { - kfree(p); - return ret; + goto err_put; } p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL); @@ -1058,8 +1057,7 @@ static int add_port(struct ib_core_device *coredev, int port_num) ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type, &p->kobj, "gid_attrs"); if (ret) { - kfree(p->gid_attr_group); - goto err_put; + goto err_put_gid_attrs; } if (device->ops.process_mad && is_full_dev) { @@ -1386,8 +1384,10 @@ int ib_port_register_module_stat(struct ib_device *device, u8 port_num, ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s", name); - if (ret) + if (ret) { + kobject_put(kobj); return ret; + } } return 0; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 140a338a135f..176738311020 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -89,6 +89,7 @@ struct ucma_context { struct ucma_file *file; struct rdma_cm_id *cm_id; + struct mutex mutex; u64 uid; struct list_head list; @@ -215,6 +216,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) init_completion(&ctx->comp); INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; + mutex_init(&ctx->mutex); mutex_lock(&mut); ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL); @@ -596,6 +598,7 @@ static int ucma_free_ctx(struct ucma_context *ctx) } events_reported = ctx->events_reported; + mutex_destroy(&ctx->mutex); kfree(ctx); return events_reported; } @@ -665,7 +668,10 @@ static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); + mutex_unlock(&ctx->mutex); + ucma_put_ctx(ctx); return ret; } @@ -688,7 +694,9 @@ static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -712,8 +720,10 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -738,8 +748,10 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file, if 
(IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -759,7 +771,9 @@ static ssize_t ucma_resolve_route(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -848,6 +862,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); memset(&resp, 0, sizeof resp); addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? @@ -871,6 +886,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, ucma_copy_iw_route(&resp, &ctx->cm_id->route); out: + mutex_unlock(&ctx->mutex); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -1022,6 +1038,7 @@ static ssize_t ucma_query(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); switch (cmd.option) { case RDMA_USER_CM_QUERY_ADDR: ret = ucma_query_addr(ctx, response, out_len); @@ -1036,6 +1053,7 @@ static ssize_t ucma_query(struct ucma_file *file, ret = -ENOSYS; break; } + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; @@ -1076,7 +1094,9 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); + mutex_lock(&ctx->mutex); ret = rdma_connect(ctx->cm_id, &conn_param); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1097,7 +1117,9 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? 
cmd.backlog : max_backlog; + mutex_lock(&ctx->mutex); ret = rdma_listen(ctx->cm_id, ctx->backlog); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1120,13 +1142,17 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); + mutex_lock(&ctx->mutex); ret = __rdma_accept(ctx->cm_id, &conn_param, NULL); + mutex_unlock(&ctx->mutex); if (!ret) ctx->uid = cmd.uid; mutex_unlock(&file->mut); - } else + } else { + mutex_lock(&ctx->mutex); ret = __rdma_accept(ctx->cm_id, NULL, NULL); - + mutex_unlock(&ctx->mutex); + } ucma_put_ctx(ctx); return ret; } @@ -1145,7 +1171,9 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1164,7 +1192,9 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_disconnect(ctx->cm_id); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1195,7 +1225,9 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file, resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; + mutex_lock(&ctx->mutex); ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); + mutex_unlock(&ctx->mutex); if (ret) goto out; @@ -1281,9 +1313,13 @@ static int ucma_set_ib_path(struct ucma_context *ctx, struct sa_path_rec opa; sa_convert_path_ib_to_opa(&opa, &sa_path); + mutex_lock(&ctx->mutex); ret = rdma_set_ib_path(ctx->cm_id, &opa); + mutex_unlock(&ctx->mutex); } else { + mutex_lock(&ctx->mutex); ret = rdma_set_ib_path(ctx->cm_id, &sa_path); + mutex_unlock(&ctx->mutex); } if (ret) return ret; @@ -1316,7 +1352,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level, switch (level) { case RDMA_OPTION_ID: + mutex_lock(&ctx->mutex); ret = ucma_set_option_id(ctx, optname, optval, optlen); + mutex_unlock(&ctx->mutex); break; case RDMA_OPTION_IB: ret = ucma_set_option_ib(ctx, optname, optval, optlen); @@ -1376,8 +1414,10 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); if (ctx->cm_id->device) ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; @@ -1420,8 +1460,10 @@ static ssize_t ucma_process_join(struct ucma_file *file, mc->join_state = join_state; mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); + mutex_lock(&ctx->mutex); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); + mutex_unlock(&ctx->mutex); if (ret) goto err2; @@ -1525,7 +1567,10 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, goto out; } + mutex_lock(&mc->ctx->mutex); rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); + mutex_unlock(&mc->ctx->mutex); + mutex_lock(&mc->ctx->file->mut); ucma_cleanup_mc_events(mc); list_del(&mc->list); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index e7ea819fcb11..b9f1dfb2a37a 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -168,10 +168,13 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, * for any address. 
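The long run of ucma.c hunks above all apply one recipe: give every ucma_context a private mutex (created in ucma_alloc_ctx(), destroyed in ucma_free_ctx()) and take it around each rdma_* call made on ctx->cm_id, so two file commands can no longer race on the same id. The shape of the pattern, with hypothetical names:

#include <linux/mutex.h>
#include <linux/slab.h>

struct my_ctx {
	struct mutex mutex;	/* serializes all ops on this context */
	/* ... underlying id and state ... */
};

static struct my_ctx *my_ctx_alloc(void)
{
	struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (ctx)
		mutex_init(&ctx->mutex);
	return ctx;
}

static int my_ctx_do_op(struct my_ctx *ctx)
{
	int ret;

	mutex_lock(&ctx->mutex);
	ret = 0;		/* stand-in for rdma_bind_addr() and friends */
	mutex_unlock(&ctx->mutex);
	return ret;
}

static void my_ctx_free(struct my_ctx *ctx)
{
	mutex_destroy(&ctx->mutex);
	kfree(ctx);
}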
*/ mask |= (sg_dma_address(sg) + pgoff) ^ va; - if (i && i != (umem->nmap - 1)) - /* restrict by length as well for interior SGEs */ - mask |= sg_dma_len(sg); va += sg_dma_len(sg) - pgoff; + /* Except for the last entry, the ending iova alignment sets + * the maximum possible page size as the low bits of the iova + * must be zero when starting the next chunk. + */ + if (i != (umem->nmap - 1)) + mask |= va; pgoff = 0; } best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap); diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 5e5f7dd82c50..bab412516b07 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -626,7 +626,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, while (bcnt > 0) { const size_t gup_num_pages = min_t(size_t, - (bcnt + BIT(page_shift) - 1) >> page_shift, + ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, PAGE_SIZE / sizeof(struct page *)); down_read(&owning_mm->mmap_sem); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 025b6d86a61f..9b9fa953595b 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1274,6 +1274,9 @@ static void ib_umad_kill_port(struct ib_umad_port *port) struct ib_umad_file *file; int id; + cdev_device_del(&port->sm_cdev, &port->sm_dev); + cdev_device_del(&port->cdev, &port->dev); + mutex_lock(&port->file_mutex); /* Mark ib_dev NULL and block ioctl or other file ops to progress @@ -1293,8 +1296,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port) mutex_unlock(&port->file_mutex); - cdev_device_del(&port->sm_cdev, &port->sm_dev); - cdev_device_del(&port->cdev, &port->dev); ida_free(&umad_ida, port->dev_num); /* balances device_initialize() */ diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 1e5aeb39f774..63f7f7db5902 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -98,7 +98,7 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata, struct ib_uverbs_device { atomic_t refcount; - int num_comp_vectors; + u32 num_comp_vectors; struct completion comp; struct device dev; /* First group for device attributes, NULL terminated array */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index df8e8ac2c16b..6273d2430736 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -760,6 +760,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; + mr->iova = cmd.hca_va; rdma_restrack_uadd(&mr->res); uobj->object = mr; @@ -850,6 +851,9 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) atomic_dec(&old_pd->usecnt); } + if (cmd.flags & IB_MR_REREG_TRANS) + mr->iova = cmd.hca_va; + memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; resp.rkey = mr->rkey; @@ -1429,17 +1433,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->rwq_ind_tbl = ind_tbl; - qp->event_handler = attr.event_handler; - qp->qp_context = attr.qp_context; - qp->qp_type = attr.qp_type; - atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); - qp->port = 0; if (attr.send_cq) atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) @@ -2716,12 +2710,6 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs, return 0; } -static 
size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec) -{ - /* Returns user space filter size, includes padding */ - return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; -} - static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size, u16 ib_real_filter_sz) { @@ -2865,11 +2853,16 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec) { - ssize_t kern_filter_sz; + size_t kern_filter_sz; void *kern_spec_mask; void *kern_spec_val; - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); + if (check_sub_overflow((size_t)kern_spec->hdr.size, + sizeof(struct ib_uverbs_flow_spec_hdr), + &kern_filter_sz)) + return -EINVAL; + + kern_filter_sz /= 2; kern_spec_val = (void *)kern_spec + sizeof(struct ib_uverbs_flow_spec_hdr); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 84a5e9a6d483..5b7f9d33dd80 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -215,7 +215,6 @@ void ib_uverbs_release_file(struct kref *ref) } static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, - struct ib_uverbs_file *uverbs_file, struct file *filp, char __user *buf, size_t count, loff_t *pos, size_t eventsz) @@ -233,19 +232,16 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, if (wait_event_interruptible(ev_queue->poll_wait, (!list_empty(&ev_queue->event_list) || - /* The barriers built into wait_event_interruptible() - * and wake_up() guarentee this will see the null set - * without using RCU - */ - !uverbs_file->device->ib_dev))) + ev_queue->is_closed))) return -ERESTARTSYS; + spin_lock_irq(&ev_queue->lock); + /* If device was disassociated and no event exists set an error */ - if (list_empty(&ev_queue->event_list) && - !uverbs_file->device->ib_dev) + if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) { + spin_unlock_irq(&ev_queue->lock); return -EIO; - - spin_lock_irq(&ev_queue->lock); + } } event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); @@ -280,8 +276,7 @@ static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, { struct ib_uverbs_async_event_file *file = filp->private_data; - return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp, - buf, count, pos, + return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos, sizeof(struct ib_uverbs_async_event_desc)); } @@ -291,9 +286,8 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, struct ib_uverbs_completion_event_file *comp_ev_file = filp->private_data; - return ib_uverbs_event_read(&comp_ev_file->ev_queue, - comp_ev_file->uobj.ufile, filp, - buf, count, pos, + return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count, + pos, sizeof(struct ib_uverbs_comp_event_desc)); } @@ -308,6 +302,8 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, spin_lock_irq(&ev_queue->lock); if (!list_empty(&ev_queue->event_list)) pollflags = EPOLLIN | EPOLLRDNORM; + else if (ev_queue->is_closed) + pollflags = EPOLLERR; spin_unlock_irq(&ev_queue->lock); return pollflags; @@ -316,7 +312,9 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, static __poll_t ib_uverbs_async_event_poll(struct file *filp, struct poll_table_struct *wait) { - return ib_uverbs_event_poll(filp->private_data, filp, wait); + struct 
ib_uverbs_async_event_file *file = filp->private_data; + + return ib_uverbs_event_poll(&file->ev_queue, filp, wait); } static __poll_t ib_uverbs_comp_event_poll(struct file *filp, @@ -330,9 +328,9 @@ static __poll_t ib_uverbs_comp_event_poll(struct file *filp, static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) { - struct ib_uverbs_event_queue *ev_queue = filp->private_data; + struct ib_uverbs_async_event_file *file = filp->private_data; - return fasync_helper(fd, filp, on, &ev_queue->async_queue); + return fasync_helper(fd, filp, on, &file->ev_queue.async_queue); } static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e666a1f7608d..41caf36575df 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -316,7 +316,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, EXPORT_SYMBOL(__ib_alloc_pd); /** - * ib_dealloc_pd - Deallocates a protection domain. + * ib_dealloc_pd_user - Deallocates a protection domain. * @pd: The protection domain to deallocate. * @udata: Valid user data or NULL for kernel object * @@ -661,16 +661,17 @@ static bool find_gid_index(const union ib_gid *gid, void *context) { struct find_gid_index_context *ctx = context; + u16 vlan_id = 0xffff; + int ret; if (ctx->gid_type != gid_attr->gid_type) return false; - if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || - (is_vlan_dev(gid_attr->ndev) && - vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); + if (ret) return false; - return true; + return ctx->vlan_id == vlan_id; } static const struct ib_gid_attr * @@ -1174,16 +1175,6 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, if (ret) goto err; - qp->qp_type = qp_init_attr->qp_type; - qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; - - atomic_set(&qp->usecnt, 0); - qp->mrs_used = 0; - spin_lock_init(&qp->mr_lock); - INIT_LIST_HEAD(&qp->rdma_mrs); - INIT_LIST_HEAD(&qp->sig_mrs); - qp->port = 0; - if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { struct ib_qp *xrc_qp = create_xrc_qp_user(qp, qp_init_attr, udata); @@ -1650,7 +1641,7 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, if (!(rdma_protocol_ib(qp->device, attr->alt_ah_attr.port_num) && rdma_protocol_ib(qp->device, port))) { - ret = EINVAL; + ret = -EINVAL; goto out; } } @@ -1981,7 +1972,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) EXPORT_SYMBOL(ib_dereg_mr_user); /** - * ib_alloc_mr() - Allocates a memory region + * ib_alloc_mr_user() - Allocates a memory region * @pd: protection domain associated with the region * @mr_type: memory region type * @max_num_sg: maximum sg entries available for registration. 
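Two fixes above, ipv6_dst_lookup_flow() in addr.c returning a pointer-or-error and verbs.c's "ret = EINVAL" becoming "ret = -EINVAL", both rest on the same kernel convention: errors travel as negative errno values, and pointer-returning functions encode them with ERR_PTR(). A compact sketch with a hypothetical allocator:

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/slab.h>

struct widget {			/* hypothetical */
	int id;
};

static struct widget *widget_create(int id)
{
	struct widget *w;

	if (id < 0)
		return ERR_PTR(-EINVAL);	/* errno folded into the pointer */
	w = kzalloc(sizeof(*w), GFP_KERNEL);
	if (!w)
		return ERR_PTR(-ENOMEM);
	w->id = id;
	return w;
}

static int widget_use(int id)
{
	struct widget *w = widget_create(id);

	if (IS_ERR(w))
		return PTR_ERR(w);	/* always negative, usable as-is */
	kfree(w);
	return 0;
}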
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 94559169924f..7d9f44434562 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3346,8 +3346,10 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) int rc; rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); - if (rc) + if (rc) { dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc); + return rc; + } if (mr->pages) { rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 814f959c7db9..5ef9d5252358 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1268,10 +1268,10 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) return; } rdev->qplib_ctx.hwrm_intf_ver = - (u64)resp.hwrm_intf_major << 48 | - (u64)resp.hwrm_intf_minor << 32 | - (u64)resp.hwrm_intf_build << 16 | - resp.hwrm_intf_patch; + (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 | + (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 | + (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 | + le16_to_cpu(resp.hwrm_intf_patch); } static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 958c1ff9c515..4d07d22bfa7b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2283,13 +2283,13 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, /* Add qp to flush list of the CQ */ bnxt_qplib_add_flush_qp(qp); } else { + /* Before we complete, do WA 9060 */ + if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, + cqe_sq_cons)) { + *lib_qp = qp; + goto out; + } if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) { - /* Before we complete, do WA 9060 */ - if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, - cqe_sq_cons)) { - *lib_qp = qp; - goto out; - } cqe->status = CQ_REQ_STATUS_OK; cqe++; (*budget)--; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index fbda11a7ab1a..aaa76d792185 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -186,7 +186,9 @@ struct bnxt_qplib_chip_ctx { u8 chip_metal; }; -#define CHIP_NUM_57500 0x1750 +#define CHIP_NUM_57508 0x1750 +#define CHIP_NUM_57504 0x1751 +#define CHIP_NUM_57502 0x1752 struct bnxt_qplib_res { struct pci_dev *pdev; @@ -203,7 +205,9 @@ struct bnxt_qplib_res { static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) { - return (cctx->chip_num == CHIP_NUM_57500); + return (cctx->chip_num == CHIP_NUM_57508 || + cctx->chip_num == CHIP_NUM_57504 || + cctx->chip_num == CHIP_NUM_57502); } static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0f3b1193d5f8..69d5fc3eba45 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -495,7 +495,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); release_ep_resources(ep); - kfree_skb(skb); return 0; } @@ -506,7 +505,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); c4iw_put_ep(&ep->parent_ep->com); release_ep_resources(ep); - kfree_skb(skb); return 0; } @@ -2421,20 +2419,6 @@ static int 
accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; pr_debug("ep %p tid %u\n", ep, ep->hwtid); - - skb_get(skb); - rpl = cplhdr(skb); - if (!is_t4(adapter_type)) { - skb_trim(skb, roundup(sizeof(*rpl5), 16)); - rpl5 = (void *)rpl; - INIT_TP_WR(rpl5, ep->hwtid); - } else { - skb_trim(skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - } - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - ep->hwtid)); - cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps && req->tcpopt.tstamp, (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); @@ -2480,6 +2464,20 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN_V(1); } + + skb_get(skb); + rpl = cplhdr(skb); + if (!is_t4(adapter_type)) { + skb_trim(skb, roundup(sizeof(*rpl5), 16)); + rpl5 = (void *)rpl; + INIT_TP_WR(rpl5, ep->hwtid); + } else { + skb_trim(skb, sizeof(*rpl)); + INIT_TP_WR(rpl, ep->hwtid); + } + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + ep->hwtid)); + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID_F; @@ -2890,8 +2888,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) srqidx = ABORT_RSS_SRQIDX_G( be32_to_cpu(req->srqidx_status)); if (srqidx) { - complete_cached_srq_buffers(ep, - req->srqidx_status); + complete_cached_srq_buffers(ep, srqidx); } else { /* Hold ep ref until finish_peer_abort() */ c4iw_get_ep(&ep->com); @@ -3035,6 +3032,10 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } + /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3, + * when entering the TERM state the RNIC MUST initiate a CLOSE. 
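The peer_abort() fix above passes the decoded srqidx to complete_cached_srq_buffers() rather than the raw big-endian srqidx_status word, and the read_tcb_rpl() hunk just below corrects the same class of bug by handing t4_tcb_get_field32() the field mask (TCB_RQ_START_M) instead of the word index (TCB_RQ_START_W); both arguments are plain integers, so the mix-up compiles cleanly and extracts garbage. Both come down to shift-and-mask field extraction; a generic sketch (hypothetical macros, not the cxgb4 ones):

#include <linux/types.h>

#define RQ_START_W	30		/* which 32-bit word holds the field */
#define RQ_START_S	16		/* bit offset of the field in that word */
#define RQ_START_M	0xffffU		/* width mask, applied after shifting */

static u32 get_field32(u32 word_val, u32 mask, u32 shift)
{
	return (word_val >> shift) & mask;
}

/*
 * Correct: get_field32(val, RQ_START_M, RQ_START_S)
 * Buggy:   get_field32(val, RQ_START_W, RQ_START_S)
 *          -- the word index masquerades as a mask and truncates the field.
 */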
+ */ + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); c4iw_put_ep(&ep->com); } else pr_warn("TERM received tid %u no ep/qp\n", tid); @@ -3868,8 +3869,8 @@ static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } - ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W, - TCB_RQ_START_S); + ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M, + TCB_RQ_START_S); cleanup: pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 4c0d925c5ff5..1e6f38dea488 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep, } } -static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd) +static int dump_qp(unsigned long id, struct c4iw_qp *qp, + struct c4iw_debugfs_data *qpd) { int space; int cc; + if (id != qp->wq.sq.qid) + return 0; space = qpd->bufsize - qpd->pos - 1; if (space == 0) @@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file) xa_lock_irq(&qpd->devp->qps); xa_for_each(&qpd->devp->qps, index, qp) - dump_qp(qp, qpd); + dump_qp(index, qp, qpd); xa_unlock_irq(&qpd->devp->qps); qpd->buf[qpd->pos++] = 0; @@ -950,6 +953,7 @@ void c4iw_dealloc(struct uld_ctx *ctx) static void c4iw_remove(struct uld_ctx *ctx) { pr_debug("c4iw_dev %p\n", ctx->dev); + debugfs_remove_recursive(ctx->dev->debugfs_root); c4iw_unregister_device(ctx->dev); c4iw_dealloc(ctx); } diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 69b287d025f7..7af958b3318e 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -398,7 +398,6 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; mhp->ibmr.length = mhp->attr.len; - mhp->ibmr.iova = mhp->attr.va_fbo; mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12); pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index e92b9544357a..8b587cb2aa55 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1976,10 +1976,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; ep = qhp->ep; - c4iw_get_ep(&ep->com); - disconnect = 1; if (!internal) { + c4iw_get_ep(&ep->com); terminate = 1; + disconnect = 1; } else { terminate = qhp->attr.send_term; ret = rdma_fini(rhp, qhp, ep); @@ -2754,15 +2754,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) srq->flags = T4_SRQ_LIMIT_SUPPORT; - ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL); - if (ret) - goto err_free_queue; - if (udata) { srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); if (!srq_key_mm) { ret = -ENOMEM; - goto err_remove_handle; + goto err_free_queue; } srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); if (!srq_db_key_mm) { @@ -2806,8 +2802,6 @@ err_free_srq_db_key_mm: kfree(srq_db_key_mm); err_free_srq_key_mm: kfree(srq_key_mm); -err_remove_handle: - xa_erase_irq(&rhp->qps, srq->wq.qid); err_free_queue: free_srq_queue(srq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); @@ -2830,8 +2824,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) rhp = srq->rhp; pr_debug("%s id %d\n", __func__, srq->wq.qid); - - xa_erase_irq(&rhp->qps, srq->wq.qid); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index a5c788741a04..294733ebf754 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -318,6 +318,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu struct efa_admin_acq_entry *comp, size_t comp_size_in_bytes) { + struct efa_admin_aq_entry *aqe; struct efa_comp_ctx *comp_ctx; u16 queue_size_mask; u16 ctx_id; @@ -347,7 +348,9 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu reinit_completion(&comp_ctx->wait_event); - memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes); + aqe = &aq->sq.entries[pi]; + memset(aqe, 0, sizeof(*aqe)); + memcpy(aqe, cmd, cmd_size_in_bytes); aq->sq.pc++; atomic64_inc(&aq->stats.submitted_cmd); diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 4fe662c3bbc1..39c0067fe66a 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -479,6 +479,8 @@ static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd, rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu); } + free_cpumask_var(available_cpus); + free_cpumask_var(non_intr_cpus); return 0; fail: diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 67052dc3100c..e510f019d712 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1685,6 +1685,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry, return dd->verbs_dev.n_piodrain; } +static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = context; + + return dd->ctx0_seq_drop; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -4105,6 +4113,7 @@ def_access_ibp_counter(seq_naks); static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH), [C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH), +[C_RX_SHORT_ERR] = RXE32_DEV_CNTR_ELEM(RxShrErr, RCV_SHORT_ERR_CNT, CNTR_SYNTH), [C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH), [C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH), [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT, @@ -4248,6 +4257,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_cpu_intr), [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL, access_sw_cpu_rcv_limit), +[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL, + access_sw_ctx0_seq_drop), [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL, access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index b76cf81f927f..39a57c35af22 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -859,6 +859,7 @@ static inline int idx_from_vl(int vl) enum { C_RCV_OVF = 0, 
C_RX_LEN_ERR, + C_RX_SHORT_ERR, C_RX_ICRC_ERR, C_RX_EBP, C_RX_TID_FULL, @@ -926,6 +927,7 @@ enum { C_DC_PG_STS_TX_MBE_CNT, C_SW_CPU_INTR, C_SW_CPU_RCV_LIM, + C_SW_CTX0_SEQ_DROP, C_SW_VTX_WAIT, C_SW_PIO_WAIT, C_SW_PIO_DRAIN, diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index ab3589d17aee..fb3ec9bff7a2 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -381,6 +381,7 @@ #define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468) #define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0) #define RCV_LENGTH_ERR_CNT 0 +#define RCV_SHORT_ERR_CNT 2 #define RCV_ICRC_ERR_CNT 6 #define RCV_EBP_CNT 9 #define RCV_BUF_OVFL_CNT 10 diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 01aa1f132f55..941b465244ab 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -734,6 +734,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; + packet->rcd->dd->ctx0_seq_drop++; /* Set up for the next packet */ packet->rhqoff += packet->rsize; if (packet->rhqoff >= packet->maxcnt) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f9a7e9d29c8b..89e1dfd07a1b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -200,23 +200,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd = kzalloc(sizeof(*fd), GFP_KERNEL); - if (fd) { - fd->rec_cpu_num = -1; /* no cpu affinity by default */ - fd->mm = current->mm; - mmgrab(fd->mm); - fd->dd = dd; - kobject_get(&fd->dd->kobj); - fp->private_data = fd; - } else { - fp->private_data = NULL; - - if (atomic_dec_and_test(&dd->user_refcount)) - complete(&dd->user_comp); - - return -ENOMEM; - } - + if (!fd || init_srcu_struct(&fd->pq_srcu)) + goto nomem; + spin_lock_init(&fd->pq_rcu_lock); + spin_lock_init(&fd->tid_lock); + spin_lock_init(&fd->invalid_lock); + fd->rec_cpu_num = -1; /* no cpu affinity by default */ + fd->mm = current->mm; + mmgrab(fd->mm); + fd->dd = dd; + kobject_get(&fd->dd->kobj); + fp->private_data = fd; return 0; +nomem: + kfree(fd); + fp->private_data = NULL; + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + return -ENOMEM; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -301,21 +302,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) { struct hfi1_filedata *fd = kiocb->ki_filp->private_data; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; int done = 0, reqs = 0; unsigned long dim = from->nr_segs; + int idx; - if (!cq || !pq) + idx = srcu_read_lock(&fd->pq_srcu); + pq = srcu_dereference(fd->pq, &fd->pq_srcu); + if (!cq || !pq) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EIO; + } - if (!iter_is_iovec(from) || !dim) + if (!iter_is_iovec(from) || !dim) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EINVAL; + } trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim); - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { + srcu_read_unlock(&fd->pq_srcu, idx); return -ENOSPC; + } while (dim) { int ret; @@ -333,6 +343,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) reqs++; } + srcu_read_unlock(&fd->pq_srcu, idx); 
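The file_ops.c hunks above, together with the matching user_sdma.c changes later in this series, convert fd->pq into an SRCU-protected pointer so that teardown can wait out any writer still inside hfi1_write_iter(). A condensed sketch of the resulting reader/updater pairing follows; the function names pq_reader/pq_teardown are illustrative only, and the real teardown path additionally drains the iowait and unregisters the MMU handler before freeing:

/* Reader: runs concurrently with teardown, never sees a freed pq. */
static ssize_t pq_reader(struct hfi1_filedata *fd)
{
	struct hfi1_user_sdma_pkt_q *pq;
	int idx;

	idx = srcu_read_lock(&fd->pq_srcu);
	pq = srcu_dereference(fd->pq, &fd->pq_srcu);
	if (!pq) {
		srcu_read_unlock(&fd->pq_srcu, idx);
		return -EIO;
	}
	/* ... submit SDMA requests against pq ... */
	srcu_read_unlock(&fd->pq_srcu, idx);
	return 0;
}

/* Updater: unpublish the pointer, then wait out every reader. */
static void pq_teardown(struct hfi1_filedata *fd,
			struct hfi1_user_sdma_pkt_q *pq)
{
	spin_lock(&fd->pq_rcu_lock);	/* update-side lock for fd->pq */
	rcu_assign_pointer(fd->pq, NULL);
	spin_unlock(&fd->pq_rcu_lock);
	synchronize_srcu(&fd->pq_srcu);	/* all readers are done with pq */
	kfree(pq);			/* now safe to free */
}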
return reqs; } @@ -707,6 +718,7 @@ done: if (atomic_dec_and_test(&dd->user_refcount)) complete(&dd->user_comp); + cleanup_srcu_struct(&fdata->pq_srcu); kfree(fdata); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index fa45350a9a1d..b79931cc74ab 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1153,6 +1153,8 @@ struct hfi1_devdata { char *boardname; /* human readable board info */ + u64 ctx0_seq_drop; + /* reset value */ u64 z_int_counter; u64 z_rcv_limit; @@ -1436,10 +1438,13 @@ struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { + struct srcu_struct pq_srcu; struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; struct hfi1_user_sdma_comp_q *cq; - struct hfi1_user_sdma_pkt_q *pq; + /* update side lock for SRCU */ + spinlock_t pq_rcu_lock; + struct hfi1_user_sdma_pkt_q __rcu *pq; u16 subctxt; /* for cpu affinity; -1 if none */ int rec_cpu_num; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 71cb9525c074..fbff6b2f00e7 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -845,6 +845,29 @@ wq_error: } /** + * destroy_workqueues - destroy per port workqueues + * @dd: the hfi1_ib device + */ +static void destroy_workqueues(struct hfi1_devdata *dd) +{ + int pidx; + struct hfi1_pportdata *ppd; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + + if (ppd->hfi1_wq) { + destroy_workqueue(ppd->hfi1_wq); + ppd->hfi1_wq = NULL; + } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } + } +} + +/** * enable_general_intr() - Enable the IRQs that will be handled by the * general interrupt handler. * @dd: valid devdata @@ -1117,15 +1140,10 @@ static void shutdown_device(struct hfi1_devdata *dd) * We can't count on interrupts since we are stopping. */ hfi1_quiet_serdes(ppd); - - if (ppd->hfi1_wq) { - destroy_workqueue(ppd->hfi1_wq); - ppd->hfi1_wq = NULL; - } - if (ppd->link_wq) { - destroy_workqueue(ppd->link_wq); - ppd->link_wq = NULL; - } + if (ppd->hfi1_wq) + flush_workqueue(ppd->hfi1_wq); + if (ppd->link_wq) + flush_workqueue(ppd->link_wq); } sdma_exit(dd); } @@ -1489,7 +1507,6 @@ static int __init hfi1_mod_init(void) goto bail_dev; } - hfi1_compute_tid_rdma_flow_wt(); /* * These must be called before the driver is registered with * the PCI subsystem. @@ -1815,6 +1832,7 @@ static void remove_one(struct pci_dev *pdev) * clear dma engines, etc. 
*/ shutdown_device(dd); + destroy_workqueues(dd); stop_timers(dd); diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c index adb4a1ba921b..5836fe7b2817 100644 --- a/drivers/infiniband/hw/hfi1/iowait.c +++ b/drivers/infiniband/hw/hfi1/iowait.c @@ -81,7 +81,9 @@ void iowait_init(struct iowait *wait, u32 tx_limit, void iowait_cancel_work(struct iowait *w) { cancel_work_sync(&iowait_get_ib_work(w)->iowork); - cancel_work_sync(&iowait_get_tid_work(w)->iowork); + /* Make sure that the iowork for TID RDMA is used */ + if (iowait_get_tid_work(w)->iowork.func) + cancel_work_sync(&iowait_get_tid_work(w)->iowork); } /** diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index c96d193bb236..177dbeaa9d14 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed */ - if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) { + if (parent && + (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT || + dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) { dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n"); dd->link_gen3_capable = 0; } diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 4e5c2d1b8cfa..79126b2b14ab 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1594,9 +1594,8 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) else sc_del_credit_return_intr(sc); trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl); - if (needint) { + if (needint) sc_return_credits(sc); - } } /** diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 4e0e9fc0a777..30ba09b510fa 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -381,7 +381,10 @@ bool _hfi1_schedule_send(struct rvt_qp *qp) struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_devdata *dd = ppd->dd; + + if (dd->flags & HFI1_SHUTDOWN) + return true; return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, priv->s_sde ? diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 5c0d90418e8c..0d3e86da2784 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2210,15 +2210,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, if (qp->s_flags & RVT_S_WAIT_RNR) goto bail_stop; rdi = ib_to_rvt(qp->ibqp.device); - if (qp->s_rnr_retry == 0 && - !((rdi->post_parms[wqe->wr.opcode].flags & - RVT_OPERATION_IGN_RNR_CNT) && - qp->s_rnr_retry_cnt == 0)) { - status = IB_WC_RNR_RETRY_EXC_ERR; - goto class_b; + if (!(rdi->post_parms[wqe->wr.opcode].flags & + RVT_OPERATION_IGN_RNR_CNT)) { + if (qp->s_rnr_retry == 0) { + status = IB_WC_RNR_RETRY_EXC_ERR; + goto class_b; + } + if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) + qp->s_rnr_retry--; } - if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0) - qp->s_rnr_retry--; /* * The last valid PSN is the previous PSN. 
For TID RDMA WRITE diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index bc45b3b11f91..5f1212837e5c 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -65,6 +65,7 @@ #define SDMA_DESCQ_CNT 2048 #define SDMA_DESC_INTR 64 #define INVALID_TAIL 0xffff +#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32)) static uint sdma_descq_cnt = SDMA_DESCQ_CNT; module_param(sdma_descq_cnt, uint, S_IRUGO); @@ -1297,7 +1298,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) struct sdma_engine *sde; if (dd->sdma_pad_dma) { - dma_free_coherent(&dd->pcidev->dev, 4, + dma_free_coherent(&dd->pcidev->dev, SDMA_PAD, (void *)dd->sdma_pad_dma, dd->sdma_pad_phys); dd->sdma_pad_dma = NULL; @@ -1492,7 +1493,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } /* Allocate memory for pad */ - dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32), + dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD, &dd->sdma_pad_phys, GFP_KERNEL); if (!dd->sdma_pad_dma) { dd_dev_err(dd, "failed to allocate SendDMA pad memory\n"); diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 90f62c4bddba..074ec71772d2 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -674,7 +674,11 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping sc2vl sysfs info, (err %d) port %u\n", ret, port_num); - goto bail; + /* + * Based on the documentation for kobject_init_and_add(), the + * caller should call kobject_put even if this call fails. + */ + goto bail_sc2vl; } kobject_uevent(&ppd->sc2vl_kobj, KOBJ_ADD); @@ -684,7 +688,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping sl2sc sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sc2vl; + goto bail_sl2sc; } kobject_uevent(&ppd->sl2sc_kobj, KOBJ_ADD); @@ -694,7 +698,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping vl2mtu sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sl2sc; + goto bail_vl2mtu; } kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD); @@ -704,7 +708,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping Congestion Control sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_vl2mtu; + goto bail_cc; } kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); @@ -742,7 +746,6 @@ bail_sl2sc: kobject_put(&ppd->sl2sc_kobj); bail_sc2vl: kobject_put(&ppd->sc2vl_kobj); -bail: return ret; } @@ -853,8 +856,13 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) return 0; bail: - for (i = 0; i < dd->num_sdma; i++) - kobject_del(&dd->per_sdma[i].kobj); + /* + * The function kobject_put() will call kobject_del() if the kobject + * has been added successfully. The sysfs files created under the + * kobject directory will also be removed during the process. 
+ */ + for (; i >= 0; i--) + kobject_put(&dd->per_sdma[i].kobj); return ret; } @@ -867,6 +875,10 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; int i; + /* Unwind operations in hfi1_verbs_register_sysfs() */ + for (i = 0; i < dd->num_sdma; i++) + kobject_put(&dd->per_sdma[i].kobj); + for (i = 0; i < dd->num_pports; i++) { ppd = &dd->pport[i]; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 7e9527ab6d64..b5bc41e10e22 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -107,8 +107,6 @@ static u32 mask_generation(u32 a) * C - Capcode */ -static u32 tid_rdma_flow_wt; - static void tid_rdma_trigger_resume(struct work_struct *work); static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req); static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req, @@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet, struct tid_rdma_flow *flow, bool fecn); +static void validate_r_tid_ack(struct hfi1_qp_priv *priv) +{ + if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) + priv->r_tid_ack = priv->r_tid_tail; +} + +static void tid_rdma_schedule_ack(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + priv->s_flags |= RVT_S_ACK_PENDING; + hfi1_schedule_tid_send(qp); +} + +static void tid_rdma_trigger_ack(struct rvt_qp *qp) +{ + validate_r_tid_ack(qp->priv); + tid_rdma_schedule_ack(qp); +} + static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p) { return @@ -2730,11 +2748,6 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, diff = cmp_psn(psn, flow->flow_state.r_next_psn); if (diff > 0) { - if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) - restart_tid_rdma_read_req(rcd, - qp, - wqe); - /* Drop the packet.*/ goto s_unlock; } else if (diff < 0) { @@ -3004,10 +3017,7 @@ nak_psn: qpriv->s_nak_state = IB_NAK_PSN_ERROR; /* We are NAK'ing the next expected PSN */ qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn); - qpriv->s_flags |= RVT_S_ACK_PENDING; - if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID) - qpriv->r_tid_ack = qpriv->r_tid_tail; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); } goto unlock; } @@ -3370,18 +3380,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32); } -void hfi1_compute_tid_rdma_flow_wt(void) +static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp) { /* * Heuristic for computing the RNR timeout when waiting on the flow * queue. Rather than a computationaly expensive exact estimate of when * a flow will be available, we assume that if a QP is at position N in * the flow queue it has to wait approximately (N + 1) * (number of - * segments between two sync points), assuming PMTU of 4K. The rationale - * for this is that flows are released and recycled at each sync point. + * segments between two sync points). The rationale for this is that + * flows are released and recycled at each sync point. 
*/ - tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) / - TID_RDMA_MAX_SEGMENT_SIZE; + return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT; } static u32 position_in_queue(struct hfi1_qp_priv *qpriv, @@ -3504,7 +3513,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) { ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp); if (ret) { - to_seg = tid_rdma_flow_wt * + to_seg = hfi1_compute_tid_rdma_flow_wt(qp) * position_in_queue(qpriv, &rcd->flow_queue); break; @@ -3525,7 +3534,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) /* * If overtaking req->acked_tail, send an RNR NAK. Because the * QP is not queued in this case, and the issue can only be - * caused due a delay in scheduling the second leg which we + * caused by a delay in scheduling the second leg which we * cannot estimate, we use a rather arbitrary RNR timeout of * (MAX_FLOWS / 2) segments */ @@ -3533,8 +3542,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx) MAX_FLOWS)) { ret = -EAGAIN; to_seg = MAX_FLOWS >> 1; - qpriv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); break; } @@ -4334,8 +4342,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn, req); trace_hfi1_tid_write_rsp_rcv_data(qp); - if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) - priv->r_tid_ack = priv->r_tid_tail; + validate_r_tid_ack(priv); if (opcode == TID_OP(WRITE_DATA_LAST)) { release_rdma_sge_mr(e); @@ -4374,8 +4381,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet) } done: - priv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_schedule_ack(qp); exit: priv->r_next_psn_kdeth = flow->flow_state.r_next_psn; if (fecn) @@ -4387,10 +4393,7 @@ send_nak: if (!priv->s_nak_state) { priv->s_nak_state = IB_NAK_PSN_ERROR; priv->s_nak_psn = flow->flow_state.r_next_psn; - priv->s_flags |= RVT_S_ACK_PENDING; - if (priv->r_tid_ack == HFI1_QP_WQE_INVALID) - priv->r_tid_ack = priv->r_tid_tail; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); } goto done; } @@ -4624,6 +4627,15 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) */ fpsn = full_flow_psn(flow, flow->flow_state.spsn); req->r_ack_psn = psn; + /* + * If resync_psn points to the last flow PSN for a + * segment and the new segment (likely from a new + * request) starts with a new generation number, we + * need to adjust resync_psn accordingly. + */ + if (flow->flow_state.generation != + (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT)) + resync_psn = mask_psn(fpsn - 1); flow->resync_npkts += delta_psn(mask_psn(resync_psn + 1), fpsn); /* @@ -4938,8 +4950,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet) qpriv->resync = true; /* RESYNC request always gets a TID RDMA ACK. */ qpriv->s_nak_state = 0; - qpriv->s_flags |= RVT_S_ACK_PENDING; - hfi1_schedule_tid_send(qp); + tid_rdma_trigger_ack(qp); bail: if (fecn) qp->s_flags |= RVT_S_ECN; @@ -5389,7 +5400,10 @@ static bool _hfi1_schedule_tid_send(struct rvt_qp *qp) struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_devdata *dd = ppd->dd; + + if ((dd->flags & HFI1_SHUTDOWN)) + return true; return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq, priv->s_sde ? 
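This hunk moves the flow-wait heuristic from a module-init global (hard-wired to a 4 KiB PMTU) to a per-QP computation. A minimal sketch of the arithmetic, assuming hfi1's MAX_TID_FLOW_PSN is BIT(11) = 2048 and the segment size is the 256 KiB implied by TID_RDMA_SEGMENT_SHIFT; the queue-position multiply happens at the call site in hfi1_tid_write_alloc_resources(), and the helper name here is illustrative:

/* Illustrative only: estimated RNR wait, in segments, for a QP at
 * position queue_pos in the flow queue.
 */
static u32 flow_wait_segments(u32 pmtu, u32 queue_pos)
{
	/* Segments one flow covers before being recycled at a sync point. */
	u32 per_flow = (MAX_TID_FLOW_PSN * pmtu) >> TID_RDMA_SEGMENT_SHIFT;

	/* e.g. pmtu = 4096: (2048 * 4096) >> 18 = 32 segments per position */
	return per_flow * (queue_pos + 1);
}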
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h index 1c536185261e..6e82df2190b7 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.h +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -17,6 +17,7 @@ #define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */ #define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT) +#define TID_RDMA_SEGMENT_SHIFT 18 /* * Bit definitions for priv->s_flags. @@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe, struct ib_other_headers *ohdr, u32 *bth1, u32 *bth2, u32 *len); -void hfi1_compute_tid_rdma_flow_wt(void); - void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet); u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e, diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 3592a9ec155e..4d732353379d 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -90,9 +90,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_devdata *dd = uctxt->dd; int ret = 0; - spin_lock_init(&fd->tid_lock); - spin_lock_init(&fd->invalid_lock); - fd->entry_to_rb = kcalloc(uctxt->expected_count, sizeof(struct rb_node *), GFP_KERNEL); @@ -165,10 +162,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) if (fd->handler) { hfi1_mmu_rb_unregister(fd->handler); } else { + mutex_lock(&uctxt->exp_mutex); if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); + mutex_unlock(&uctxt->exp_mutex); } kfree(fd->invalid_tids); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index fd754a16475a..a92346e88628 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -141,6 +141,7 @@ static int defer_packet_queue( */ xchg(&pq->state, SDMA_PKT_Q_DEFERRED); if (list_empty(&pq->busy.list)) { + pq->busy.lock = &sde->waitlock; iowait_get_priority(&pq->busy); iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); } @@ -155,6 +156,7 @@ static void activate_packet_queue(struct iowait *wait, int reason) { struct hfi1_user_sdma_pkt_q *pq = container_of(wait, struct hfi1_user_sdma_pkt_q, busy); + pq->busy.lock = NULL; xchg(&pq->state, SDMA_PKT_Q_ACTIVE); wake_up(&wait->wait_dma); }; @@ -179,7 +181,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, pq = kzalloc(sizeof(*pq), GFP_KERNEL); if (!pq) return -ENOMEM; - pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -236,7 +237,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, goto pq_mmu_fail; } - fd->pq = pq; + rcu_assign_pointer(fd->pq, pq); fd->cq = cq; return 0; @@ -257,6 +258,21 @@ pq_reqs_nomem: return ret; } +static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq) +{ + unsigned long flags; + seqlock_t *lock = pq->busy.lock; + + if (!lock) + return; + write_seqlock_irqsave(lock, flags); + if (!list_empty(&pq->busy.list)) { + list_del_init(&pq->busy.list); + pq->busy.lock = NULL; + } + write_sequnlock_irqrestore(lock, flags); +} + int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt) { @@ -264,8 +280,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt); - pq = fd->pq; + 
spin_lock(&fd->pq_rcu_lock); + pq = srcu_dereference_check(fd->pq, &fd->pq_srcu, + lockdep_is_held(&fd->pq_rcu_lock)); if (pq) { + rcu_assign_pointer(fd->pq, NULL); + spin_unlock(&fd->pq_rcu_lock); + synchronize_srcu(&fd->pq_srcu); + /* at this point there can be no more new requests */ if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); iowait_sdma_drain(&pq->busy); @@ -276,8 +298,10 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, kfree(pq->reqs); kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); + flush_pq_iowait(pq); kfree(pq); - fd->pq = NULL; + } else { + spin_unlock(&fd->pq_rcu_lock); } if (fd->cq) { vfree(fd->cq->comps); @@ -321,7 +345,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, { int ret = 0, i; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq = + srcu_dereference(fd->pq, &fd->pq_srcu); struct hfi1_user_sdma_comp_q *cq = fd->cq; struct hfi1_devdata *dd = pq->dd; unsigned long idx = 0; @@ -564,10 +589,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); pq->state = SDMA_PKT_Q_ACTIVE; - /* Send the first N packets in the request to buy us some time */ - ret = user_sdma_send_pkts(req, pcount); - if (unlikely(ret < 0 && ret != -EBUSY)) - goto free_req; /* * This is a somewhat blocking send implementation. @@ -580,11 +601,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, if (ret < 0) { if (ret != -EBUSY) goto free_req; - wait_event_interruptible_timeout( + if (wait_event_interruptible_timeout( pq->busy.wait_dma, - (pq->state == SDMA_PKT_Q_ACTIVE), + pq->state == SDMA_PKT_Q_ACTIVE, msecs_to_jiffies( - SDMA_IOWAIT_TIMEOUT)); + SDMA_IOWAIT_TIMEOUT)) <= 0) + flush_pq_iowait(pq); } } *count += idx; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 4d8510b0fc38..9972e0e6545e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -110,12 +110,6 @@ enum pkt_q_sdma_state { SDMA_PKT_Q_DEFERRED, }; -/* - * Maximum retry attempts to submit a TX request - * before putting the process to sleep. - */ -#define MAX_DEFER_RETRY_COUNT 1 - #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ #define SDMA_DBG(req, fmt, ...) 
\ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 7f9c23450579..53dd30fd2d56 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -147,9 +147,6 @@ static int pio_wait(struct rvt_qp *qp, /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 -/* 16B trailing buffer */ -static const u8 trail_buf[MAX_16B_PADDING]; - static uint wss_threshold = 80; module_param(wss_threshold, uint, S_IRUGO); MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); @@ -518,10 +515,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; if (hfi1_do_pkey_check(packet)) - goto drop; + goto unlock_drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -530,6 +528,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); } + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. @@ -820,8 +819,8 @@ static int build_verbs_tx_desc( /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, - (void *)trail_buf, extra_bytes); + ret = sdma_txadd_daddr(sde->dd, &tx->txreq, + sde->dd->sdma_pad_phys, extra_bytes); bail_txadd: return ret; @@ -1089,7 +1088,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - seg_pio_copy_mid(pbuf, trail_buf, extra_bytes); + seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma, + extra_bytes); seg_pio_copy_end(pbuf); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 563cf39df6d5..9cc2e72093be 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -381,7 +381,7 @@ struct hns_roce_mr_table { struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; - int wqe_cnt; /* WQE num */ + u32 wqe_cnt; /* WQE num */ u32 max_post; int max_gs; int offset; @@ -613,7 +613,6 @@ struct hns_roce_qp { u8 sdb_en; u32 doorbell_qpn; __le32 sq_signal_bits; - u32 sq_next_wqe; int sq_max_wqes_per_wr; int sq_spare_wqes; struct hns_roce_wq sq; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index d9d668992e49..3ca163c7f513 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -59,7 +59,7 @@ enum { #define HNS_ROCE_HEM_CHUNK_LEN \ ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ - (sizeof(struct scatterlist))) + (sizeof(struct scatterlist) + sizeof(void *))) #define check_whether_bt_num_3(type, hop_num) \ (type < HEM_TYPE_MTT && hop_num == 2) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 9496c69fff3a..902e74555654 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -74,8 +74,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, unsigned long flags = 0; void *wqe = NULL; u32 doorbell[2]; + u32 wqe_idx = 0; int nreq = 0; - u32 ind = 0; int ret = 0; u8 *smac; int loopback; @@ -88,7 +88,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, } 
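The hns_roce_hw_v1.c hunks that follow replace the separately maintained running index (ind/sq_next_wqe) with a slot index derived directly from the producer counter: wqe_idx = (head + nreq) & (wqe_cnt - 1). The invariant that makes this safe is the power-of-two ring size, which lets the counter grow monotonically (and even wrap the integer) while the mask always yields a valid slot. A stand-alone plain-C sketch of that invariant, illustrative only:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint32_t wqe_cnt = 8;	/* ring size: must be a power of two */
	uint32_t head = 0xfffffffd;	/* near integer overflow is still fine */
	uint32_t nreq;

	for (nreq = 0; nreq < 16; nreq++) {
		uint32_t wqe_idx = (head + nreq) & (wqe_cnt - 1);

		assert(wqe_idx < wqe_cnt);	/* always a valid slot */
	}
	return 0;
}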
spin_lock_irqsave(&qp->sq.lock, flags); - ind = qp->sq_next_wqe; + for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { ret = -ENOMEM; @@ -96,6 +96,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, goto out; } + wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); + if (unlikely(wr->num_sge > qp->sq.max_gs)) { dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, qp->sq.max_gs); @@ -104,9 +106,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, goto out; } - wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); - qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = - wr->wr_id; + wqe = get_send_wqe(qp, wqe_idx); + qp->sq.wrid[wqe_idx] = wr->wr_id; /* Corresponding to the RC and RD type wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { @@ -213,7 +214,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, cpu_to_le32((wr->sg_list[1].addr) >> 32); ud_sq_wqe->l_key1 = cpu_to_le32(wr->sg_list[1].lkey); - ind++; } else if (ibqp->qp_type == IB_QPT_RC) { u32 tmp_len = 0; @@ -311,7 +311,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ctrl->flag |= cpu_to_le32(wr->num_sge << HNS_ROCE_WQE_SGE_NUM_BIT); } - ind++; } } @@ -339,7 +338,6 @@ out: doorbell[1] = le32_to_cpu(sq_db.u32_8); hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l); - qp->sq_next_wqe = ind; } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -351,12 +349,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { - int ret = 0; - int nreq = 0; - int ind = 0; - int i = 0; - u32 reg_val; - unsigned long flags = 0; struct hns_roce_rq_wqe_ctrl *ctrl = NULL; struct hns_roce_wqe_data_seg *scat = NULL; struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); @@ -364,9 +356,14 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct device *dev = &hr_dev->pdev->dev; struct hns_roce_rq_db rq_db; uint32_t doorbell[2] = {0}; + unsigned long flags = 0; + unsigned int wqe_idx; + int ret = 0; + int nreq = 0; + int i = 0; + u32 reg_val; spin_lock_irqsave(&hr_qp->rq.lock, flags); - ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&hr_qp->rq, nreq, @@ -376,6 +373,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, goto out; } + wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, hr_qp->rq.max_gs); @@ -384,7 +383,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, goto out; } - ctrl = get_recv_wqe(hr_qp, ind); + ctrl = get_recv_wqe(hr_qp, wqe_idx); roce_set_field(ctrl->rwqe_byte_12, RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M, @@ -396,9 +395,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, for (i = 0; i < wr->num_sge; i++) set_data_seg(scat + i, wr->sg_list + i); - hr_qp->rq.wrid[ind] = wr->wr_id; - - ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); + hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } out: @@ -2701,7 +2698,6 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, hr_qp->rq.tail = 0; hr_qp->sq.head = 0; hr_qp->sq.tail = 0; - hr_qp->sq_next_wqe = 0; } kfree(context); @@ -3315,7 +3311,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, hr_qp->rq.tail = 0; hr_qp->sq.head = 0; hr_qp->sq.tail = 0; - hr_qp->sq_next_wqe = 0; } out: kfree(context); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c 
b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b5392cb5b20f..ff0e5a3df724 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -110,7 +110,7 @@ static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg, } static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, - unsigned int *sge_ind) + unsigned int *sge_ind, int valid_num_sge) { struct hns_roce_v2_wqe_data_seg *dseg; struct ib_sge *sg; @@ -123,7 +123,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; - extend_sge_num = wr->num_sge - num_in_wqe; + extend_sge_num = valid_num_sge - num_in_wqe; sg = wr->sg_list + num_in_wqe; shift = qp->hr_buf.page_shift; @@ -159,14 +159,16 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, + int valid_num_sge, const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = wqe; struct hns_roce_qp *qp = to_hr_qp(ibqp); + int j = 0; int i; - if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { + if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) { if (le32_to_cpu(rc_sq_wqe->msg_len) > hr_dev->caps.max_sq_inline) { *bad_wr = wr; @@ -190,7 +192,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); } else { - if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { + if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { for (i = 0; i < wr->num_sge; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); @@ -203,19 +205,21 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, (*sge_ind) & (qp->sge.sge_cnt - 1)); - for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { + for (i = 0; i < wr->num_sge && + j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); dseg++; + j++; } } - set_extend_sge(qp, wr, sge_ind); + set_extend_sge(qp, wr, sge_ind, valid_num_sge); } roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge); + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); } return 0; @@ -239,10 +243,11 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; struct ib_qp_attr attr; - unsigned int sge_ind = 0; unsigned int owner_bit; + unsigned int sge_idx; + unsigned int wqe_idx; unsigned long flags; - unsigned int ind; + int valid_num_sge; void *wqe = NULL; bool loopback; int attr_mask; @@ -269,8 +274,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, } spin_lock_irqsave(&qp->sq.lock, flags); - ind = qp->sq_next_wqe; - sge_ind = qp->next_sge; + sge_idx = qp->next_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { @@ -279,6 +283,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, goto out; } + wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); + if (unlikely(wr->num_sge > qp->sq.max_gs)) { dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, qp->sq.max_gs); @@ -287,14 
+293,20 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, goto out; } - wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); - qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = - wr->wr_id; - + wqe = get_send_wqe(qp, wqe_idx); + qp->sq.wrid[wqe_idx] = wr->wr_id; owner_bit = ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + valid_num_sge = 0; tmp_len = 0; + for (i = 0; i < wr->num_sge; i++) { + if (likely(wr->sg_list[i].length)) { + tmp_len += wr->sg_list[i].length; + valid_num_sge++; + } + } + /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { ud_sq_wqe = wqe; @@ -330,9 +342,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, V2_UD_SEND_WQE_BYTE_4_OPCODE_S, HNS_ROCE_V2_WQE_OP_SEND); - for (i = 0; i < wr->num_sge; i++) - tmp_len += wr->sg_list[i].length; - ud_sq_wqe->msg_len = cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len); @@ -368,12 +377,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, - wr->num_sge); + valid_num_sge); roce_set_field(ud_sq_wqe->byte_20, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - sge_ind & (qp->sge.sge_cnt - 1)); + sge_idx & (qp->sge.sge_cnt - 1)); roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, @@ -426,13 +435,10 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); - set_extend_sge(qp, wr, &sge_ind); - ind++; + set_extend_sge(qp, wr, &sge_idx, valid_num_sge); } else if (ibqp->qp_type == IB_QPT_RC) { rc_sq_wqe = wqe; memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); - for (i = 0; i < wr->num_sge; i++) - tmp_len += wr->sg_list[i].length; rc_sq_wqe->msg_len = cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len); @@ -553,15 +559,14 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, - wr->num_sge); + valid_num_sge); } else if (wr->opcode != IB_WR_REG_MR) { ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, - wqe, &sge_ind, bad_wr); + wqe, &sge_idx, + valid_num_sge, bad_wr); if (ret) goto out; } - - ind++; } else { dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -591,8 +596,7 @@ out: hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l); - qp->sq_next_wqe = ind; - qp->next_sge = sge_ind; + qp->next_sge = sge_idx; if (qp->state == IB_QPS_ERR) { attr_mask = IB_QP_STATE; @@ -626,13 +630,12 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, unsigned long flags; void *wqe = NULL; int attr_mask; + u32 wqe_idx; int ret = 0; int nreq; - int ind; int i; spin_lock_irqsave(&hr_qp->rq.lock, flags); - ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); if (hr_qp->state == IB_QPS_RESET) { spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -648,6 +651,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } + wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, hr_qp->rq.max_gs); @@ -656,7 +661,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } - wqe = get_recv_wqe(hr_qp, ind); + wqe = get_recv_wqe(hr_qp, wqe_idx); dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; for (i = 0; i < wr->num_sge; i++) { if (!wr->sg_list[i].length) @@ -672,8 +677,8 @@ static int 
hns_roce_v2_post_recv(struct ib_qp *ibqp, /* rq support inline data */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { - sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list; - hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = + sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; + hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge; for (i = 0; i < wr->num_sge; i++) { sge_list[i].addr = @@ -682,9 +687,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, } } - hr_qp->rq.wrid[ind] = wr->wr_id; - - ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); + hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } out: @@ -1218,34 +1221,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_pf_timer_res_a *req_a; - struct hns_roce_cmq_desc desc[2]; - int ret, i; + struct hns_roce_cmq_desc desc; + int ret; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_QUERY_PF_TIMER_RES, - true); + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_PF_TIMER_RES, + true); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } - - ret = hns_roce_cmq_send(hr_dev, desc, 2); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) return ret; - req_a = (struct hns_roce_pf_timer_res_a *)desc[0].data; + req_a = (struct hns_roce_pf_timer_res_a *)desc.data; hr_dev->caps.qpc_timer_bt_num = - roce_get_field(req_a->qpc_timer_bt_idx_num, - PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M, - PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S); + roce_get_field(req_a->qpc_timer_bt_idx_num, + PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M, + PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S); hr_dev->caps.cqc_timer_bt_num = - roce_get_field(req_a->cqc_timer_bt_idx_num, - PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M, - PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S); + roce_get_field(req_a->cqc_timer_bt_idx_num, + PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M, + PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S); return 0; } @@ -4260,7 +4255,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, hr_qp->rq.tail = 0; hr_qp->sq.head = 0; hr_qp->sq.tail = 0; - hr_qp->sq_next_wqe = 0; hr_qp->next_sge = 0; if (hr_qp->rq.wqe_cnt) *hr_qp->rdb.db_record = 0; @@ -4362,7 +4356,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->path_mig_state = IB_MIG_ARMED; qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; if (hr_qp->ibqp.qp_type == IB_QPT_UD) - qp_attr->qkey = V2_QKEY_VAL; + qp_attr->qkey = le32_to_cpu(context->qkey_xrcd); qp_attr->rq_psn = roce_get_field(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_RX_REQ_EPSN_M, @@ -5191,9 +5185,9 @@ static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev, return; } - if (eq->buf_list) - dma_free_coherent(hr_dev->dev, buf_chk_sz, - eq->buf_list->buf, eq->buf_list->map); + dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf, + eq->buf_list->map); + kfree(eq->buf_list); } static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, @@ -5860,11 +5854,11 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - hr_dev->caps.idx_ba_pg_sz); + hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - hr_dev->caps.idx_buf_pg_sz); + hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET); 
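The post_send changes earlier in this file pre-scan the SGE list so that zero-length entries are neither copied into the WQE nor counted in the SGE number the WQE advertises, which is why valid_num_sge replaces wr->num_sge throughout. A minimal sketch of that scan, mirroring the valid_num_sge/tmp_len logic; the helper name is illustrative, struct ib_sge is the standard verbs SGE:

/* Count only SGEs that carry data and total their lengths. */
static int count_valid_sges(const struct ib_sge *sg_list, int num_sge,
			    u32 *msg_len)
{
	int i, valid = 0;

	*msg_len = 0;
	for (i = 0; i < num_sge; i++) {
		if (sg_list[i].length) {
			*msg_len += sg_list[i].length;
			valid++;
		}
	}
	return valid;
}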
srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT); srq_context->idx_nxt_blk_addr = diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index edfdbe2ce0db..4211a982a8e0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -87,8 +87,8 @@ #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 #define HNS_ROCE_V2_SCCC_ENTRY_SZ 32 -#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096 -#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096 +#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE +#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x100 diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 6110ec408626..c68527f77418 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1018,8 +1018,8 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) { if (page_addr & ((1 << mtt->page_shift) - 1)) { dev_err(dev, - "page_addr 0x%llx is not page_shift %d alignment!\n", - page_addr, mtt->page_shift); + "page_addr is not page_shift %d alignment!\n", + mtt->page_shift); ret = -EINVAL; goto out; } diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 0a31d0a3d657..06871731ac43 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -98,11 +98,15 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, goto err; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); - if (!table_attr) + if (!table_attr) { + ret = -EMSGSIZE; goto err; + } - if (hns_roce_fill_cq(msg, context)) + if (hns_roce_fill_cq(msg, context)) { + ret = -EMSGSIZE; goto err_cancel_table; + } nla_nest_end(msg, table_attr); kfree(context); @@ -113,7 +117,7 @@ err_cancel_table: nla_nest_cancel(msg, table_attr); err: kfree(context); - return -EMSGSIZE; + return ret; } int hns_roce_fill_res_entry(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index b3421b1f21e0..1dfd3b649d3c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -231,7 +231,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); srq->max_gs = srq_init_attr->attr.max_sge; - srq_desc_size = max(16, 16 * srq->max_gs); + srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs)); srq->wqe_shift = ilog2(srq_desc_size); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 8233f5a4e623..47d7be972d34 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1984,7 +1984,6 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev, struct rtable *rt; struct neighbour *neigh; int rc = arpindex; - struct net_device *netdev = iwdev->netdev; __be32 dst_ipaddr = htonl(dst_ip); __be32 src_ipaddr = htonl(src_ip); @@ -1994,9 +1993,6 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev, return rc; } - if (netif_is_bond_slave(netdev)) - netdev = netdev_master_upper_dev_get(netdev); - neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr); rcu_read_lock(); @@ -2062,7 +2058,6 @@ static int 
i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, { struct neighbour *neigh; int rc = arpindex; - struct net_device *netdev = iwdev->netdev; struct dst_entry *dst; struct sockaddr_in6 dst_addr; struct sockaddr_in6 src_addr; @@ -2083,9 +2078,6 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, return rc; } - if (netif_is_bond_slave(netdev)) - netdev = netdev_master_upper_dev_get(netdev); - neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32); rcu_read_lock(); diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 55a1fbf0e670..ae8b97c30665 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -534,7 +534,7 @@ void i40iw_manage_arp_cache(struct i40iw_device *iwdev, int arp_index; arp_index = i40iw_arp_table(iwdev, ip_addr, ipv4, mac_addr, action); - if (arp_index == -1) + if (arp_index < 0) return; cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); if (!cqp_request) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 10932baee279..7749d680017a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1223,6 +1223,8 @@ static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev) (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) || (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) { idev = in_dev_get(dev); + if (!idev) + continue; for_ifa(idev) { i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address, diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index ecd6cadd529a..b591861934b3 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -186,23 +186,6 @@ out: kfree(ent); } -static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id) -{ - struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; - struct rb_root *sl_id_map = &sriov->sl_id_map; - struct id_map_entry *ent, *found_ent; - - spin_lock(&sriov->id_map_lock); - ent = xa_erase(&sriov->pv_id_table, pv_cm_id); - if (!ent) - goto out; - found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id); - if (found_ent && found_ent == ent) - rb_erase(&found_ent->node, sl_id_map); -out: - spin_unlock(&sriov->id_map_lock); -} - static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new) { struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map; @@ -294,7 +277,7 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) spin_lock(&sriov->id_map_lock); spin_lock_irqsave(&sriov->going_down_lock, flags); /*make sure that there is no schedule inside the scheduled work.*/ - if (!sriov->is_going_down) { + if (!sriov->is_going_down && !id->scheduled_delete) { id->scheduled_delete = 1; schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } @@ -341,9 +324,6 @@ cont: if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) schedule_delayed(ibdev, id); - else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) - id_map_find_del(ibdev, pv_cm_id); - return 0; } @@ -382,12 +362,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, *slave = id->slave_id; set_remote_comm_id(mad, id->sl_cm_id); - if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) + if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID || + mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) schedule_delayed(ibdev, id); - else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID || - mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) { - 
id_map_find_del(ibdev, (int) pv_cm_id); - } return 0; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 25d09d53b51c..4aa439a7ce61 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -246,6 +246,13 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return mlx4_ib_update_gids_v1(gids, ibdev, port_num); } +static void free_gid_entry(struct gid_entry *entry) +{ + memset(&entry->gid, 0, sizeof(entry->gid)); + kfree(entry->ctx); + entry->ctx = NULL; +} + static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) { struct mlx4_ib_dev *ibdev = to_mdev(attr->device); @@ -306,6 +313,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) GFP_ATOMIC); if (!gids) { ret = -ENOMEM; + *context = NULL; + free_gid_entry(&port_gid_table->gids[free]); } else { for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) { memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid)); @@ -317,6 +326,12 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) if (!ret && hw_update) { ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num); + if (ret) { + spin_lock_bh(&iboe->lock); + *context = NULL; + free_gid_entry(&port_gid_table->gids[free]); + spin_unlock_bh(&iboe->lock); + } kfree(gids); } @@ -346,10 +361,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) if (!ctx->refcount) { unsigned int real_index = ctx->real_index; - memset(&port_gid_table->gids[real_index].gid, 0, - sizeof(port_gid_table->gids[real_index].gid)); - kfree(port_gid_table->gids[real_index].ctx); - port_gid_table->gids[real_index].ctx = NULL; + free_gid_entry(&port_gid_table->gids[real_index]); hw_update = 1; } } @@ -768,7 +780,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->ip_gids = true; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; - props->pkey_tbl_len = 1; + if (mdev->dev->caps.pkey_table_len[port]) + props->pkey_tbl_len = 1; props->max_mtu = IB_MTU_4096; props->max_vl_num = 2; props->state = IB_PORT_DOWN; @@ -1478,8 +1491,9 @@ static int __mlx4_ib_create_default_rules( int i; for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) { + union ib_flow_spec ib_spec = {}; int ret; - union ib_flow_spec ib_spec; + switch (pdefault_rules->rules_create_list[i]) { case 0: /* no rule */ @@ -3004,16 +3018,17 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ibdev->ib_active = false; flush_workqueue(wq); - mlx4_ib_close_sriov(ibdev); - mlx4_ib_mad_cleanup(ibdev); - ib_unregister_device(&ibdev->ib_dev); - mlx4_ib_diag_cleanup(ibdev); if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } + mlx4_ib_close_sriov(ibdev); + mlx4_ib_mad_cleanup(ibdev); + ib_unregister_device(&ibdev->ib_dev); + mlx4_ib_diag_cleanup(ibdev); + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); kfree(ibdev->ib_uc_qpns_bitmap); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 355205a28544..6fd661076ade 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -440,7 +440,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->ibmr.length = length; - mr->ibmr.iova = virt_addr; mr->ibmr.page_size = 1U << shift; return 
&mr->ibmr; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 5221c0794d1d..89c2038f50da 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2817,6 +2817,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, int send_size; int header_size; int spc; + int err; int i; if (wr->wr.opcode != IB_WR_SEND) @@ -2851,7 +2852,9 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, sqp->ud_header.lrh.virtual_lane = 0; sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); + err = ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); + if (err) + return err; sqp->ud_header.bth.pkey = cpu_to_be16(pkey); if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); @@ -3138,9 +3141,14 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, } sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) - ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); + err = ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, + &pkey); else - ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey); + err = ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, + &pkey); + if (err) + return err; + sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2e2e65f00257..347a563736e3 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -330,6 +330,22 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, dump_cqe(dev, cqe); } +static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, + u16 tail, u16 head) +{ + u16 idx; + + do { + idx = tail & (qp->sq.wqe_cnt - 1); + if (idx == head) + break; + + tail = qp->sq.w_list[idx].next; + } while (1); + tail = qp->sq.w_list[idx].next; + qp->sq.last_poll = tail; +} + static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); @@ -368,7 +384,7 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, } static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, - int *npolled, int is_send) + int *npolled, bool is_send) { struct mlx5_ib_wq *wq; unsigned int cur; @@ -383,10 +399,16 @@ static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, return; for (i = 0; i < cur && np < num_entries; i++) { - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + unsigned int idx; + + idx = (is_send) ? 
wq->last_poll : wq->tail; + idx &= (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; wc->status = IB_WC_WR_FLUSH_ERR; wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; wq->tail++; + if (is_send) + wq->last_poll = wq->w_list[idx].next; np++; wc->qp = &qp->ibqp; wc++; @@ -476,6 +498,7 @@ repoll: wqe_ctr = be16_to_cpu(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); handle_good_req(wc, cqe64, wq, idx); + handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); wc->wr_id = wq->wrid[idx]; wq->tail = wq->wqe_head[idx] + 1; wc->status = IB_WC_SUCCESS; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 80b42d069328..4c1f4fe5eb02 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -306,6 +306,10 @@ static u64 devx_get_obj_id(const void *in) obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, MLX5_GET(rst2init_qp_in, in, qpn)); break; + case MLX5_CMD_OP_INIT2INIT_QP: + obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + MLX5_GET(init2init_qp_in, in, qpn)); + break; case MLX5_CMD_OP_INIT2RTR_QP: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, MLX5_GET(init2rtr_qp_in, in, qpn)); @@ -621,6 +625,7 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: case MLX5_CMD_OP_RST2INIT_QP: case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_INIT2INIT_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: case MLX5_CMD_OP_SQERR2RTS_QP: diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 4950df3f71b6..5c73c0a790fa 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); if (ret) { /* Undo the effect of adding the outstanding wr */ - gsi->outstanding_pi = (gsi->outstanding_pi - 1) % - gsi->cap.max_send_wr; + gsi->outstanding_pi--; goto err; } spin_unlock_irqrestore(&gsi->lock, flags); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ff6a2b1fa8b6..660310d54ff6 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -516,7 +516,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, mdev_port_num); if (err) goto out; - ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet); + ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability); eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); props->active_width = IB_WIDTH_4X; @@ -828,6 +828,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { + size_t uhw_outlen = (uhw) ? 
uhw->outlen : 0; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; @@ -841,12 +842,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, u64 max_tso; resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); - if (uhw->outlen && uhw->outlen < resp_len) + if (uhw_outlen && uhw_outlen < resp_len) return -EINVAL; else resp.response_length = resp_len; - if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) + if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; memset(props, 0, sizeof(*props)); @@ -910,7 +911,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->raw_packet_caps |= IB_RAW_PACKET_CAP_CVLAN_STRIPPING; - if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { + if (field_avail(typeof(resp), tso_caps, uhw_outlen)) { max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); if (max_tso) { resp.tso_caps.max_tso = 1 << max_tso; @@ -920,7 +921,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { + if (field_avail(typeof(resp), rss_caps, uhw_outlen)) { resp.rss_caps.rx_hash_function = MLX5_RX_HASH_FUNC_TOEPLITZ; resp.rss_caps.rx_hash_fields_mask = @@ -940,9 +941,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.response_length += sizeof(resp.rss_caps); } } else { - if (field_avail(typeof(resp), tso_caps, uhw->outlen)) + if (field_avail(typeof(resp), tso_caps, uhw_outlen)) resp.response_length += sizeof(resp.tso_caps); - if (field_avail(typeof(resp), rss_caps, uhw->outlen)) + if (field_avail(typeof(resp), rss_caps, uhw_outlen)) resp.response_length += sizeof(resp.rss_caps); } @@ -1063,7 +1064,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_MAX_CQ_PERIOD; } - if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { + if (field_avail(typeof(resp), cqe_comp_caps, uhw_outlen)) { resp.response_length += sizeof(resp.cqe_comp_caps); if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) { @@ -1081,7 +1082,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) && + if (field_avail(typeof(resp), packet_pacing_caps, uhw_outlen) && raw_support) { if (MLX5_CAP_QOS(mdev, packet_pacing) && MLX5_CAP_GEN(mdev, qos)) { @@ -1100,7 +1101,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, - uhw->outlen)) { + uhw_outlen)) { if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) resp.mlx5_ib_support_multi_pkt_send_wqes = MLX5_IB_ALLOW_MPW; @@ -1113,7 +1114,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } - if (field_avail(typeof(resp), flags, uhw->outlen)) { + if (field_avail(typeof(resp), flags, uhw_outlen)) { resp.response_length += sizeof(resp.flags); if (MLX5_CAP_GEN(mdev, cqe_compression_128)) @@ -1129,8 +1130,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; } - if (field_avail(typeof(resp), sw_parsing_caps, - uhw->outlen)) { + if (field_avail(typeof(resp), sw_parsing_caps, uhw_outlen)) { resp.response_length += sizeof(resp.sw_parsing_caps); if (MLX5_CAP_ETH(mdev, swp)) { resp.sw_parsing_caps.sw_parsing_offloads |= @@ -1150,7 +1150,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) && + if (field_avail(typeof(resp), striding_rq_caps, uhw_outlen) && 
raw_support) { resp.response_length += sizeof(resp.striding_rq_caps); if (MLX5_CAP_GEN(mdev, striding_rq)) { @@ -1167,8 +1167,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), tunnel_offloads_caps, - uhw->outlen)) { + if (field_avail(typeof(resp), tunnel_offloads_caps, uhw_outlen)) { resp.response_length += sizeof(resp.tunnel_offloads_caps); if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan)) resp.tunnel_offloads_caps |= @@ -1179,17 +1178,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_GRE; - if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_CW_MPLS_GRE) + if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE; - if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_CW_MPLS_UDP) + if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_udp)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } - if (uhw->outlen) { + if (uhw_outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); if (err) @@ -3502,10 +3499,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } INIT_LIST_HEAD(&handler->list); - if (dst) { - memcpy(&dest_arr[0], dst, sizeof(*dst)); - dest_num++; - } for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(dev->mdev, spec->match_criteria, @@ -3519,6 +3512,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, ib_flow += ((union ib_flow_spec *)ib_flow)->size; } + if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) { + memcpy(&dest_arr[0], dst, sizeof(*dst)); + dest_num++; + } + if (!flow_is_multicast_only(flow_attr)) set_underlay_qp(dev, spec, underlay_qpn); @@ -3563,10 +3561,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { - if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) { + if (!dest_num) rule_dst = NULL; - dest_num = 0; - } } else { if (is_egress) flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; @@ -4678,7 +4674,6 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; int err = -ENOMEM; - struct ib_udata uhw = {.inlen = 0, .outlen = 0}; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) @@ -4688,7 +4683,7 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) if (!dprops) goto out; - err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); + err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); goto out; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f52b845f2f7b..3b11ed0d95ad 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -282,6 +282,7 @@ struct mlx5_ib_wq { unsigned head; unsigned tail; u16 cur_post; + u16 last_poll; void *cur_edge; }; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index d239fc58c002..eaab3e5b785b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -454,7 +454,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); - 
return NULL; + return ERR_PTR(-EINVAL); } ent = &cache->ent[entry]; @@ -1425,6 +1425,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (!mr->umem) return -EINVAL; + if (is_odp_mr(mr)) + return -EOPNOTSUPP; + if (flags & IB_MR_REREG_TRANS) { addr = virt_addr; len = length; @@ -1469,8 +1472,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } mr->allocated_from_cache = 0; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - mr->live = 1; } else { /* * Send a UMR WQE @@ -1499,7 +1500,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, npages, len, access_flags); - update_odp_mr(mr); return 0; err: @@ -1593,13 +1593,14 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) */ mr->live = 0; + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&dev->mr_srcu); + /* dequeue pending prefetch requests for the mr */ if (atomic_read(&mr->num_pending_prefetch)) flush_workqueue(system_unbound_wq); WARN_ON(atomic_read(&mr->num_pending_prefetch)); - /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ if (umem_odp->page_list) mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem), diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index fda3dfd6f87b..e0ecb540c67e 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1205,7 +1205,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, { bool sq = pfault->type & MLX5_PFAULT_REQUESTOR; u16 wqe_index = pfault->wqe.wqe_index; - void *wqe = NULL, *wqe_end = NULL; + void *wqe, *wqe_start = NULL, *wqe_end = NULL; u32 bytes_mapped, total_wqe_bytes; struct mlx5_core_rsc_common *res; int resume_with_error = 1; @@ -1226,12 +1226,13 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, goto resolve_page_fault; } - wqe = (void *)__get_free_page(GFP_KERNEL); - if (!wqe) { + wqe_start = (void *)__get_free_page(GFP_KERNEL); + if (!wqe_start) { mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n"); goto resolve_page_fault; } + wqe = wqe_start; qp = (res->res == MLX5_RES_QP) ? 
res_to_qp(res) : NULL; if (qp && sq) { ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE, @@ -1286,7 +1287,7 @@ resolve_page_fault: pfault->wqe.wq_num, resume_with_error, pfault->type); mlx5_core_res_put(res); - free_page((unsigned long)wqe); + free_page((unsigned long)wqe_start); } static int pages_in_range(u64 address, u32 length) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6dbca72a73b1..b19e580939ac 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1468,6 +1468,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u16 uid = to_mpd(pd)->uid; u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; + if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt) + return -EINVAL; if (qp->sq.wqe_cnt) { err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd); if (err) @@ -3254,10 +3256,12 @@ static int modify_raw_packet_qp_sq( } /* Only remove the old rate after new rate was set */ - if ((old_rl.rate && - !mlx5_rl_are_equal(&old_rl, &new_rl)) || - (new_state != MLX5_SQC_STATE_RDY)) + if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) || + (new_state != MLX5_SQC_STATE_RDY)) { mlx5_rl_remove_rate(dev, &old_rl); + if (new_state != MLX5_SQC_STATE_RDY) + memset(&new_rl, 0, sizeof(new_rl)); + } ibqp->rl = new_rl; sq->state = new_state; @@ -3702,6 +3706,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->sq.cur_post = 0; if (qp->sq.wqe_cnt) qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); + qp->sq.last_poll = 0; qp->db.db[MLX5_RCV_DBR] = 0; qp->db.db[MLX5_SND_DBR] = 0; } @@ -5372,7 +5377,9 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev, rdma_ah_set_path_bits(ah_attr, path->grh_mlid & 0x7f); rdma_ah_set_static_rate(ah_attr, path->static_rate ? path->static_rate - 5 : 0); - if (path->grh_mlid & (1 << 7)) { + + if (path->grh_mlid & (1 << 7) || + ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { u32 tc_fl = be32_to_cpu(path->tclass_flowlabel); rdma_ah_set_grh(ah_attr, NULL, @@ -6008,6 +6015,10 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); + if (!capable(CAP_SYS_RAWIO) && + init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) + return ERR_PTR(-EPERM); + dev = to_mdev(pd->device); switch (init_attr->wq_type) { case IB_WQT_RQ: diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4e7fde86c96b..c29c1f7da4a1 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -310,12 +310,18 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, srq->msrq.event = mlx5_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (udata) - if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { + if (udata) { + struct mlx5_ib_create_srq_resp resp = { + .srqn = srq->msrq.srqn, + }; + + if (ib_copy_to_udata(udata, &resp, min(udata->outlen, + sizeof(resp)))) { mlx5_ib_dbg(dev, "copy to user failed\n"); err = -EFAULT; goto err_core; } + } init_attr->attr.max_wr = srq->msrq.max - 1; diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index b0d0687c7a68..2e8fa8767fd7 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -83,11 +83,11 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *srq; - xa_lock(&table->array); + xa_lock_irq(&table->array); srq = xa_load(&table->array, srqn); if (srq) 
atomic_inc(&srq->common.refcount); - xa_unlock(&table->array); + xa_unlock_irq(&table->array); return srq; } diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index dfdd1e16de7f..a37fb659c54e 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -76,7 +76,7 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str) struct qedr_dev *qedr = get_qedr_dev(ibdev); u32 fw_ver = (u32)qedr->attr.fw_ver; - snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d", (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF, (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); } @@ -354,9 +354,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev) return -ENOMEM; spin_lock_init(&dev->sgid_lock); + xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ); if (IS_IWARP(dev)) { - xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ); + xa_init(&dev->qps); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 6175d1e98717..bbf72944b720 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -40,6 +40,7 @@ #include <linux/qed/qed_rdma_if.h> #include <linux/qed/qede_rdma.h> #include <linux/qed/roce_common.h> +#include <linux/completion.h> #include "qedr_hsi_rdma.h" #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" @@ -346,10 +347,10 @@ struct qedr_srq_hwq_info { u32 wqe_prod; u32 sge_prod; u32 wr_prod_cnt; - u32 wr_cons_cnt; + atomic_t wr_cons_cnt; u32 num_elems; - u32 *virt_prod_pair_addr; + struct rdma_srq_producers *virt_prod_pair_addr; dma_addr_t phy_prod_pair_addr; }; @@ -375,10 +376,20 @@ enum qedr_qp_err_bitmap { QEDR_QP_ERR_RQ_PBL_FULL = 32, }; +enum qedr_qp_create_type { + QEDR_QP_CREATE_NONE, + QEDR_QP_CREATE_USER, + QEDR_QP_CREATE_KERNEL, +}; + +enum qedr_iwarp_cm_flags { + QEDR_IWARP_CM_WAIT_FOR_CONNECT = BIT(0), + QEDR_IWARP_CM_WAIT_FOR_DISCONNECT = BIT(1), +}; + struct qedr_qp { struct ib_qp ibqp; /* must be first */ struct qedr_dev *dev; - struct qedr_iw_ep *ep; struct qedr_qp_hwq_info sq; struct qedr_qp_hwq_info rq; @@ -393,6 +404,7 @@ struct qedr_qp { u32 id; struct qedr_pd *pd; enum ib_qp_type qp_type; + enum qedr_qp_create_type create_type; struct qed_rdma_qp *qed_qp; u32 qp_id; u16 icid; @@ -435,8 +447,11 @@ struct qedr_qp { /* Relevant to qps created from user space only (applications) */ struct qedr_userq usq; struct qedr_userq urq; - atomic_t refcnt; - bool destroyed; + + /* synchronization objects used with iwarp ep */ + struct kref refcnt; + struct completion iwarp_cm_comp; + unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */ }; struct qedr_ah { @@ -529,7 +544,7 @@ struct qedr_iw_ep { struct iw_cm_id *cm_id; struct qedr_qp *qp; void *qed_context; - u8 during_connect; + struct kref refcnt; }; static inline diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 22881d4442b9..180f26794f28 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -79,6 +79,27 @@ qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info, } } +static void qedr_iw_free_qp(struct kref *ref) +{ + struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt); + + kfree(qp); +} + +static void +qedr_iw_free_ep(struct kref *ref) +{ + struct qedr_iw_ep *ep = container_of(ref, struct qedr_iw_ep, refcnt); + + if (ep->qp) + kref_put(&ep->qp->refcnt, qedr_iw_free_qp); + + if (ep->cm_id) + 
ep->cm_id->rem_ref(ep->cm_id); + + kfree(ep); +} + static void qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) { @@ -93,6 +114,7 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) ep->dev = dev; ep->qed_context = params->ep_context; + kref_init(&ep->refcnt); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; @@ -128,8 +150,17 @@ qedr_iw_issue_event(void *context, if (params->cm_info) { event.ird = params->cm_info->ird; event.ord = params->cm_info->ord; - event.private_data_len = params->cm_info->private_data_len; - event.private_data = (void *)params->cm_info->private_data; + /* Only connect_request and reply have valid private data; + * for the rest of the events this may be left over from + * connection establishment. CONNECT_REQUEST is issued via + * qedr_iw_mpa_request(). + */ + if (event_type == IW_CM_EVENT_CONNECT_REPLY) { + event.private_data_len = + params->cm_info->private_data_len; + event.private_data = + (void *)params->cm_info->private_data; + } } if (ep->cm_id) @@ -141,12 +172,10 @@ qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params) { struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - if (ep->cm_id) { + if (ep->cm_id) qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE); - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -186,11 +215,13 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) struct qedr_qp *qp = ep->qp; struct iw_cm_event event; - if (qp->destroyed) { - kfree(dwork); - qedr_iw_qp_rem_ref(&qp->ibqp); - return; - } + /* The qp won't be released until we release the ep. + * The ep's refcnt was increased before calling this + * function, therefore it is safe to access the qp. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + goto out; memset(&event, 0, sizeof(event)); event.status = dwork->status; @@ -204,7 +235,6 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) else qp_params.new_state = QED_ROCE_QP_STATE_SQD; - kfree(dwork); if (ep->cm_id) ep->cm_id->event_handler(ep->cm_id, &event); @@ -214,7 +244,10 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params); - qedr_iw_qp_rem_ref(&qp->ibqp); + complete(&ep->qp->iwarp_cm_comp); +out: + kfree(dwork); + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -224,13 +257,17 @@ qedr_iw_disconnect_event(void *context, struct qedr_discon_work *work; struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; struct qedr_dev *dev = ep->dev; - struct qedr_qp *qp = ep->qp; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; - qedr_iw_qp_add_ref(&qp->ibqp); + /* We can't get a close event before disconnect, but since + * we're scheduling a work item, we need to make sure close + * won't delete the ep, so we increase the refcnt + */ + kref_get(&ep->refcnt); + work->ep = ep; work->event = params->event; work->status = params->status; @@ -252,16 +289,30 @@ qedr_iw_passive_complete(void *context, if ((params->status == -ECONNREFUSED) && (!ep->qp)) { DP_DEBUG(dev, QEDR_MSG_IWARP, "PASSIVE connection refused releasing ep...\n"); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return; } + complete(&ep->qp->iwarp_cm_comp); qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED); if (params->status < 0) qedr_iw_close_event(context, params); } +static void +qedr_iw_active_complete(void *context, + struct
qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + + complete(&ep->qp->iwarp_cm_comp); + qedr_iw_issue_event(context, params, IW_CM_EVENT_CONNECT_REPLY); + + if (params->status < 0) + kref_put(&ep->refcnt, qedr_iw_free_ep); +} + static int qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params) { @@ -288,27 +339,15 @@ qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params) qedr_iw_mpa_reply(context, params); break; case QED_IWARP_EVENT_PASSIVE_COMPLETE: - ep->during_connect = 0; qedr_iw_passive_complete(context, params); break; - case QED_IWARP_EVENT_ACTIVE_COMPLETE: - ep->during_connect = 0; - qedr_iw_issue_event(context, - params, - IW_CM_EVENT_CONNECT_REPLY); - if (params->status < 0) { - struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + qedr_iw_active_complete(context, params); break; case QED_IWARP_EVENT_DISCONNECT: qedr_iw_disconnect_event(context, params); break; case QED_IWARP_EVENT_CLOSE: - ep->during_connect = 0; qedr_iw_close_event(context, params); break; case QED_IWARP_EVENT_RQ_EMPTY: @@ -476,6 +515,19 @@ qedr_addr6_resolve(struct qedr_dev *dev, return rc; } +static struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn) +{ + struct qedr_qp *qp; + + xa_lock(&dev->qps); + qp = xa_load(&dev->qps, qpn); + if (qp) + kref_get(&qp->refcnt); + xa_unlock(&dev->qps); + + return qp; +} + int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct qedr_dev *dev = get_qedr_dev(cm_id->device); @@ -491,10 +543,6 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = xa_load(&dev->qps, conn_param->qpn); - if (unlikely(!qp)) - return -EINVAL; - laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; @@ -516,8 +564,15 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -ENOMEM; ep->dev = dev; + kref_init(&ep->refcnt); + + qp = qedr_iw_load_qp(dev, conn_param->qpn); + if (!qp) { + rc = -EINVAL; + goto err; + } + ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -580,16 +635,20 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) in_params.qp = qp->qed_qp; memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN); - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already being destroyed */ + rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; err: - cm_id->rem_ref(cm_id); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return rc; } @@ -677,18 +736,17 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct qedr_dev *dev = ep->dev; struct qedr_qp *qp; struct qed_iwarp_accept_in params; - int rc; + int rc = 0; DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = xa_load(&dev->qps, conn_param->qpn); + qp = qedr_iw_load_qp(dev, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; } ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -700,15 +758,21 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) params.ird = conn_param->ird; 
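/*
 * Editorial aside: a minimal, self-contained userspace sketch of the
 * handshake these qedr hunks introduce. connect/accept and destroy race
 * to test-and-set the same WAIT_FOR_CONNECT bit; whoever loses the race
 * waits on a completion that the CM event path (or the error path)
 * signals. C11 atomics and a pthread condition variable stand in for the
 * kernel's test_and_set_bit() and struct completion; every name below is
 * illustrative, not qedr API. Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define WAIT_FOR_CONNECT 0x1u

struct sketch_qp {
	atomic_uint cm_flags;	/* stands in for qp->iwarp_cm_flags */
	pthread_mutex_t lock;	/* these three stand in for iwarp_cm_comp */
	pthread_cond_t cond;
	bool done;
};

static void sketch_complete(struct sketch_qp *qp)
{
	pthread_mutex_lock(&qp->lock);
	qp->done = true;
	pthread_cond_broadcast(&qp->cond);
	pthread_mutex_unlock(&qp->lock);
}

static void sketch_wait(struct sketch_qp *qp)
{
	pthread_mutex_lock(&qp->lock);
	while (!qp->done)
		pthread_cond_wait(&qp->cond, &qp->lock);
	pthread_mutex_unlock(&qp->lock);
}

/* connect/accept path: claim the bit first; on any failure, signal the
 * completion so a concurrent destroy never blocks forever. */
static int sketch_connect(struct sketch_qp *qp, bool fw_accepts)
{
	if (atomic_fetch_or(&qp->cm_flags, WAIT_FOR_CONNECT) &
	    WAIT_FOR_CONNECT)
		return -1;		/* QP already being destroyed */
	if (!fw_accepts) {
		sketch_complete(qp);	/* mirrors complete() on error */
		return -1;
	}
	sketch_complete(qp);	/* normally done by the async CM event */
	return 0;
}

/* destroy path: if connect won the race, wait for it to settle first */
static void *sketch_destroy(void *arg)
{
	struct sketch_qp *qp = arg;

	if (atomic_fetch_or(&qp->cm_flags, WAIT_FOR_CONNECT) &
	    WAIT_FOR_CONNECT)
		sketch_wait(qp);
	printf("qp destroyed after CM settled\n");
	return NULL;
}

int main(void)
{
	struct sketch_qp qp = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
	};
	pthread_t t;

	sketch_connect(&qp, true);
	pthread_create(&t, NULL, sketch_destroy, &qp);
	pthread_join(t, NULL);
	return 0;
}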
params.ord = conn_param->ord; - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already destroyed */ + rc = dev->ops->iwarp_accept(dev->rdma_ctx, &params); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; + err: - ep->during_connect = 0; - cm_id->rem_ref(cm_id); + kref_put(&ep->refcnt, qedr_iw_free_ep); + return rc; } @@ -731,17 +795,14 @@ void qedr_iw_qp_add_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - atomic_inc(&qp->refcnt); + kref_get(&qp->refcnt); } void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - if (atomic_dec_and_test(&qp->refcnt)) { - xa_erase_irq(&qp->dev->qps, qp->qp_id); - kfree(qp); - } + kref_put(&qp->refcnt, qedr_iw_free_qp); } struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 3d7bde19838e..9d3204410ead 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -51,6 +51,7 @@ #include "verbs.h" #include <rdma/qedr-abi.h> #include "qedr_roce_cm.h" +#include "qedr_iw_cm.h" #define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm) #define RDMA_MAX_SGE_PER_SRQ (4) @@ -1226,7 +1227,10 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, struct ib_qp_init_attr *attrs) { spin_lock_init(&qp->q_lock); - atomic_set(&qp->refcnt, 1); + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + kref_init(&qp->refcnt); + init_completion(&qp->iwarp_cm_comp); + } qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; @@ -1612,6 +1616,14 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) if (qp->urq.umem) ib_umem_release(qp->urq.umem); qp->urq.umem = NULL; + + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); + qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + kfree(qp->urq.pbl_tbl); + } } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -1627,6 +1639,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; + qp->create_type = QEDR_QP_CREATE_USER; memset(&ureq, 0, sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); if (rc) { @@ -1840,6 +1853,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, u32 n_sq_entries; memset(&in_params, 0, sizeof(in_params)); + qp->create_type = QEDR_QP_CREATE_KERNEL; /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in * the ring.
The ring should allow at least a single WR, even if the @@ -1953,7 +1967,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { - rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL); + rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) goto err; } @@ -2472,7 +2486,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, return rc; } - if (udata) + if (qp->create_type == QEDR_QP_CREATE_USER) qedr_cleanup_user(dev, qp); else qedr_cleanup_kernel(dev, qp); @@ -2503,34 +2517,44 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) qedr_modify_qp(ibqp, &attr, attr_mask, NULL); } } else { - /* Wait for the connect/accept to complete */ - if (qp->ep) { - int wait_count = 1; - - while (qp->ep->during_connect) { - DP_DEBUG(dev, QEDR_MSG_QP, - "Still in during connect/accept\n"); - - msleep(100); - if (wait_count++ > 200) { - DP_NOTICE(dev, - "during connect timeout\n"); - break; - } - } - } + /* If connection establishment started, the WAIT_FOR_CONNECT + * bit will be on and we need to wait for the establishment + * to complete before destroying the qp. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); + + /* If graceful disconnect started, the WAIT_FOR_DISCONNECT + * bit will be on, and we need to wait for the disconnect to + * complete before continuing. We can use the same completion, + * iwarp_cm_comp, since this is the only place that waits for + * this completion and it is sequential. In addition, + * disconnect can't occur before the connection is fully + * established, therefore if WAIT_FOR_DISCONNECT is on it + * means WAIT_FOR_CONNECT is also on and the completion for + * CONNECT already occurred. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); } if (qp->qp_type == IB_QPT_GSI) qedr_destroy_gsi_qp(dev); + /* We need to remove the entry from the xarray before we release the + * qp_id to avoid a race of the qp_id being reallocated and failing + * on xa_insert. + */ + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + xa_erase(&dev->qps, qp->qp_id); + qedr_free_qp_resources(dev, qp, udata); - if (atomic_dec_and_test(&qp->refcnt) && - rdma_protocol_iwarp(&dev->ibdev, 1)) { - xa_erase_irq(&dev->qps, qp->qp_id); - kfree(qp); - } + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_iw_qp_rem_ref(&qp->ibqp); + return rc; } @@ -2709,8 +2733,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); - if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) - qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + if (mr->type != QEDR_MR_DMA) + free_mr_info(dev, &mr->info); /* it could be user registered memory. */ if (mr->umem) @@ -3473,7 +3497,7 @@ static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq) * count and consumer count and subtract it from max * work request supported so that we get elements left.
*/ - used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt); return hw_srq->max_wr - used; } @@ -3488,7 +3512,6 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, unsigned long flags; int status = 0; u32 num_sge; - u32 offset; spin_lock_irqsave(&srq->lock, flags); @@ -3501,7 +3524,8 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, if (!qedr_srq_elem_left(hw_srq) || wr->num_sge > srq->hw_srq.max_sges) { DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n", - hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + hw_srq->wr_prod_cnt, + atomic_read(&hw_srq->wr_cons_cnt), wr->num_sge, srq->hw_srq.max_sges); status = -ENOMEM; *bad_wr = wr; @@ -3535,22 +3559,20 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, hw_srq->sge_prod++; } - /* Flush WQE and SGE information before + /* Update WQE and SGE information before * updating producer. */ - wmb(); + dma_wmb(); /* SRQ producer is 8 bytes. Need to update SGE producer index * in first 4 bytes and need to update WQE producer in * next 4 bytes. */ - *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod; - offset = offsetof(struct rdma_srq_producers, wqe_prod); - *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = - hw_srq->wqe_prod; + srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; + /* Make sure sge producer is updated first */ + dma_wmb(); + srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; - /* Flush producer after updating it. */ - wmb(); wr = wr->next; } @@ -3969,7 +3991,7 @@ static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp, } else { __process_resp_one(dev, qp, cq, wc, resp, wr_id); } - srq->hw_srq.wr_cons_cnt++; + atomic_inc(&srq->hw_srq.wr_cons_cnt); return 1; } diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 905206a0c2d5..d7d74dda3aee 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -301,6 +301,9 @@ static ssize_t qib_portattr_show(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->show) + return -EIO; + return pattr->show(ppd, buf); } @@ -312,6 +315,9 @@ static ssize_t qib_portattr_store(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->store) + return -EIO; + return pattr->store(ppd, buf, len); } @@ -752,7 +758,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, qib_dev_err(dd, "Skipping linkcontrol sysfs info, (err %d) port %u\n", ret, port_num); - goto bail; + goto bail_link; } kobject_uevent(&ppd->pport_kobj, KOBJ_ADD); @@ -762,7 +768,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, qib_dev_err(dd, "Skipping sl2vl sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_link; + goto bail_sl; } kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); @@ -772,7 +778,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, qib_dev_err(dd, "Skipping diag_counters sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sl; + goto bail_diagc; } kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); @@ -785,7 +791,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, qib_dev_err(dd, "Skipping Congestion Control sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_diagc; + goto bail_cc; } kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); @@ -846,6 +852,7 @@ void 
qib_verbs_unregister_sysfs(struct qib_devdata *dd) &cc_table_bin_attr); kobject_put(&ppd->pport_cc_kobj); } + kobject_put(&ppd->diagc_kobj); kobject_put(&ppd->sl2vl_kobj); kobject_put(&ppd->pport_kobj); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 2c4e569ce438..12c8ec59e657 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -329,8 +329,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (mcast == NULL) goto drop; this_cpu_inc(ibp->pmastats->n_multicast_rcv); + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 40182297f87f..6f38dc7c6a07 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -827,7 +827,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "PCI BAR region not MMIO\n"); ret = -ENOMEM; - goto err_free_device; + goto err_disable_pdev; } ret = pci_request_regions(pdev, DRV_NAME); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index cb9e171d7e7b..78809766fc5f 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -59,6 +59,8 @@ #include "trace.h" static void rvt_rc_timeout(struct timer_list *t); +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type); /* * Convert the AETH RNR timeout code into the number of microseconds. @@ -450,40 +452,41 @@ no_qp_table: } /** - * free_all_qps - check for QPs still in use + * rvt_free_qp_cb - callback function to reset a qp + * @qp: the qp to reset + * @v: a 64-bit value + * + * This function resets the qp and removes it from the + * qp hash table. + */ +static void rvt_free_qp_cb(struct rvt_qp *qp, u64 v) +{ + unsigned int *qp_inuse = (unsigned int *)v; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + /* Reset the qp and remove it from the qp hash list */ + rvt_reset_qp(rdi, qp, qp->ibqp.qp_type); + + /* Increment the qp_inuse count */ + (*qp_inuse)++; +} + +/** + * rvt_free_all_qps - check for QPs still in use * @rdi: rvt device info structure * * There should not be any QPs still in use. * Free memory for table. + * Return the number of QPs still in use. 
*/ static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) { - unsigned long flags; - struct rvt_qp *qp; - unsigned n, qp_inuse = 0; - spinlock_t *ql; /* work around too long line below */ - - if (rdi->driver_f.free_all_qps) - qp_inuse = rdi->driver_f.free_all_qps(rdi); + unsigned int qp_inuse = 0; qp_inuse += rvt_mcast_tree_empty(rdi); - if (!rdi->qp_dev) - return qp_inuse; + rvt_qp_iter(rdi, (u64)&qp_inuse, rvt_free_qp_cb); - ql = &rdi->qp_dev->qpt_lock; - spin_lock_irqsave(ql, flags); - for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { - qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], - lockdep_is_held(ql)); - RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); - - for (; qp; qp = rcu_dereference_protected(qp->next, - lockdep_is_held(ql))) - qp_inuse++; - } - spin_unlock_irqrestore(ql, flags); - synchronize_rcu(); return qp_inuse; } @@ -861,14 +864,14 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } /** - * rvt_reset_qp - initialize the QP state to the reset state + * _rvt_reset_qp - initialize the QP state to the reset state * @qp: the QP to reset * @type: the QP type * * r_lock, s_hlock, and s_lock are required to be held by the caller */ -static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type) +static void _rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) __must_hold(&qp->s_lock) __must_hold(&qp->s_hlock) __must_hold(&qp->r_lock) @@ -914,6 +917,27 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, lockdep_assert_held(&qp->s_lock); } +/** + * rvt_reset_qp - initialize the QP state to the reset state + * @rdi: the device info + * @qp: the QP to reset + * @type: the QP type + * + * This is the wrapper function to acquire the r_lock, s_hlock, and s_lock + * before calling _rvt_reset_qp(). 
+ */ +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) +{ + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + _rvt_reset_qp(rdi, qp, type); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); +} + /** rvt_free_qpn - Free a qpn from the bit map * @qpt: QP table * @qpn: queue pair number to free @@ -1465,7 +1489,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, switch (new_state) { case IB_QPS_RESET: if (qp->state != IB_QPS_RESET) - rvt_reset_qp(rdi, qp, ibqp->qp_type); + _rvt_reset_qp(rdi, qp, ibqp->qp_type); break; case IB_QPS_RTR: @@ -1614,13 +1638,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_hlock); - spin_lock(&qp->s_lock); rvt_reset_qp(rdi, qp, ibqp->qp_type); - spin_unlock(&qp->s_lock); - spin_unlock(&qp->s_hlock); - spin_unlock_irq(&qp->r_lock); wait_event(qp->wait, !atomic_read(&qp->refcount)); /* qpn is now available for use again */ diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 00eb99d3df86..7c5999ada61d 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -329,7 +329,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, qp->comp.psn = pkt->psn; if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } } return COMPST_ERROR_RETRY; @@ -463,7 +463,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) */ if (qp->req.wait_fence) { qp->req.wait_fence = 0; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } } @@ -479,7 +479,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, if (qp->req.need_rd_atomic) { qp->comp.timeout_retry = 0; qp->req.need_rd_atomic = 0; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } } @@ -725,7 +725,7 @@ int rxe_completer(void *arg) RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; qp->comp.started_retry = 1; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } if (pkt) { diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 5a3474f9351b..312c2fc961c0 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -117,10 +117,12 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, memcpy(&fl6.daddr, daddr, sizeof(*daddr)); fl6.flowi6_proto = IPPROTO_UDP; - if (unlikely(ipv6_stub->ipv6_dst_lookup(sock_net(recv_sockets.sk6->sk), - recv_sockets.sk6->sk, &ndst, &fl6))) { + ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), + recv_sockets.sk6->sk, &fl6, + NULL); + if (unlikely(IS_ERR(ndst))) { pr_err_ratelimited("no route to %pI6\n", daddr); - goto put; + return NULL; } if (unlikely(ndst->error)) { diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index f9a492ed900b..46e111c218fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -330,10 +330,14 @@ err1: static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) { + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); const struct ib_gid_attr *gid_attr; union ib_gid dgid; union ib_gid *pdgid; + if (pkt->mask & RXE_LOOPBACK_MASK) + return 0; + if (skb->protocol == htons(ETH_P_IP)) { 
ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, (struct in6_addr *)&dgid); @@ -366,7 +370,7 @@ void rxe_rcv(struct sk_buff *skb) if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) goto drop; - if (unlikely(rxe_match_dgid(rxe, skb) < 0)) { + if (rxe_match_dgid(rxe, skb) < 0) { pr_warn_ratelimited("failed matching dgid\n"); goto drop; } @@ -389,7 +393,7 @@ void rxe_rcv(struct sk_buff *skb) calc_icrc = rxe_icrc_hdr(pkt, skb); calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt), - payload_size(pkt)); + payload_size(pkt) + bth_pad(pkt)); calc_icrc = (__force u32)cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { if (skb->protocol == htons(ETH_P_IPV6)) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index c5d9b558fa90..e5031172c019 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -500,6 +500,12 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (err) return err; } + if (bth_pad(pkt)) { + u8 *pad = payload_addr(pkt) + paylen; + + memset(pad, 0, bth_pad(pkt)); + crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt)); + } } p = payload_addr(pkt) + paylen + bth_pad(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 1cbfbd98eb22..c4a8195bf670 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -732,6 +732,13 @@ static enum resp_states read_reply(struct rxe_qp *qp, if (err) pr_err("Failed copying memory\n"); + if (bth_pad(&ack_pkt)) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u8 *pad = payload_addr(&ack_pkt) + payload; + + memset(pad, 0, bth_pad(&ack_pkt)); + icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt)); + } p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); *p = ~icrc; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 8c3e2a18cfe4..75a41f99a23a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -679,6 +679,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, unsigned int mask; unsigned int length = 0; int i; + struct ib_send_wr *next; while (wr) { mask = wr_opcode_mask(wr->opcode, qp); @@ -695,6 +696,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, break; } + next = wr->next; + length = 0; for (i = 0; i < wr->num_sge; i++) length += wr->sg_list[i].length; @@ -705,7 +708,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, *bad_wr = wr; break; } - wr = wr->next; + wr = next; } rxe_run_task(&qp->req.task, 1); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 28bfb3ece104..3536ab43eed3 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -407,7 +407,7 @@ struct rxe_dev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* guard mmap_offset */ - int mmap_offset; + u64 mmap_offset; atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 2aa3457a30ce..50a355738609 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -377,8 +377,12 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; struct ipoib_tx_buf *tx_ring; + /* cyclic ring variables for managing tx_ring, for UD only */ unsigned int tx_head; unsigned int tx_tail; + /* cyclic ring variables for 
counting overall outstanding send WRs */ + unsigned int global_tx_head; + unsigned int global_tx_tail; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_ud_wr tx_wr; struct ib_wc send_wc[MAX_SEND_CQE]; @@ -511,7 +515,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev); int ipoib_ib_dev_open_default(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev); -int ipoib_ib_dev_stop(struct net_device *dev); +void ipoib_ib_dev_stop(struct net_device *dev); void ipoib_ib_dev_up(struct net_device *dev); void ipoib_ib_dev_down(struct net_device *dev); int ipoib_ib_dev_stop_default(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index aa9dcfc36cd3..196f1e6b5396 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -756,7 +756,8 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ return; } - if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size - 1) { + if ((priv->global_tx_head - priv->global_tx_tail) == + ipoib_sendq_size - 1) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); netif_stop_queue(dev); @@ -786,7 +787,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ } else { netif_trans_update(dev); ++tx->tx_head; - ++priv->tx_head; + ++priv->global_tx_head; } } @@ -820,10 +821,11 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) netif_tx_lock(dev); ++tx->tx_tail; - ++priv->tx_tail; + ++priv->global_tx_tail; if (unlikely(netif_queue_stopped(dev) && - (priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1 && + ((priv->global_tx_head - priv->global_tx_tail) <= + ipoib_sendq_size >> 1) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) netif_wake_queue(dev); @@ -1233,8 +1235,9 @@ timeout: dev_kfree_skb_any(tx_req->skb); netif_tx_lock_bh(p->dev); ++p->tx_tail; - ++priv->tx_tail; - if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) && + ++priv->global_tx_tail; + if (unlikely((priv->global_tx_head - priv->global_tx_tail) <= + ipoib_sendq_size >> 1) && netif_queue_stopped(p->dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(p->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 78fa777c87b1..7b598b66f235 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -406,9 +406,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; + ++priv->global_tx_tail; if (unlikely(netif_queue_stopped(dev) && - ((priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1) && + ((priv->global_tx_head - priv->global_tx_tail) <= + ipoib_sendq_size >> 1) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) netif_wake_queue(dev); @@ -633,7 +635,8 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, else priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; /* increase the tx_head after send success, but use it for queue state */ - if (priv->tx_head - priv->tx_tail == ipoib_sendq_size - 1) { + if ((priv->global_tx_head - priv->global_tx_tail) == + ipoib_sendq_size - 1) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); netif_stop_queue(dev); } @@ -661,17 +664,17 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, rc = priv->tx_head; ++priv->tx_head; + ++priv->global_tx_head; } return rc; } -static void __ipoib_reap_ah(struct net_device *dev) 
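/*
 * Editorial aside: the ipoib hunks around this point rework the
 * address-handle reaper into an explicit start/stop pair. The work item
 * re-queues itself roughly once a second until a STOP bit is set, and
 * cleanup does one final synchronous reap after stopping. Below is a
 * minimal userspace sketch of that self-rearming pattern, with a pthread
 * standing in for the kernel workqueue; all names are illustrative, not
 * the ipoib API. Build with: cc -pthread reaper.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool stop_reaper;

static void reap_dead_ahs(void)
{
	/* the kernel version walks priv->dead_ahs under the tx lock */
	puts("reaping dead AHs");
}

/* stands in for the delayed work: reap, then re-arm unless stopped */
static void *reaper(void *arg)
{
	(void)arg;
	while (!atomic_load(&stop_reaper)) {
		reap_dead_ahs();
		sleep(1);	/* round_jiffies_relative(HZ) equivalent */
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	atomic_store(&stop_reaper, false);	/* ipoib_start_ah_reaper() */
	pthread_create(&t, NULL, reaper, NULL);
	sleep(3);
	atomic_store(&stop_reaper, true);	/* ipoib_stop_ah_reaper() */
	pthread_join(t, NULL);
	reap_dead_ahs();	/* final flush, as in ipoib_ib_dev_cleanup() */
	return 0;
}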
+static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_ah *ah, *tah; unsigned long flags; - netif_tx_lock_bh(dev); + netif_tx_lock_bh(priv->dev); spin_lock_irqsave(&priv->lock, flags); list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) @@ -682,37 +685,37 @@ static void __ipoib_reap_ah(struct net_device *dev) } spin_unlock_irqrestore(&priv->lock, flags); - netif_tx_unlock_bh(dev); + netif_tx_unlock_bh(priv->dev); } void ipoib_reap_ah(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, ah_reap_task.work); - struct net_device *dev = priv->dev; - __ipoib_reap_ah(dev); + ipoib_reap_dead_ahs(priv); if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) queue_delayed_work(priv->wq, &priv->ah_reap_task, round_jiffies_relative(HZ)); } -static void ipoib_flush_ah(struct net_device *dev) +static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - cancel_delayed_work(&priv->ah_reap_task); - flush_workqueue(priv->wq); - ipoib_reap_ah(&priv->ah_reap_task.work); + clear_bit(IPOIB_STOP_REAPER, &priv->flags); + queue_delayed_work(priv->wq, &priv->ah_reap_task, + round_jiffies_relative(HZ)); } -static void ipoib_stop_ah(struct net_device *dev) +static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - set_bit(IPOIB_STOP_REAPER, &priv->flags); - ipoib_flush_ah(dev); + cancel_delayed_work(&priv->ah_reap_task); + /* + * After ipoib_stop_ah_reaper() we always go through + * ipoib_reap_dead_ahs() which ensures the work is really stopped and + * does a final flush out of the dead_ah's list + */ } static int recvs_pending(struct net_device *dev) @@ -806,6 +809,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) ipoib_dma_unmap_tx(priv, tx_req); dev_kfree_skb_any(tx_req->skb); ++priv->tx_tail; + ++priv->global_tx_tail; } for (i = 0; i < ipoib_recvq_size; ++i) { @@ -840,18 +844,6 @@ timeout: return 0; } -int ipoib_ib_dev_stop(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - priv->rn_ops->ndo_stop(dev); - - clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_flush_ah(dev); - - return 0; -} - int ipoib_ib_dev_open_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -895,10 +887,7 @@ int ipoib_ib_dev_open(struct net_device *dev) return -1; } - clear_bit(IPOIB_STOP_REAPER, &priv->flags); - queue_delayed_work(priv->wq, &priv->ah_reap_task, - round_jiffies_relative(HZ)); - + ipoib_start_ah_reaper(priv); if (priv->rn_ops->ndo_open(dev)) { pr_warn("%s: Failed to open dev\n", dev->name); goto dev_stop; @@ -909,13 +898,20 @@ int ipoib_ib_dev_open(struct net_device *dev) return 0; dev_stop: - set_bit(IPOIB_STOP_REAPER, &priv->flags); - cancel_delayed_work(&priv->ah_reap_task); - set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_ib_dev_stop(dev); + ipoib_stop_ah_reaper(priv); return -1; } +void ipoib_ib_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + priv->rn_ops->ndo_stop(dev); + + clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + ipoib_stop_ah_reaper(priv); +} + void ipoib_pkey_dev_check_presence(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -1226,7 +1222,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_mcast_dev_flush(dev); if (oper_up) set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); - ipoib_flush_ah(dev); + 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ac0583ff280d..044bcacad6e4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1188,9 +1188,11 @@ static void ipoib_timeout(struct net_device *dev)
 
 	ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
 		   jiffies_to_msecs(jiffies - dev_trans_start(dev)));
-	ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
-		   netif_queue_stopped(dev),
-		   priv->tx_head, priv->tx_tail);
+	ipoib_warn(priv,
+		   "queue stopped %d, tx_head %u, tx_tail %u, global_tx_head %u, global_tx_tail %u\n",
+		   netif_queue_stopped(dev), priv->tx_head, priv->tx_tail,
+		   priv->global_tx_head, priv->global_tx_tail);
+
 	/* XXX reset QP, etc. */
 }
 
@@ -1705,7 +1707,7 @@ static int ipoib_dev_init_default(struct net_device *dev)
 		goto out_rx_ring_cleanup;
 	}
 
-	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
+	/* priv->tx_head, tx_tail and global_tx_tail/head are already 0 */
 
 	if (ipoib_transport_dev_init(dev, priv->ca)) {
 		pr_warn("%s: ipoib_transport_dev_init failed\n",
@@ -1977,6 +1979,8 @@ static void ipoib_ndo_uninit(struct net_device *dev)
 
 	/* no more works over the priv->wq */
 	if (priv->wq) {
+		/* See ipoib_mcast_carrier_on_task() */
+		WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags));
 		flush_workqueue(priv->wq);
 		destroy_workqueue(priv->wq);
 		priv->wq = NULL;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 9c185a8dabd3..695df55d1b2f 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -646,6 +646,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 	if (ib_conn->pi_support) {
 		u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
 
+		shost->sg_prot_tablesize = shost->sg_tablesize;
 		scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
 		scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
 					   SHOST_DIX_GUARD_CRC);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 0b09d0cd9b3c..19b6cebfed19 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -2553,7 +2553,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
 	if (lrsp->opcode == SRP_LOGIN_RSP) {
 		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
-		ch->use_imm_data  = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP;
+		ch->use_imm_data  = srp_use_imm_data &&
+			(lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP);
 		ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
 						      ch->use_imm_data);
 		WARN_ON_ONCE(ch->max_it_iu_len >
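The ib_srp.c hunk makes immediate data an AND of local policy and target
capability: the srp_use_imm_data module parameter must be set and the target's
login response must advertise support. A minimal truth-table demo (standalone
C, not part of the patch; the flag value below is a placeholder, not the real
constant from include/scsi/srp.h):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define DEMO_IMMED_SUPP 0x02	/* placeholder for SRP_LOGIN_RSP_IMMED_SUPP */

/* Mirrors the new assignment in srp_cm_rep_handler(). */
static bool negotiate_imm_data(bool srp_use_imm_data, uint8_t rsp_flags)
{
	return srp_use_imm_data && (rsp_flags & DEMO_IMMED_SUPP);
}

int main(void)
{
	/* Immediate data is used only when both sides agree. */
	assert(negotiate_imm_data(true, DEMO_IMMED_SUPP));
	assert(!negotiate_imm_data(false, DEMO_IMMED_SUPP));	/* disabled locally */
	assert(!negotiate_imm_data(true, 0));			/* target lacks support */
	return 0;
}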
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 1a039f16d315..7b9ff07d9118 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1364,9 +1364,11 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
 			       struct srpt_send_ioctx *ioctx, u64 tag,
 			       int status)
 {
+	struct se_cmd *cmd = &ioctx->cmd;
 	struct srp_rsp *srp_rsp;
 	const u8 *sense_data;
 	int sense_data_len, max_sense_len;
+	u32 resid = cmd->residual_count;
 
 	/*
 	 * The lowest bit of all SAM-3 status codes is zero (see also
@@ -1388,6 +1390,28 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
 	srp_rsp->tag = tag;
 	srp_rsp->status = status;
 
+	if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
+		if (cmd->data_direction == DMA_TO_DEVICE) {
+			/* residual data from an underflow write */
+			srp_rsp->flags = SRP_RSP_FLAG_DOUNDER;
+			srp_rsp->data_out_res_cnt = cpu_to_be32(resid);
+		} else if (cmd->data_direction == DMA_FROM_DEVICE) {
+			/* residual data from an underflow read */
+			srp_rsp->flags = SRP_RSP_FLAG_DIUNDER;
+			srp_rsp->data_in_res_cnt = cpu_to_be32(resid);
+		}
+	} else if (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) {
+		if (cmd->data_direction == DMA_TO_DEVICE) {
+			/* residual data from an overflow write */
+			srp_rsp->flags = SRP_RSP_FLAG_DOOVER;
+			srp_rsp->data_out_res_cnt = cpu_to_be32(resid);
+		} else if (cmd->data_direction == DMA_FROM_DEVICE) {
+			/* residual data from an overflow read */
+			srp_rsp->flags = SRP_RSP_FLAG_DIOVER;
+			srp_rsp->data_in_res_cnt = cpu_to_be32(resid);
+		}
+	}
+
 	if (sense_data_len) {
 		BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
 		max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
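The residual-reporting logic added to srpt_build_cmd_rsp() collapses to a
two-axis mapping: under/overflow crossed with data direction selects one of
the four SRP residual flags, and the residual count goes into the matching
data_out/data_in field. A standalone restatement (demo C, not part of the
patch; the flag values follow the bit layout in include/scsi/srp.h as I
recall it, so verify against your tree):

#include <assert.h>
#include <stdint.h>

enum dma_dir_demo { DMA_TO_DEVICE_DEMO, DMA_FROM_DEVICE_DEMO };

/* Assumed bit layout, per include/scsi/srp.h. */
#define SRP_RSP_FLAG_DOOVER	(1 << 2)	/* data-out overflow  */
#define SRP_RSP_FLAG_DOUNDER	(1 << 3)	/* data-out underflow */
#define SRP_RSP_FLAG_DIOVER	(1 << 4)	/* data-in overflow   */
#define SRP_RSP_FLAG_DIUNDER	(1 << 5)	/* data-in underflow  */

/* Direction plus under/overflow picks the flag; the caller then stores
 * cpu_to_be32(resid) in the data_out or data_in residual field.
 */
static uint8_t srp_resid_flag(enum dma_dir_demo dir, int underflow)
{
	if (dir == DMA_TO_DEVICE_DEMO)
		return underflow ? SRP_RSP_FLAG_DOUNDER : SRP_RSP_FLAG_DOOVER;
	return underflow ? SRP_RSP_FLAG_DIUNDER : SRP_RSP_FLAG_DIOVER;
}

int main(void)
{
	assert(srp_resid_flag(DMA_TO_DEVICE_DEMO, 1) == SRP_RSP_FLAG_DOUNDER);
	assert(srp_resid_flag(DMA_FROM_DEVICE_DEMO, 0) == SRP_RSP_FLAG_DIOVER);
	return 0;
}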