diff options
Diffstat (limited to 'net')
314 files changed, 3499 insertions, 14422 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c index 32f87d458f05..ce6e4774d333 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -609,7 +609,10 @@ static void mrp_join_timer(struct timer_list *t) spin_unlock(&app->lock); mrp_queue_xmit(app); - mrp_join_timer_arm(app); + spin_lock(&app->lock); + if (likely(app->active)) + mrp_join_timer_arm(app); + spin_unlock(&app->lock); } static void mrp_periodic_timer_arm(struct mrp_applicant *app) @@ -623,11 +626,12 @@ static void mrp_periodic_timer(struct timer_list *t) struct mrp_applicant *app = from_timer(app, t, periodic_timer); spin_lock(&app->lock); - mrp_mad_event(app, MRP_EVENT_PERIODIC); - mrp_pdu_queue(app); + if (likely(app->active)) { + mrp_mad_event(app, MRP_EVENT_PERIODIC); + mrp_pdu_queue(app); + mrp_periodic_timer_arm(app); + } spin_unlock(&app->lock); - - mrp_periodic_timer_arm(app); } static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) @@ -875,6 +879,7 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) app->dev = dev; app->app = appl; app->mad = RB_ROOT; + app->active = true; spin_lock_init(&app->lock); skb_queue_head_init(&app->queue); rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); @@ -903,6 +908,9 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) RCU_INIT_POINTER(port->applicants[appl->type], NULL); + spin_lock_bh(&app->lock); + app->active = false; + spin_unlock_bh(&app->lock); /* Delete timer and generate a final TX event to flush out * all pending messages before the applicant is gone. */ diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 4f60e86f4b8d..e92c914316cb 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -380,6 +380,8 @@ int vlan_vids_add_by_dev(struct net_device *dev, return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; @@ -390,6 +392,8 @@ unwind: list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); } @@ -409,8 +413,11 @@ void vlan_vids_del_by_dev(struct net_device *dev, if (!vlan_info) return; - list_for_each_entry(vid_info, &vlan_info->vid_list, list) + list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); + } } EXPORT_SYMBOL(vlan_vids_del_by_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 84ef83772114..ba9b8980f100 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -115,8 +115,8 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... */ - if (veth->h_vlan_proto != vlan->vlan_proto || - vlan->flags & VLAN_FLAG_REORDER_HDR) { + if (vlan->flags & VLAN_FLAG_REORDER_HDR || + veth->h_vlan_proto != vlan->vlan_proto) { u16 vlan_tci; vlan_tci = vlan->vlan_id; vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority); @@ -369,7 +369,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCSHWTSTAMP: - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), dev_net(real_dev))) break; case SIOCGMIIPHY: case SIOCGMIIREG: diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 462ba144cb39..9104a2fce015 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -243,6 +243,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, uint16_t *nwname = va_arg(ap, uint16_t *); char ***wnames = va_arg(ap, char ***); + *wnames = NULL; + errcode = p9pdu_readf(pdu, proto_version, "w", nwname); if (!errcode) { @@ -252,6 +254,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, GFP_NOFS); if (!*wnames) errcode = -ENOMEM; + else + (*wnames)[0] = NULL; } if (!errcode) { @@ -263,8 +267,10 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, proto_version, "s", &(*wnames)[i]); - if (errcode) + if (errcode) { + (*wnames)[i] = NULL; break; + } } } @@ -272,11 +278,14 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, if (*wnames) { int i; - for (i = 0; i < *nwname; i++) + for (i = 0; i < *nwname; i++) { + if (!(*wnames)[i]) + break; kfree((*wnames)[i]); + } + kfree(*wnames); + *wnames = NULL; } - kfree(*wnames); - *wnames = NULL; } } break; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index e03ff8df1d3f..0ef3d2ede6e6 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -133,7 +133,7 @@ struct p9_conn { struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; - char tmp_buf[7]; + char tmp_buf[P9_HDRSZ]; struct p9_fcall rc; int wpos; int wsize; @@ -306,7 +306,7 @@ static void p9_read_work(struct work_struct *work) if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; - m->rc.capacity = 7; /* start by reading header */ + m->rc.capacity = P9_HDRSZ; /* start by reading header */ } clear_bit(Rpending, &m->wsched); @@ -329,7 +329,7 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ - m->rc.size = 7; + m->rc.size = P9_HDRSZ; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 119103bfa82e..4bbb8683d451 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -400,6 +400,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) struct p9_trans_rdma *rdma = client->trans; struct ib_recv_wr wr; struct ib_sge sge; + int ret; c->busa = ib_dma_map_single(rdma->cm_id->device, c->rc.sdata, client->msize, @@ -417,7 +418,12 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) wr.wr_cqe = &c->cqe; wr.sg_list = &sge; wr.num_sge = 1; - return ib_post_recv(rdma->qp, &wr, NULL); + + ret = ib_post_recv(rdma->qp, &wr, NULL); + if (ret) + ib_dma_unmap_single(rdma->cm_id->device, c->busa, + client->msize, DMA_FROM_DEVICE); + return ret; error: p9_debug(P9_DEBUG_ERROR, "EIO\n"); @@ -514,7 +520,7 @@ dont_need_post_recv: if (down_interruptible(&rdma->sq_sem)) { err = -EINTR; - goto send_error; + goto dma_unmap; } /* Mark request as `sent' *before* we actually send it, @@ -524,11 +530,14 @@ dont_need_post_recv: req->status = REQ_STATUS_SENT; err = ib_post_send(rdma->qp, &wr, NULL); if (err) - goto send_error; + goto dma_unmap; /* Success */ return 0; +dma_unmap: + ib_dma_unmap_single(rdma->cm_id->device, c->busa, + c->req->tc.size, DMA_TO_DEVICE); /* Handle errors that happened during or while preparing the send: */ send_error: req->status = REQ_STATUS_ERROR; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index f7cd8e018bde..6b3357a77d99 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -409,7 +409,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, struct page **in_pages = NULL, **out_pages = NULL; struct virtio_chan *chan = client->trans; struct scatterlist *sgs[4]; - size_t offs; + size_t offs = 0; int need_drop = 0; int kicked = 0; diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index ac60ddfcd88b..c87146a49636 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -230,6 +230,14 @@ static void p9_xen_response(struct work_struct *work) continue; } + if (h.size > req->rc.capacity) { + dev_warn(&priv->dev->dev, + "requested packet size too big: %d for tag %d with capacity %zd\n", + h.size, h.tag, req->rc.capacity); + req->status = REQ_STATUS_ERROR; + goto recv_error; + } + memcpy(&req->rc, &h, sizeof(h)); req->rc.offset = 0; @@ -239,6 +247,7 @@ static void p9_xen_response(struct work_struct *work) masked_prod, &masked_cons, XEN_9PFS_RING_SIZE); +recv_error: virt_mb(); cons += h.size; ring->intf->in_cons = cons; @@ -290,6 +299,10 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) write_unlock(&xen_9pfs_lock); for (i = 0; i < priv->num_rings; i++) { + struct xen_9pfs_dataring *ring = &priv->rings[i]; + + cancel_work_sync(&ring->work); + if (!priv->rings[i].intf) break; if (priv->rings[i].irq > 0) @@ -380,19 +393,24 @@ out: return ret; } -static int xen_9pfs_front_probe(struct xenbus_device *dev, - const struct xenbus_device_id *id) +static int xen_9pfs_front_init(struct xenbus_device *dev) { int ret, i; struct xenbus_transaction xbt; - struct xen_9pfs_front_priv *priv = NULL; - char *versions; + struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev); + char *versions, *v; unsigned int max_rings, max_ring_order, len = 0; versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len); if (IS_ERR(versions)) return PTR_ERR(versions); - if (strcmp(versions, "1")) { + for (v = versions; *v; v++) { + if (simple_strtoul(v, &v, 10) == 1) { + v = NULL; + break; + } + } + if (v) { kfree(versions); return -EINVAL; } @@ -405,11 +423,6 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, if (max_ring_order < XEN_9PFS_RING_ORDER) return -EINVAL; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->dev = dev; priv->num_rings = XEN_9PFS_NUM_RINGS; priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings), GFP_KERNEL); @@ -467,23 +480,35 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, goto error; } - write_lock(&xen_9pfs_lock); - list_add_tail(&priv->list, &xen_9pfs_devs); - write_unlock(&xen_9pfs_lock); - dev_set_drvdata(&dev->dev, priv); - xenbus_switch_state(dev, XenbusStateInitialised); - return 0; error_xenbus: xenbus_transaction_end(xbt, 1); xenbus_dev_fatal(dev, ret, "writing xenstore"); error: - dev_set_drvdata(&dev->dev, NULL); xen_9pfs_front_free(priv); return ret; } +static int xen_9pfs_front_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + struct xen_9pfs_front_priv *priv = NULL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->dev = dev; + dev_set_drvdata(&dev->dev, priv); + + write_lock(&xen_9pfs_lock); + list_add_tail(&priv->list, &xen_9pfs_devs); + write_unlock(&xen_9pfs_lock); + + return 0; +} + static int xen_9pfs_front_resume(struct xenbus_device *dev) { dev_warn(&dev->dev, "suspend/resume unsupported\n"); @@ -502,6 +527,8 @@ static void xen_9pfs_front_changed(struct xenbus_device *dev, break; case XenbusStateInitWait: + if (!xen_9pfs_front_init(dev)) + xenbus_switch_state(dev, XenbusStateInitialised); break; case XenbusStateConnected: diff --git a/net/Kconfig b/net/Kconfig index 228dfa382eec..4bef62b4c806 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -197,7 +197,6 @@ config BRIDGE_NETFILTER source "net/netfilter/Kconfig" source "net/ipv4/netfilter/Kconfig" source "net/ipv6/netfilter/Kconfig" -source "net/decnet/netfilter/Kconfig" source "net/bridge/netfilter/Kconfig" endif @@ -214,7 +213,6 @@ source "net/802/Kconfig" source "net/bridge/Kconfig" source "net/dsa/Kconfig" source "net/8021q/Kconfig" -source "net/decnet/Kconfig" source "net/llc/Kconfig" source "drivers/net/appletalk/Kconfig" source "net/x25/Kconfig" diff --git a/net/Makefile b/net/Makefile index 449fc0b221f8..177b6fbac29c 100644 --- a/net/Makefile +++ b/net/Makefile @@ -39,7 +39,6 @@ obj-$(CONFIG_AF_KCM) += kcm/ obj-$(CONFIG_STREAM_PARSER) += strparser/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_L2TP) += l2tp/ -obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_PHONET) += phonet/ ifneq ($(CONFIG_VLAN_8021Q),) obj-y += 8021q/ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 20ec8e7f9423..c4f1bfe6e040 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1808,15 +1808,14 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } case TIOCINQ: { - /* - * These two are safe on a single CPU system as only - * user tasks fiddle here - */ - struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + struct sk_buff *skb; long amount = 0; + spin_lock_irq(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); if (skb) amount = skb->len - sizeof(struct ddpehdr); + spin_unlock_irq(&sk->sk_receive_queue.lock); rc = put_user(amount, (int __user *)argp); break; } diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 2ff0e5e470e3..38f7f164e484 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -71,14 +71,17 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, case SIOCINQ: { struct sk_buff *skb; + int amount; if (sock->state != SS_CONNECTED) { error = -EINVAL; goto done; } + spin_lock_irq(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - error = put_user(skb ? skb->len : 0, - (int __user *)argp) ? -EFAULT : 0; + amount = skb ? skb->len : 0; + spin_unlock_irq(&sk->sk_receive_queue.lock); + error = put_user(amount, (int __user *)argp) ? -EFAULT : 0; goto done; } case SIOCGSTAMP: /* borrowed from IP */ diff --git a/net/atm/resources.c b/net/atm/resources.c index bada395ecdb1..9389080224f8 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -447,6 +447,7 @@ done: return error; } +#ifdef CONFIG_PROC_FS void *atm_dev_seq_start(struct seq_file *seq, loff_t *pos) { mutex_lock(&atm_dev_mutex); @@ -462,3 +463,4 @@ void *atm_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_list_next(v, &atm_devs, pos); } +#endif diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index af3da6cdfc79..17100d9ceaf0 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -513,7 +513,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_elp_packet *elp_packet; struct batadv_hard_iface *primary_if; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr; bool res; int ret = NET_RX_DROP; @@ -521,6 +521,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb, if (!res) goto free_skb; + ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto free_skb; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 04a620fd1301..5d4232d8d651 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -119,8 +119,10 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb, { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - if (hard_iface->if_status != BATADV_IF_ACTIVE) + if (hard_iface->if_status != BATADV_IF_ACTIVE) { + kfree_skb(skb); return; + } batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX); batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES, @@ -832,7 +834,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_ogm2_packet *ogm_packet; - struct ethhdr *ethhdr = eth_hdr(skb); + struct ethhdr *ethhdr; int ogm_offset; u8 *packet_pos; int ret = NET_RX_DROP; @@ -846,6 +848,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN)) goto free_skb; + ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) goto free_skb; diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index f2dc7499d266..af380dc877e3 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -68,7 +68,6 @@ static void batadv_dat_purge(struct work_struct *work); */ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge); queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work, msecs_to_jiffies(10000)); } @@ -783,6 +782,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv) if (!bat_priv->dat.hash) return -ENOMEM; + INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge); batadv_dat_start_timer(bat_priv); batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1, diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 6d68cdb9dd77..0d5519fcb438 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -643,7 +643,19 @@ out: */ void batadv_update_min_mtu(struct net_device *soft_iface) { - soft_iface->mtu = batadv_hardif_min_mtu(soft_iface); + struct batadv_priv *bat_priv = netdev_priv(soft_iface); + int limit_mtu; + int mtu; + + mtu = batadv_hardif_min_mtu(soft_iface); + + if (bat_priv->mtu_set_by_user) + limit_mtu = bat_priv->mtu_set_by_user; + else + limit_mtu = ETH_DATA_LEN; + + mtu = min(mtu, limit_mtu); + dev_set_mtu(soft_iface, mtu); /* Check if the local translate table should be cleaned up to match a * new (and smaller) MTU. diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 1003abb8cc35..7447dbd305fc 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -167,11 +167,14 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) { + struct batadv_priv *bat_priv = netdev_priv(dev); + /* check ranges */ if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev)) return -EINVAL; dev->mtu = new_mtu; + bat_priv->mtu_set_by_user = new_mtu; return 0; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 6bdb70c93e3f..c64d58c1b724 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -793,7 +793,6 @@ check_roaming: if (roamed_back) { batadv_tt_global_free(bat_priv, tt_global, "Roaming canceled"); - tt_global = NULL; } else { /* The global entry has to be marked as ROAMING and * has to be kept for consistency purpose diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 37598ae1d3f7..34c18f72a41b 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1515,6 +1515,12 @@ struct batadv_priv { struct net_device *soft_iface; /** + * @mtu_set_by_user: MTU was set once by user + * protected by rtnl_lock + */ + int mtu_set_by_user; + + /** * @bat_counters: mesh internal traffic statistic counters (see * batadv_counters) */ diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 9a75f9b00b51..4530ffb2481a 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -1014,6 +1014,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); hci_dev_unlock(hdev); + hci_dev_put(hdev); if (!hcon) return -ENOENT; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ee60c30f3be2..798f8f485e5a 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -743,7 +743,7 @@ static int __init bt_init(void) err = bt_sysfs_init(); if (err < 0) - return err; + goto cleanup_led; err = sock_register(&bt_sock_family_ops); if (err) @@ -779,6 +779,8 @@ unregister_socket: sock_unregister(PF_BLUETOOTH); cleanup_sysfs: bt_sysfs_cleanup(); +cleanup_led: + bt_leds_cleanup(); return err; } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1b50e4ef2c68..b8730c5f1cac 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -125,13 +125,11 @@ static void hci_conn_cleanup(struct hci_conn *conn) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); - hci_conn_del_sysfs(conn); - debugfs_remove_recursive(conn->debugfs); - hci_dev_put(hdev); + hci_conn_del_sysfs(conn); - hci_conn_put(conn); + hci_dev_put(hdev); } static void le_scan_cleanup(struct work_struct *work) @@ -1204,6 +1202,15 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-EOPNOTSUPP); } + /* Reject outgoing connection to device with same BD ADDR against + * CVE-2020-26555 + */ + if (!bacmp(&hdev->bdaddr, dst)) { + bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", + dst); + return ERR_PTR(-ECONNREFUSED); + } + acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { acl = hci_conn_add(hdev, ACL_LINK, dst, HCI_ROLE_MASTER); @@ -1331,12 +1338,10 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); - /* If we're already encrypted set the REAUTH_PEND flag, - * otherwise set the ENCRYPT_PEND. + /* Set the ENCRYPT_PEND to trigger encryption after + * authentication. */ - if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) - set_bit(HCI_CONN_REAUTH_PEND, &conn->flags); - else + if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags)) set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); } @@ -1379,34 +1384,41 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, if (!test_bit(HCI_CONN_AUTH, &conn->flags)) goto auth; - /* An authenticated FIPS approved combination key has sufficient - * security for security level 4. */ - if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256 && - sec_level == BT_SECURITY_FIPS) - goto encrypt; - - /* An authenticated combination key has sufficient security for - security level 3. */ - if ((conn->key_type == HCI_LK_AUTH_COMBINATION_P192 || - conn->key_type == HCI_LK_AUTH_COMBINATION_P256) && - sec_level == BT_SECURITY_HIGH) - goto encrypt; - - /* An unauthenticated combination key has sufficient security for - security level 1 and 2. */ - if ((conn->key_type == HCI_LK_UNAUTH_COMBINATION_P192 || - conn->key_type == HCI_LK_UNAUTH_COMBINATION_P256) && - (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW)) - goto encrypt; - - /* A combination key has always sufficient security for the security - levels 1 or 2. High security level requires the combination key - is generated using maximum PIN code length (16). - For pre 2.1 units. */ - if (conn->key_type == HCI_LK_COMBINATION && - (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW || - conn->pin_length == 16)) - goto encrypt; + switch (conn->key_type) { + case HCI_LK_AUTH_COMBINATION_P256: + /* An authenticated FIPS approved combination key has + * sufficient security for security level 4 or lower. + */ + if (sec_level <= BT_SECURITY_FIPS) + goto encrypt; + break; + case HCI_LK_AUTH_COMBINATION_P192: + /* An authenticated combination key has sufficient security for + * security level 3 or lower. + */ + if (sec_level <= BT_SECURITY_HIGH) + goto encrypt; + break; + case HCI_LK_UNAUTH_COMBINATION_P192: + case HCI_LK_UNAUTH_COMBINATION_P256: + /* An unauthenticated combination key has sufficient security + * for security level 2 or lower. + */ + if (sec_level <= BT_SECURITY_MEDIUM) + goto encrypt; + break; + case HCI_LK_COMBINATION: + /* A combination key has always sufficient security for the + * security levels 2 or lower. High security level requires the + * combination key is generated using maximum PIN code length + * (16). For pre 2.1 units. + */ + if (sec_level <= BT_SECURITY_MEDIUM || conn->pin_length == 16) + goto encrypt; + break; + default: + break; + } auth: if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3d780220e2d1..4d89e38dceec 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1519,6 +1519,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) hdev->flush(hdev); if (hdev->sent_cmd) { + cancel_delayed_work_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } @@ -2516,10 +2517,10 @@ int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { - struct smp_ltk *k; + struct smp_ltk *k, *tmp; int removed = 0; - list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { + list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) { if (bacmp(bdaddr, &k->bdaddr) || k->bdaddr_type != bdaddr_type) continue; @@ -2535,9 +2536,9 @@ int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) { - struct smp_irk *k; + struct smp_irk *k, *tmp; - list_for_each_entry_rcu(k, &hdev->identity_resolving_keys, list) { + list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) { if (bacmp(bdaddr, &k->bdaddr) || k->addr_type != addr_type) continue; @@ -3192,7 +3193,11 @@ int hci_register_dev(struct hci_dev *hdev) if (id < 0) return id; - snprintf(hdev->name, sizeof(hdev->name), "hci%d", id); + error = dev_set_name(&hdev->dev, "hci%u", id); + if (error) + return error; + + hdev->name = dev_name(&hdev->dev); hdev->id = id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); @@ -3214,8 +3219,6 @@ int hci_register_dev(struct hci_dev *hdev) if (!IS_ERR_OR_NULL(bt_debugfs)) hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs); - dev_set_name(&hdev->dev, "%s", hdev->name); - error = device_add(&hdev->dev); if (error < 0) goto err_wqueue; @@ -4331,7 +4334,7 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; else *req_complete = bt_cb(skb)->hci.req_complete; - kfree_skb(skb); + dev_kfree_skb_irq(skb); } spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index dd7bf437d88e..9d01874e6b93 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -25,6 +25,8 @@ /* Bluetooth HCI event handling. */ #include <asm/unaligned.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -1699,7 +1701,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) return; } - set_bit(HCI_INQUIRY, &hdev->flags); + if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY)) + set_bit(HCI_INQUIRY, &hdev->flags); } static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) @@ -2510,6 +2513,16 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s bdaddr %pMR type 0x%x", hdev->name, &ev->bdaddr, ev->link_type); + /* Reject incoming connection from device with same BD ADDR against + * CVE-2020-26555 + */ + if (hdev && !bacmp(&hdev->bdaddr, &ev->bdaddr)) { + bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", + &ev->bdaddr); + hci_reject_conn(hdev, &ev->bdaddr); + return; + } + mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type, &flags); @@ -2709,14 +2722,8 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!ev->status) { clear_bit(HCI_CONN_AUTH_FAILURE, &conn->flags); - - if (!hci_conn_ssp_enabled(conn) && - test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) { - bt_dev_info(hdev, "re-auth of legacy device is not possible."); - } else { - set_bit(HCI_CONN_AUTH, &conn->flags); - conn->sec_level = conn->pending_sec_level; - } + set_bit(HCI_CONN_AUTH, &conn->flags); + conn->sec_level = conn->pending_sec_level; } else { if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING) set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags); @@ -2725,7 +2732,6 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } clear_bit(HCI_CONN_AUTH_PEND, &conn->flags); - clear_bit(HCI_CONN_REAUTH_PEND, &conn->flags); if (conn->state == BT_CONFIG) { if (!ev->status && hci_conn_ssp_enabled(conn)) { @@ -3816,6 +3822,15 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!conn) goto unlock; + /* Ignore NULL link key against CVE-2020-26555 */ + if (!crypto_memneq(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { + bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR", + &ev->bdaddr); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); + hci_conn_drop(conn); + goto unlock; + } + hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; hci_conn_drop(conn); @@ -4294,8 +4309,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) * available, then do not declare that OOB data is * present. */ - if (!memcmp(data->rand256, ZERO_KEY, 16) || - !memcmp(data->hash256, ZERO_KEY, 16)) + if (!crypto_memneq(data->rand256, ZERO_KEY, 16) || + !crypto_memneq(data->hash256, ZERO_KEY, 16)) return 0x00; return 0x02; @@ -4305,8 +4320,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) * not supported by the hardware, then check that if * P-192 data values are present. */ - if (!memcmp(data->rand192, ZERO_KEY, 16) || - !memcmp(data->hash192, ZERO_KEY, 16)) + if (!crypto_memneq(data->rand192, ZERO_KEY, 16) || + !crypto_memneq(data->hash192, ZERO_KEY, 16)) return 0x00; return 0x01; @@ -4322,7 +4337,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn) + if (!conn || !hci_conn_ssp_enabled(conn)) goto unlock; hci_conn_hold(conn); @@ -4557,7 +4572,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn) + if (!conn || !hci_conn_ssp_enabled(conn)) goto unlock; /* Reset the authentication requirement to unknown */ diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 3ba0c6df73ce..a7e5bca9f7e4 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -430,7 +430,8 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); - memcpy(ni->name, hdev->name, 8); + memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name, + strnlen(hdev->name, sizeof(ni->name)), '\0'); opcode = cpu_to_le16(HCI_MON_NEW_INDEX); break; @@ -881,10 +882,6 @@ static int hci_sock_release(struct socket *sock) } sock_orphan(sk); - - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - release_sock(sk); sock_put(sk); return 0; @@ -977,6 +974,34 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, BT_DBG("cmd %x arg %lx", cmd, arg); + /* Make sure the cmd is valid before doing anything */ + switch (cmd) { + case HCIGETDEVLIST: + case HCIGETDEVINFO: + case HCIGETCONNLIST: + case HCIDEVUP: + case HCIDEVDOWN: + case HCIDEVRESET: + case HCIDEVRESTAT: + case HCISETSCAN: + case HCISETAUTH: + case HCISETENCRYPT: + case HCISETPTYPE: + case HCISETLINKPOL: + case HCISETLINKMODE: + case HCISETACLMTU: + case HCISETSCOMTU: + case HCIINQUIRY: + case HCISETRAW: + case HCIGETCONNINFO: + case HCIGETAUTHINFO: + case HCIBLOCKADDR: + case HCIUNBLOCKADDR: + break; + default: + return -ENOIOCTLCMD; + } + lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { @@ -993,7 +1018,14 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, if (hci_sock_gen_cookie(sk)) { struct sk_buff *skb; - if (capable(CAP_NET_ADMIN)) + /* Perform careful checks before setting the HCI_SOCK_TRUSTED + * flag. Make sure that not only the current task but also + * the socket opener has the required capability, since + * privileged programs can be tricked into making ioctl calls + * on HCI sockets, and the socket should not be marked as + * trusted simply because the ioctl caller is privileged. + */ + if (sk_capable(sk, CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); /* Send event to monitor */ @@ -1985,6 +2017,12 @@ done: return err; } +static void hci_sock_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); +} + static const struct proto_ops hci_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, @@ -2035,6 +2073,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol, sock->state = SS_UNCONNECTED; sk->sk_state = BT_OPEN; + sk->sk_destruct = hci_sock_destruct; bt_sock_link(&hci_sk_list, sk); return 0; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ccd2c377bf83..266112c960ee 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -33,7 +33,7 @@ void hci_conn_init_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); conn->dev.type = &bt_link; conn->dev.class = bt_class; @@ -46,27 +46,30 @@ void hci_conn_add_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); if (device_is_registered(&conn->dev)) return; dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); - if (device_add(&conn->dev) < 0) { + if (device_add(&conn->dev) < 0) bt_dev_err(hdev, "failed to register connection device"); - return; - } - - hci_dev_hold(hdev); } void hci_conn_del_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - if (!device_is_registered(&conn->dev)) + bt_dev_dbg(hdev, "conn %p", conn); + + if (!device_is_registered(&conn->dev)) { + /* If device_add() has *not* succeeded, use *only* put_device() + * to drop the reference count. + */ + put_device(&conn->dev); return; + } while (1) { struct device *dev; @@ -78,9 +81,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn) put_device(dev); } - device_del(&conn->dev); - - hci_dev_put(hdev); + device_unregister(&conn->dev); } static void bt_host_release(struct device *dev) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 0cbd0bca971f..00dae8e875a2 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -428,7 +428,7 @@ static void hidp_set_timer(struct hidp_session *session) static void hidp_del_timer(struct hidp_session *session) { if (session->idle_to > 0) - del_timer(&session->timer); + del_timer_sync(&session->timer); } static void hidp_process_report(struct hidp_session *session, int type, diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fd6cd47a6c5a..9346fae5d664 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2517,14 +2517,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) if (IS_ERR(skb)) return PTR_ERR(skb); - /* Channel lock is released before requesting new skb and then - * reacquired thus we need to recheck channel state. - */ - if (chan->state != BT_CONNECTED) { - kfree_skb(skb); - return -ENOTCONN; - } - l2cap_do_send(chan, skb); return len; } @@ -2568,14 +2560,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) if (IS_ERR(skb)) return PTR_ERR(skb); - /* Channel lock is released before requesting new skb and then - * reacquired thus we need to recheck channel state. - */ - if (chan->state != BT_CONNECTED) { - kfree_skb(skb); - return -ENOTCONN; - } - l2cap_do_send(chan, skb); err = len; break; @@ -2596,14 +2580,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) */ err = l2cap_segment_sdu(chan, &seg_queue, msg, len); - /* The channel could have been closed while segmenting, - * check that it is still connected. - */ - if (chan->state != BT_CONNECTED) { - __skb_queue_purge(&seg_queue); - err = -ENOTCONN; - } - if (err) break; @@ -4031,6 +4007,10 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, result = __le16_to_cpu(rsp->result); status = __le16_to_cpu(rsp->status); + if (result == L2CAP_CR_SUCCESS && (dcid < L2CAP_CID_DYN_START || + dcid > L2CAP_CID_DYN_END)) + return -EPROTO; + BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status); @@ -4062,6 +4042,11 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, switch (result) { case L2CAP_CR_SUCCESS: + if (__l2cap_get_chan_by_dcid(conn, dcid)) { + err = -EBADSLT; + break; + } + l2cap_state_change(chan, BT_CONFIG); chan->ident = 0; chan->dcid = dcid; @@ -4193,7 +4178,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, chan->ident = cmd->ident; l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); - chan->num_conf_rsp++; + if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP) + chan->num_conf_rsp++; /* Reset config buffer. */ chan->conf_len = 0; @@ -4373,33 +4359,29 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid); - mutex_lock(&conn->chan_lock); - - chan = __l2cap_get_chan_by_scid(conn, dcid); + chan = l2cap_get_chan_by_scid(conn, dcid); if (!chan) { - mutex_unlock(&conn->chan_lock); cmd_reject_invalid_cid(conn, cmd->ident, dcid, scid); return 0; } - l2cap_chan_hold(chan); - l2cap_chan_lock(chan); - rsp.dcid = cpu_to_le16(chan->scid); rsp.scid = cpu_to_le16(chan->dcid); l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp); chan->ops->set_shutdown(chan); + l2cap_chan_unlock(chan); + mutex_lock(&conn->chan_lock); + l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNRESET); + mutex_unlock(&conn->chan_lock); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); - return 0; } @@ -4419,33 +4401,28 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid); - mutex_lock(&conn->chan_lock); - - chan = __l2cap_get_chan_by_scid(conn, scid); + chan = l2cap_get_chan_by_scid(conn, scid); if (!chan) { - mutex_unlock(&conn->chan_lock); return 0; } - l2cap_chan_hold(chan); - l2cap_chan_lock(chan); - if (chan->state != BT_DISCONN) { l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); return 0; } + l2cap_chan_unlock(chan); + mutex_lock(&conn->chan_lock); + l2cap_chan_lock(chan); l2cap_chan_del(chan, 0); + mutex_unlock(&conn->chan_lock); chan->ops->close(chan); l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); - return 0; } @@ -5728,9 +5705,14 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn, if (!chan) goto done; + chan = l2cap_chan_hold_unless_zero(chan); + if (!chan) + goto done; + l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNREFUSED); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); done: mutex_unlock(&conn->chan_lock); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index d938311c58a8..b831e5fe3ebc 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -45,6 +45,7 @@ static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern); +static void l2cap_sock_cleanup_listen(struct sock *parent); bool l2cap_is_socket(struct socket *sock) { @@ -1205,6 +1206,7 @@ static int l2cap_sock_release(struct socket *sock) if (!sk) return 0; + l2cap_sock_cleanup_listen(sk); bt_sock_unlink(&l2cap_sk_list, sk); err = l2cap_sock_shutdown(sock, 2); @@ -1414,6 +1416,14 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan, if (!skb) return ERR_PTR(err); + /* Channel lock is released before requesting new skb and then + * reacquired thus we need to recheck channel state. + */ + if (chan->state != BT_CONNECTED) { + kfree_skb(skb); + return ERR_PTR(-ENOTCONN); + } + skb->priority = sk->sk_priority; bt_cb(skb)->l2cap.chan = chan; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index e4eaf5d2acbd..86edf512d497 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -593,7 +593,7 @@ int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) ret = rfcomm_dlc_send_frag(d, frag); if (ret < 0) { - kfree_skb(frag); + dev_kfree_skb_irq(frag); goto unlock; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 48ddc60b4fbd..c07a47d65c39 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -122,7 +122,7 @@ static int deliver_clone(const struct net_bridge_port *prev, skb = skb_clone(skb, GFP_ATOMIC); if (!skb) { - dev->stats.tx_dropped++; + DEV_STATS_INC(dev, tx_dropped); return -ENOMEM; } @@ -261,7 +261,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, skb = skb_copy(skb, GFP_ATOMIC); if (!skb) { - dev->stats.tx_dropped++; + DEV_STATS_INC(dev, tx_dropped); return; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b5fb2b682e19..ab539551b7d3 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -161,8 +161,9 @@ void br_manage_promisc(struct net_bridge *br) * This lets us disable promiscuous mode and write * this config to hw. */ - if (br->auto_cnt == 0 || - (br->auto_cnt == 1 && br_auto_port(p))) + if ((p->dev->priv_flags & IFF_UNICAST_FLT) && + (br->auto_cnt == 0 || + (br->auto_cnt == 1 && br_auto_port(p)))) br_port_clear_promisc(p); else br_port_set_promisc(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 14c2fdc268ea..f3938337ff87 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -146,12 +146,12 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if ((mdst && mdst->host_joined) || br_multicast_is_router(br)) { local_rcv = true; - br->dev->stats.multicast++; + DEV_STATS_INC(br->dev, multicast); } mcast_hit = true; } else { local_rcv = true; - br->dev->stats.multicast++; + DEV_STATS_INC(br->dev, multicast); } break; case BR_PKT_UNICAST: diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 348b8cb0bc24..cab0b239f96a 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1022,6 +1022,7 @@ static void caif_sock_destructor(struct sock *sk) return; } sk_stream_kill_queues(&cf_sk->sk); + WARN_ON(sk->sk_forward_alloc); caif_free_client(&cf_sk->layer); } diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 76d49a1bc6f6..609c5793f45a 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -135,6 +135,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, struct usb_device *usbdev; int res; + if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED) + return 0; + /* Check whether we have a NCM device, and find its VID/PID. */ if (!(dev->dev.parent && dev->dev.parent->driver && strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0)) diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index a1e85f032108..330cb2b087bb 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer, default: pr_warn("Request setup of bad link type = %d\n", param->linktype); + cfpkt_destroy(pkt); return -EINVAL; } req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) + if (!req) { + cfpkt_destroy(pkt); return -ENOMEM; + } + req->client_layer = user_layer; req->cmd = CFCTRL_CMD_LINK_SETUP; req->param = *param; diff --git a/net/can/bcm.c b/net/can/bcm.c index 74e555a22de7..1c9953c68f09 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -935,6 +935,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, cf = op->frames + op->cfsiz * i; err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz); + if (err < 0) + goto free_op; if (op->flags & CAN_FD_FRAME) { if (cf->len > 64) @@ -944,12 +946,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, err = -EINVAL; } - if (err < 0) { - if (op->frames != &op->sframe) - kfree(op->frames); - kfree(op); - return err; - } + if (err < 0) + goto free_op; if (msg_head->flags & TX_CP_CAN_ID) { /* copy can_id into frame */ @@ -1020,6 +1018,12 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, bcm_tx_start_timer(op); return msg_head->nframes * op->cfsiz + MHSIZ; + +free_op: + if (op->frames != &op->sframe) + kfree(op->frames); + kfree(op); + return err; } /* @@ -1516,6 +1520,12 @@ static int bcm_release(struct socket *sock) lock_sock(sk); +#if IS_ENABLED(CONFIG_PROC_FS) + /* remove procfs entry */ + if (net->can.bcmproc_dir && bo->bcm_proc_read) + remove_proc_entry(bo->procname, net->can.bcmproc_dir); +#endif /* CONFIG_PROC_FS */ + list_for_each_entry_safe(op, next, &bo->tx_ops, list) bcm_remove_op(op); @@ -1551,12 +1561,6 @@ static int bcm_release(struct socket *sock) list_for_each_entry_safe(op, next, &bo->rx_ops, list) bcm_remove_op(op); -#if IS_ENABLED(CONFIG_PROC_FS) - /* remove procfs entry */ - if (net->can.bcmproc_dir && bo->bcm_proc_read) - remove_proc_entry(bo->procname, net->can.bcmproc_dir); -#endif /* CONFIG_PROC_FS */ - /* remove device reference */ if (bo->bound) { bo->bound = 0; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 21bd37ec5511..7fd18e10755e 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -462,7 +462,7 @@ static void set_sock_callbacks(struct socket *sock, */ static int ceph_tcp_connect(struct ceph_connection *con) { - struct sockaddr_storage *paddr = &con->peer_addr.in_addr; + struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */ struct socket *sock; unsigned int noio_flag; int ret; @@ -471,7 +471,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) /* sock_create_kern() allocates with GFP_KERNEL */ noio_flag = memalloc_noio_save(); - ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, + ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); memalloc_noio_restore(noio_flag); if (ret) @@ -487,8 +487,8 @@ static int ceph_tcp_connect(struct ceph_connection *con) dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); con_sock_state_connecting(con); - ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), - O_NONBLOCK); + ret = kernel_connect(sock, (struct sockaddr *)&ss, sizeof(ss), + O_NONBLOCK); if (ret == -EINPROGRESS) { dout("connect %s EINPROGRESS sk_state = %u\n", ceph_pr_addr(&con->peer_addr.in_addr), @@ -1824,14 +1824,15 @@ static int verify_hello(struct ceph_connection *con) return 0; } -static bool addr_is_blank(struct sockaddr_storage *ss) +static bool addr_is_blank(struct ceph_entity_addr *addr) { - struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; - struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; + struct sockaddr_storage ss = addr->in_addr; /* align */ + struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr; + struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr; - switch (ss->ss_family) { + switch (ss.ss_family) { case AF_INET: - return addr->s_addr == htonl(INADDR_ANY); + return addr4->s_addr == htonl(INADDR_ANY); case AF_INET6: return ipv6_addr_any(addr6); default: @@ -1839,25 +1840,25 @@ static bool addr_is_blank(struct sockaddr_storage *ss) } } -static int addr_port(struct sockaddr_storage *ss) +static int addr_port(struct ceph_entity_addr *addr) { - switch (ss->ss_family) { + switch (get_unaligned(&addr->in_addr.ss_family)) { case AF_INET: - return ntohs(((struct sockaddr_in *)ss)->sin_port); + return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port)); case AF_INET6: - return ntohs(((struct sockaddr_in6 *)ss)->sin6_port); + return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port)); } return 0; } -static void addr_set_port(struct sockaddr_storage *ss, int p) +static void addr_set_port(struct ceph_entity_addr *addr, int p) { - switch (ss->ss_family) { + switch (get_unaligned(&addr->in_addr.ss_family)) { case AF_INET: - ((struct sockaddr_in *)ss)->sin_port = htons(p); + put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port); break; case AF_INET6: - ((struct sockaddr_in6 *)ss)->sin6_port = htons(p); + put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port); break; } } @@ -1865,21 +1866,18 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) /* * Unlike other *_pton function semantics, zero indicates success. */ -static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, +static int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr, char delim, const char **ipend) { - struct sockaddr_in *in4 = (struct sockaddr_in *) ss; - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; - - memset(ss, 0, sizeof(*ss)); + memset(&addr->in_addr, 0, sizeof(addr->in_addr)); - if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) { - ss->ss_family = AF_INET; + if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) { + put_unaligned(AF_INET, &addr->in_addr.ss_family); return 0; } - if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) { - ss->ss_family = AF_INET6; + if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) { + put_unaligned(AF_INET6, &addr->in_addr.ss_family); return 0; } @@ -1891,7 +1889,7 @@ static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, */ #ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER static int ceph_dns_resolve_name(const char *name, size_t namelen, - struct sockaddr_storage *ss, char delim, const char **ipend) + struct ceph_entity_addr *addr, char delim, const char **ipend) { const char *end, *delim_p; char *colon_p, *ip_addr = NULL; @@ -1920,7 +1918,7 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen, /* do dns_resolve upcall */ ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL); if (ip_len > 0) - ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL); + ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL); else ret = -ESRCH; @@ -1929,13 +1927,13 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen, *ipend = end; pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name, - ret, ret ? "failed" : ceph_pr_addr(ss)); + ret, ret ? "failed" : ceph_pr_addr(&addr->in_addr)); return ret; } #else static inline int ceph_dns_resolve_name(const char *name, size_t namelen, - struct sockaddr_storage *ss, char delim, const char **ipend) + struct ceph_entity_addr *addr, char delim, const char **ipend) { return -EINVAL; } @@ -1946,13 +1944,13 @@ static inline int ceph_dns_resolve_name(const char *name, size_t namelen, * then try to extract a hostname to resolve using userspace DNS upcall. */ static int ceph_parse_server_name(const char *name, size_t namelen, - struct sockaddr_storage *ss, char delim, const char **ipend) + struct ceph_entity_addr *addr, char delim, const char **ipend) { int ret; - ret = ceph_pton(name, namelen, ss, delim, ipend); + ret = ceph_pton(name, namelen, addr, delim, ipend); if (ret) - ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend); + ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend); return ret; } @@ -1971,7 +1969,6 @@ int ceph_parse_ips(const char *c, const char *end, dout("parse_ips on '%.*s'\n", (int)(end-c), c); for (i = 0; i < max_count; i++) { const char *ipend; - struct sockaddr_storage *ss = &addr[i].in_addr; int port; char delim = ','; @@ -1980,7 +1977,7 @@ int ceph_parse_ips(const char *c, const char *end, p++; } - ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend); + ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend); if (ret) goto bad; ret = -EINVAL; @@ -2011,9 +2008,9 @@ int ceph_parse_ips(const char *c, const char *end, port = CEPH_MON_PORT; } - addr_set_port(ss, port); + addr_set_port(&addr[i], port); - dout("parse_ips got %s\n", ceph_pr_addr(ss)); + dout("parse_ips got %s\n", ceph_pr_addr(&addr[i].in_addr)); if (p == end) break; @@ -2052,7 +2049,7 @@ static int process_banner(struct ceph_connection *con) */ if (memcmp(&con->peer_addr, &con->actual_peer_addr, sizeof(con->peer_addr)) != 0 && - !(addr_is_blank(&con->actual_peer_addr.in_addr) && + !(addr_is_blank(&con->actual_peer_addr) && con->actual_peer_addr.nonce == con->peer_addr.nonce)) { pr_warn("wrong peer, want %s/%d, got %s/%d\n", ceph_pr_addr(&con->peer_addr.in_addr), @@ -2066,13 +2063,13 @@ static int process_banner(struct ceph_connection *con) /* * did we learn our address? */ - if (addr_is_blank(&con->msgr->inst.addr.in_addr)) { - int port = addr_port(&con->msgr->inst.addr.in_addr); + if (addr_is_blank(&con->msgr->inst.addr)) { + int port = addr_port(&con->msgr->inst.addr); memcpy(&con->msgr->inst.addr.in_addr, &con->peer_addr_for_me.in_addr, sizeof(con->peer_addr_for_me.in_addr)); - addr_set_port(&con->msgr->inst.addr.in_addr, port); + addr_set_port(&con->msgr->inst.addr, port); encode_my_addr(con->msgr); dout("process_banner learned my addr is %s\n", ceph_pr_addr(&con->msgr->inst.addr.in_addr)); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 713fe1fbcb18..90ebb0ba927c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3137,17 +3137,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq) int ret; dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - ret = wait_for_completion_interruptible(&lreq->reg_commit_wait); + ret = wait_for_completion_killable(&lreq->reg_commit_wait); return ret ?: lreq->reg_commit_error; } -static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq) +static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq, + unsigned long timeout) { - int ret; + long left; dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - ret = wait_for_completion_interruptible(&lreq->notify_finish_wait); - return ret ?: lreq->notify_finish_error; + left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait, + ceph_timeout_jiffies(timeout)); + if (left <= 0) + left = left ?: -ETIMEDOUT; + else + left = lreq->notify_finish_error; /* completed */ + + return left; } /* @@ -4760,7 +4767,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, ret = linger_reg_commit_wait(lreq); if (!ret) - ret = linger_notify_finish_wait(lreq); + ret = linger_notify_finish_wait(lreq, + msecs_to_jiffies(2 * timeout * MSEC_PER_SEC)); else dout("lreq %p failed to initiate notify %d\n", lreq, ret); diff --git a/net/core/datagram.c b/net/core/datagram.c index 865a8cb7b0bd..6ba82eb14b46 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -837,18 +837,21 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk; __poll_t mask; + u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); - if (sk->sk_shutdown & RCV_SHUTDOWN) + shutdown = READ_ONCE(sk->sk_shutdown); + if (shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; - if (sk->sk_shutdown == SHUTDOWN_MASK) + if (shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; /* readable? */ @@ -857,10 +860,12 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, /* Connection-based need to check for termination and startup */ if (connection_based(sk)) { - if (sk->sk_state == TCP_CLOSE) + int state = READ_ONCE(sk->sk_state); + + if (state == TCP_CLOSE) mask |= EPOLLHUP; /* connection hasn't started yet? */ - if (sk->sk_state == TCP_SYN_SENT) + if (state == TCP_SYN_SENT) return mask; } diff --git a/net/core/dev.c b/net/core/dev.c index 880b096eef8a..b5c9648c2192 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2303,6 +2303,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, bool active = false; unsigned int nr_ids; + WARN_ON_ONCE(index >= dev->num_tx_queues); + if (dev->num_tc) { /* Do not allow XPS on subordinate device directly */ num_tc = dev->num_tc; @@ -2794,8 +2796,10 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) { if (in_irq() || irqs_disabled()) __dev_kfree_skb_irq(skb, reason); + else if (unlikely(reason == SKB_REASON_DROPPED)) + kfree_skb(skb); else - dev_kfree_skb(skb); + consume_skb(skb); } EXPORT_SYMBOL(__dev_kfree_skb_any); @@ -3196,6 +3200,14 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > dev->gso_max_segs) return features & ~NETIF_F_GSO_MASK; + if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size))) + return features & ~NETIF_F_GSO_MASK; + + if (!skb_shinfo(skb)->gso_type) { + skb_warn_bad_offload(skb); + return features & ~NETIF_F_GSO_MASK; + } + /* Support for GSO partial features requires software * intervention before we can actually process the packets * so we need to strip support for any partial features now @@ -4060,8 +4072,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, u32 next_cpu; u32 ident; - /* First check into global flow table if there is a match */ - ident = sock_flow_table->ents[hash & sock_flow_table->mask]; + /* First check into global flow table if there is a match. + * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow(). + */ + ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]); if ((ident ^ hash) & ~rps_cpu_mask) goto try_rps; @@ -9004,9 +9018,7 @@ void netdev_run_todo(void) BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); -#if IS_ENABLED(CONFIG_DECNET) - WARN_ON(dev->dn_ptr); -#endif + if (dev->priv_destructor) dev->priv_destructor(dev); if (dev->needs_free_netdev) @@ -9031,24 +9043,16 @@ void netdev_run_todo(void) void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats) { -#if BITS_PER_LONG == 64 - BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats)); - memcpy(stats64, netdev_stats, sizeof(*netdev_stats)); - /* zero out counters that only exist in rtnl_link_stats64 */ - memset((char *)stats64 + sizeof(*netdev_stats), 0, - sizeof(*stats64) - sizeof(*netdev_stats)); -#else - size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long); - const unsigned long *src = (const unsigned long *)netdev_stats; + size_t i, n = sizeof(*netdev_stats) / sizeof(atomic_long_t); + const atomic_long_t *src = (atomic_long_t *)netdev_stats; u64 *dst = (u64 *)stats64; BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) - dst[i] = src[i]; + dst[i] = (unsigned long)atomic_long_read(&src[i]); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + n * sizeof(u64), 0, sizeof(*stats64) - n * sizeof(u64)); -#endif } EXPORT_SYMBOL(netdev_stats_to_stats64); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 2ed600012640..52e559252a9e 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -122,7 +122,7 @@ out: } static const struct genl_multicast_group dropmon_mcgrps[] = { - { .name = "events", }, + { .name = "events", .cap_sys_admin = 1 }, }; static void send_dm_alert(struct work_struct *work) @@ -370,10 +370,12 @@ static const struct genl_ops dropmon_ops[] = { { .cmd = NET_DM_CMD_START, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_STOP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, }; diff --git a/net/core/dst.c b/net/core/dst.c index 81ccf20e2826..1b1677683b97 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -97,10 +97,10 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, { struct dst_entry *dst; - if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { - if (ops->gc(ops)) - return NULL; - } + if (ops->gc && + !(flags & DST_NOCOUNT) && + dst_entries_get_fast(ops) > ops->gc_thresh) + ops->gc(ops); dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4db9512feba8..d007f1cca64c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2023,7 +2023,8 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) return n_stats; if (n_stats > S32_MAX / sizeof(u64)) return -ENOMEM; - WARN_ON_ONCE(!n_stats); + if (WARN_ON_ONCE(!n_stats)) + return -EOPNOTSUPP; if (copy_from_user(&stats, useraddr, sizeof(stats))) return -EFAULT; diff --git a/net/core/filter.c b/net/core/filter.c index 5129e89f52bb..dea7132f3813 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2025,6 +2025,10 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, if (mlen) { __skb_pull(skb, mlen); + if (unlikely(!skb->len)) { + kfree_skb(skb); + return -ERANGE; + } /* At ingress, the mac header has already been pulled once. * At egress, skb_pospull_rcsum has to be done in case that @@ -2561,15 +2565,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) { + void *old_data; + /* skb_ensure_writable() is not needed here, as we're * already working on an uncloned skb. */ if (unlikely(!pskb_may_pull(skb, off + len))) return -ENOMEM; - skb_postpull_rcsum(skb, skb->data + off, len); - memmove(skb->data + len, skb->data, off); + old_data = skb->data; __skb_pull(skb, len); + skb_postpull_rcsum(skb, old_data + off, len); + memmove(skb->data, old_data, off); return 0; } @@ -4228,7 +4235,6 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, memcpy(params->smac, dev->dev_addr, ETH_ALEN); params->h_vlan_TCI = 0; params->h_vlan_proto = 0; - params->ifindex = dev->ifindex; return 0; } @@ -4326,6 +4332,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->ipv4_dst = nh->nh_gw; params->rt_metric = res.fi->fib_priority; + params->ifindex = dev->ifindex; /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here @@ -4440,6 +4447,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, dev = f6i->fib6_nh.nh_dev; params->rt_metric = f6i->fib6_metric; + params->ifindex = dev->ifindex; /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is * not needed here. Can not use __ipv6_neigh_lookup_noref here diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2b96e9a7fc59..5b6f3175d55b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -476,37 +476,6 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, } EXPORT_SYMBOL(neigh_lookup); -struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, - const void *pkey) -{ - struct neighbour *n; - unsigned int key_len = tbl->key_len; - u32 hash_val; - struct neigh_hash_table *nht; - - NEIGH_CACHE_STAT_INC(tbl, lookups); - - rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift); - - for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); - n != NULL; - n = rcu_dereference_bh(n->next)) { - if (!memcmp(n->primary_key, pkey, key_len) && - net_eq(dev_net(n->dev), net)) { - if (!refcount_inc_not_zero(&n->refcnt)) - n = NULL; - NEIGH_CACHE_STAT_INC(tbl, hits); - break; - } - } - - rcu_read_unlock_bh(); - return n; -} -EXPORT_SYMBOL(neigh_lookup_nodev); - struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { @@ -1661,9 +1630,6 @@ static struct neigh_table *neigh_find_table(int family) case AF_INET6: tbl = neigh_tables[NEIGH_ND_TABLE]; break; - case AF_DECnet: - tbl = neigh_tables[NEIGH_DN_TABLE]; - break; } return tbl; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 56c240c98a56..a87774424829 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -132,12 +132,12 @@ static int ops_init(const struct pernet_operations *ops, struct net *net) return 0; if (ops->id && ops->size) { -cleanup: ng = rcu_dereference_protected(net->gen, lockdep_is_held(&pernet_ops_rwsem)); ng->ptr[*ops->id] = NULL; } +cleanup: kfree(data); out: diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 41e32a958d08..08f0da9e6a80 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -136,6 +136,20 @@ static void queue_process(struct work_struct *work) } } +static int netif_local_xmit_active(struct net_device *dev) +{ + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id()) + return 1; + } + + return 0; +} + static void poll_one_napi(struct napi_struct *napi) { int work; @@ -182,7 +196,10 @@ void netpoll_poll_dev(struct net_device *dev) if (!ni || down_trylock(&ni->dev_lock)) return; - if (!netif_running(dev)) { + /* Some drivers will take the same locks in poll and xmit, + * we can't poll if local CPU is already in xmit. + */ + if (!netif_running(dev) || netif_local_xmit_active(dev)) { up(&ni->dev_lock); return; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 3714cd9e3111..3ade60ec4512 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -651,19 +651,19 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, " Flags: "); for (i = 0; i < NR_PKT_FLAGS; i++) { - if (i == F_FLOW_SEQ) + if (i == FLOW_SEQ_SHIFT) if (!pkt_dev->cflows) continue; - if (pkt_dev->flags & (1 << i)) + if (pkt_dev->flags & (1 << i)) { seq_printf(seq, "%s ", pkt_flag_names[i]); - else if (i == F_FLOW_SEQ) - seq_puts(seq, "FLOW_RND "); - #ifdef CONFIG_XFRM - if (i == F_IPSEC && pkt_dev->spi) - seq_printf(seq, "spi:%u", pkt_dev->spi); + if (i == IPSEC_SHIFT && pkt_dev->spi) + seq_printf(seq, "spi:%u ", pkt_dev->spi); #endif + } else if (i == FLOW_SEQ_SHIFT) { + seq_puts(seq, "FLOW_RND "); + } } seq_puts(seq, "\n"); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2837cc03f69e..0d3f724da78b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2970,9 +2970,12 @@ replay: ifname[0] = '\0'; ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) + if (ifm->ifi_index > 0) { dev = __dev_get_by_index(net, ifm->ifi_index); - else { + } else if (ifm->ifi_index < 0) { + NL_SET_ERR_MSG(extack, "ifindex can't be negative"); + return -EINVAL; + } else { if (ifname[0]) dev = __dev_get_by_name(net, ifname); else @@ -3436,7 +3439,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, ndm->ndm_ifindex = dev->ifindex; ndm->ndm_state = ndm_state; - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) + if (nla_put(skb, NDA_LLADDR, dev->addr_len, addr)) goto nla_put_failure; if (vid) if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid)) @@ -3450,10 +3453,10 @@ nla_put_failure: return -EMSGSIZE; } -static inline size_t rtnl_fdb_nlmsg_size(void) +static inline size_t rtnl_fdb_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + - nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ + nla_total_size(dev->addr_len) + /* NDA_LLADDR */ nla_total_size(sizeof(u16)) + /* NDA_VLAN */ 0; } @@ -3465,7 +3468,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(rtnl_fdb_nlmsg_size(), GFP_ATOMIC); + skb = nlmsg_new(rtnl_fdb_nlmsg_size(dev), GFP_ATOMIC); if (!skb) goto errout; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4178fc28c277..e03cd719b86b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1953,6 +1953,9 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) insp = list; } else { /* Eaten partially. */ + if (skb_is_gso(skb) && !list->head_frag && + skb_headlen(list)) + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; if (skb_shared(list)) { /* Sucks! We need to fork list. :-( */ @@ -3543,21 +3546,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; - skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); - struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int partial_segs = 0; unsigned int headroom; unsigned int len = head_skb->len; + struct sk_buff *frag_skb; + skb_frag_t *frag; __be16 proto; bool csum, sg; - int nfrags = skb_shinfo(head_skb)->nr_frags; int err = -ENOMEM; int i = 0; - int pos; + int nfrags, pos; int dummy; if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) && @@ -3635,6 +3637,13 @@ normal: headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); + if (skb_orphan_frags(head_skb, GFP_ATOMIC)) + return ERR_PTR(-ENOMEM); + + nfrags = skb_shinfo(head_skb)->nr_frags; + frag = skb_shinfo(head_skb)->frags; + frag_skb = head_skb; + do { struct sk_buff *nskb; skb_frag_t *nskb_frag; @@ -3659,6 +3668,10 @@ normal: (skb_headlen(list_skb) == len || sg)) { BUG_ON(skb_headlen(list_skb) > len); + nskb = skb_clone(list_skb, GFP_ATOMIC); + if (unlikely(!nskb)) + goto err; + i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; @@ -3677,12 +3690,8 @@ normal: frag++; } - nskb = skb_clone(list_skb, GFP_ATOMIC); list_skb = list_skb->next; - if (unlikely(!nskb)) - goto err; - if (unlikely(pskb_trim(nskb, len))) { kfree_skb(nskb); goto err; @@ -3747,12 +3756,16 @@ normal: skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & SKBTX_SHARED_FRAG; - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || - skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) + if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) goto err; while (pos < offset + len) { if (i >= nfrags) { + if (skb_orphan_frags(list_skb, GFP_ATOMIC) || + skb_zerocopy_clone(nskb, list_skb, + GFP_ATOMIC)) + goto err; + i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; @@ -3766,10 +3779,6 @@ normal: i--; frag--; } - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || - skb_zerocopy_clone(nskb, frag_skb, - GFP_ATOMIC)) - goto err; list_skb = list_skb->next; } @@ -4442,6 +4451,11 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, skb = alloc_skb(0, GFP_ATOMIC); } else { skb = skb_clone(orig_skb, GFP_ATOMIC); + + if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) { + kfree_skb(skb); + return; + } } if (!skb) return; diff --git a/net/core/sock.c b/net/core/sock.c index cd23a8e4556c..62d169bcfcfa 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -496,7 +496,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; @@ -632,7 +632,8 @@ bool sk_mc_loop(struct sock *sk) return false; if (!sk) return true; - switch (sk->sk_family) { + /* IPV6_ADDRFORM can change sk->sk_family under us. */ + switch (READ_ONCE(sk->sk_family)) { case AF_INET: return inet_sk(sk)->mc_loop; #if IS_ENABLED(CONFIG_IPV6) @@ -1319,7 +1320,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, if (!sock->ops->set_peek_off) return -EOPNOTSUPP; - v.val = sk->sk_peek_off; + v.val = READ_ONCE(sk->sk_peek_off); break; case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); @@ -1349,7 +1350,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, #ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: - v.val = sk->sk_ll_usec; + v.val = READ_ONCE(sk->sk_ll_usec); break; #endif @@ -1795,7 +1796,6 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; - sk_dst_set(sk, dst); sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps; if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; @@ -1810,6 +1810,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) } } sk->sk_gso_max_segs = max_segs; + sk_dst_set(sk, dst); } EXPORT_SYMBOL_GPL(sk_setup_caps); @@ -1939,13 +1940,24 @@ kuid_t sock_i_uid(struct sock *sk) } EXPORT_SYMBOL(sock_i_uid); -unsigned long sock_i_ino(struct sock *sk) +unsigned long __sock_i_ino(struct sock *sk) { unsigned long ino; - read_lock_bh(&sk->sk_callback_lock); + read_lock(&sk->sk_callback_lock); ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; - read_unlock_bh(&sk->sk_callback_lock); + read_unlock(&sk->sk_callback_lock); + return ino; +} +EXPORT_SYMBOL(__sock_i_ino); + +unsigned long sock_i_ino(struct sock *sk) +{ + unsigned long ino; + + local_bh_disable(); + ino = __sock_i_ino(sk); + local_bh_enable(); return ino; } EXPORT_SYMBOL(sock_i_ino); @@ -2060,9 +2072,9 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) break; - if (sk->sk_shutdown & SEND_SHUTDOWN) + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) break; - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) break; timeo = schedule_timeout(timeo); } @@ -2090,7 +2102,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, goto failure; err = -EPIPE; - if (sk->sk_shutdown & SEND_SHUTDOWN) + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto failure; if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) @@ -2527,7 +2539,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount) if (mem_cgroup_sockets_enabled && sk->sk_memcg) mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); - if (sk_under_memory_pressure(sk) && + if (sk_under_global_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); } @@ -2548,7 +2560,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim); int sk_set_peek_off(struct sock *sk, int val) { - sk->sk_peek_off = val; + WRITE_ONCE(sk->sk_peek_off, val); return 0; } EXPORT_SYMBOL_GPL(sk_set_peek_off); diff --git a/net/core/stream.c b/net/core/stream.c index 7b411a91a81c..cd60746877b1 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -32,7 +32,7 @@ void sk_stream_write_space(struct sock *sk) struct socket *sock = sk->sk_socket; struct socket_wq *wq; - if (sk_stream_is_writeable(sk) && sock) { + if (__sk_stream_is_writeable(sk, 1) && sock) { clear_bit(SOCK_NOSPACE, &sock->flags); rcu_read_lock(); @@ -196,6 +196,12 @@ void sk_stream_kill_queues(struct sock *sk) /* First the read buffer. */ __skb_queue_purge(&sk->sk_receive_queue); + /* Next, the error queue. + * We need to use queue lock, because other threads might + * add packets to the queue without socket lock being held. + */ + skb_queue_purge(&sk->sk_error_queue); + /* Next, the write queue. */ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); @@ -203,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk) sk_mem_reclaim(sk); WARN_ON(sk->sk_wmem_queued); - WARN_ON(sk->sk_forward_alloc); /* It is _impossible_ for the backlog to contain anything * when we get here. All user references to this socket diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index aec3c724665f..579f39e0d02e 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -291,6 +291,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned int len); +void dccp_destruct_common(struct sock *sk); int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); void dccp_destroy_sock(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index c021d5dde8f7..5281ac3260f6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -247,12 +247,12 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - /* Only need dccph_dport & dccph_sport which are the first - * 4 bytes in dccp header. - * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. - */ - BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); - BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + if (!pskb_may_pull(skb, offset + sizeof(*dh))) + return; + dh = (struct dccp_hdr *)(skb->data + offset); + if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) + return; + iph = (struct iphdr *)skb->data; dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet_lookup_established(net, &dccp_hashinfo, @@ -612,9 +612,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_parse_options(sk, dreq, skb)) goto drop_and_free; - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); @@ -622,6 +619,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->ireq_family = AF_INET; ireq->ir_iif = sk->sk_bound_dev_if; + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + /* * Step 3: Process LISTEN state * diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 72803e1ea10a..72ceefbf2312 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -71,7 +71,7 @@ static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; + const struct ipv6hdr *hdr; const struct dccp_hdr *dh; struct dccp_sock *dp; struct ipv6_pinfo *np; @@ -80,12 +80,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); - /* Only need dccph_dport & dccph_sport which are the first - * 4 bytes in dccp header. - * Our caller (icmpv6_notify()) already pulled 8 bytes for us. - */ - BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); - BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + if (!pskb_may_pull(skb, offset + sizeof(*dh))) + return; + dh = (struct dccp_hdr *)(skb->data + offset); + if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) + return; + hdr = (const struct ipv6hdr *)skb->data; dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet6_lookup_established(net, &dccp_hashinfo, @@ -349,15 +349,15 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_parse_options(sk, dreq, skb)) goto drop_and_free; - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; ireq->ireq_family = AF_INET6; ireq->ir_mark = inet_request_mark(sk, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -541,11 +541,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); + newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) - skb_set_owner_r(newnp->pktoptions, newsk); } return newsk; @@ -605,7 +603,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, GFP_ATOMIC); + opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) @@ -669,7 +667,6 @@ ipv6_pktoptions: np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &DCCP_SKB_CB(opt_skb)->header.h6)) { - skb_set_owner_r(opt_skb, sk); memmove(IP6CB(opt_skb), &DCCP_SKB_CB(opt_skb)->header.h6, sizeof(struct inet6_skb_parm)); @@ -1003,6 +1000,12 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { #endif }; +static void dccp_v6_sk_destruct(struct sock *sk) +{ + dccp_destruct_common(sk); + inet6_sock_destruct(sk); +} + /* NOTE: A lot of things set to zero explicitly by call to * sk_alloc() so need not be done here. */ @@ -1015,17 +1018,12 @@ static int dccp_v6_init_sock(struct sock *sk) if (unlikely(!dccp_v6_ctl_sock_initialized)) dccp_v6_ctl_sock_initialized = 1; inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; + sk->sk_destruct = dccp_v6_sk_destruct; } return err; } -static void dccp_v6_destroy_sock(struct sock *sk) -{ - dccp_destroy_sock(sk); - inet6_destroy_sock(sk); -} - static struct timewait_sock_ops dccp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct dccp6_timewait_sock), }; @@ -1048,7 +1046,7 @@ static struct proto dccp_v6_prot = { .accept = inet_csk_accept, .get_port = inet_csk_get_port, .shutdown = dccp_shutdown, - .destroy = dccp_v6_destroy_sock, + .destroy = dccp_destroy_sock, .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), diff --git a/net/dccp/options.c b/net/dccp/options.c index 4e40db017e19..3c464d63b0bb 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -60,7 +60,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, (dh->dccph_doff * 4); struct dccp_options_received *opt_recv = &dp->dccps_options_received; unsigned char opt, len; - unsigned char *uninitialized_var(value); + unsigned char *value; u32 elapsed_time; __be32 opt_val; int rc; diff --git a/net/dccp/output.c b/net/dccp/output.c index 91a15b3c4915..d872dd1cfb5e 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -189,7 +189,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) /* And store cached results */ icsk->icsk_pmtu_cookie = pmtu; - dp->dccps_mss_cache = cur_mps; + WRITE_ONCE(dp->dccps_mss_cache, cur_mps); return cur_mps; } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index dbbcf50aea35..c4ea0159ce2e 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -174,12 +174,18 @@ const char *dccp_packet_name(const int type) EXPORT_SYMBOL_GPL(dccp_packet_name); -static void dccp_sk_destruct(struct sock *sk) +void dccp_destruct_common(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_tx_ccid = NULL; +} +EXPORT_SYMBOL_GPL(dccp_destruct_common); + +static void dccp_sk_destruct(struct sock *sk) +{ + dccp_destruct_common(sk); inet_sock_destruct(sk); } @@ -322,11 +328,15 @@ EXPORT_SYMBOL_GPL(dccp_disconnect); __poll_t dccp_poll(struct file *file, struct socket *sock, poll_table *wait) { - __poll_t mask; struct sock *sk = sock->sk; + __poll_t mask; + u8 shutdown; + int state; sock_poll_wait(file, sock, wait); - if (sk->sk_state == DCCP_LISTEN) + + state = inet_sk_state_load(sk); + if (state == DCCP_LISTEN) return inet_csk_listen_poll(sk); /* Socket is not locked. We are protected from async events @@ -335,20 +345,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock, */ mask = 0; - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask = EPOLLERR; + shutdown = READ_ONCE(sk->sk_shutdown); - if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) + if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED) mask |= EPOLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; /* Connected? */ - if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { + if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { if (atomic_read(&sk->sk_rmem_alloc) > 0) mask |= EPOLLIN | EPOLLRDNORM; - if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { + if (!(shutdown & SEND_SHUTDOWN)) { if (sk_stream_is_writeable(sk)) { mask |= EPOLLOUT | EPOLLWRNORM; } else { /* send SIGIO later */ @@ -366,7 +377,6 @@ __poll_t dccp_poll(struct file *file, struct socket *sock, } return mask; } - EXPORT_SYMBOL_GPL(dccp_poll); int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) @@ -642,7 +652,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, return dccp_getsockopt_service(sk, len, (__be32 __user *)optval, optlen); case DCCP_SOCKOPT_GET_CUR_MPS: - val = dp->dccps_mss_cache; + val = READ_ONCE(dp->dccps_mss_cache); break; case DCCP_SOCKOPT_AVAILABLE_CCIDS: return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen); @@ -764,7 +774,7 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) trace_dccp_probe(sk, len); - if (len > dp->dccps_mss_cache) + if (len > READ_ONCE(dp->dccps_mss_cache)) return -EMSGSIZE; lock_sock(sk); @@ -797,6 +807,12 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out_discard; } + /* We need to check dccps_mss_cache after socket is locked. */ + if (len > dp->dccps_mss_cache) { + rc = -EMSGSIZE; + goto out_discard; + } + skb_reserve(skb, sk->sk_prot->max_header); rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc != 0) diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig deleted file mode 100644 index dcc74956badd..000000000000 --- a/net/decnet/Kconfig +++ /dev/null @@ -1,42 +0,0 @@ -# -# DECnet configuration -# -config DECNET - tristate "DECnet Support" - ---help--- - The DECnet networking protocol was used in many products made by - Digital (now Compaq). It provides reliable stream and sequenced - packet communications over which run a variety of services similar - to those which run over TCP/IP. - - To find some tools to use with the kernel layer support, please - look at Patrick Caulfield's web site: - <http://linux-decnet.sourceforge.net/>. - - More detailed documentation is available in - <file:Documentation/networking/decnet.txt>. - - Be sure to say Y to "/proc file system support" and "Sysctl support" - below when using DECnet, since you will need sysctl support to aid - in configuration at run time. - - The DECnet code is also available as a module ( = code which can be - inserted in and removed from the running kernel whenever you want). - The module is called decnet. - -config DECNET_ROUTER - bool "DECnet: router support" - depends on DECNET - select FIB_RULES - ---help--- - Add support for turning your DECnet Endnode into a level 1 or 2 - router. This is an experimental, but functional option. If you - do say Y here, then make sure that you also say Y to "Kernel/User - network link driver", "Routing messages" and "Network packet - filtering". The first two are required to allow configuration via - rtnetlink (you will need Alexey Kuznetsov's iproute2 package - from <ftp://ftp.tux.org/pub/net/ip-routing/>). The "Network packet - filtering" option will be required for the forthcoming routing daemon - to work. - - See <file:Documentation/networking/decnet.txt> for more information. diff --git a/net/decnet/Makefile b/net/decnet/Makefile deleted file mode 100644 index 07b38e441b2d..000000000000 --- a/net/decnet/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -obj-$(CONFIG_DECNET) += decnet.o - -decnet-y := af_decnet.o dn_nsp_in.o dn_nsp_out.o \ - dn_route.o dn_dev.o dn_neigh.o dn_timer.o -decnet-$(CONFIG_DECNET_ROUTER) += dn_fib.o dn_rules.o dn_table.o -decnet-y += sysctl_net_decnet.o - -obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/decnet/README b/net/decnet/README deleted file mode 100644 index 60e7ec88c81f..000000000000 --- a/net/decnet/README +++ /dev/null @@ -1,8 +0,0 @@ - Linux DECnet Project - ====================== - -The documentation for this kernel subsystem is available in the -Documentation/networking subdirectory of this distribution and also -on line at http://www.chygwyn.com/DECnet/ - -Steve Whitehouse <SteveW@ACM.org> diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c deleted file mode 100644 index cc7077105969..000000000000 --- a/net/decnet/af_decnet.c +++ /dev/null @@ -1,2408 +0,0 @@ - -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Socket Layer Interface - * - * Authors: Eduardo Marcelo Serrat <emserrat@geocities.com> - * Patrick Caulfield <patrick@pandh.demon.co.uk> - * - * Changes: - * Steve Whitehouse: Copied from Eduardo Serrat and Patrick Caulfield's - * version of the code. Original copyright preserved - * below. - * Steve Whitehouse: Some bug fixes, cleaning up some code to make it - * compatible with my routing layer. - * Steve Whitehouse: Merging changes from Eduardo Serrat and Patrick - * Caulfield. - * Steve Whitehouse: Further bug fixes, checking module code still works - * with new routing layer. - * Steve Whitehouse: Additional set/get_sockopt() calls. - * Steve Whitehouse: Fixed TIOCINQ ioctl to be same as Eduardo's new - * code. - * Steve Whitehouse: recvmsg() changed to try and behave in a POSIX like - * way. Didn't manage it entirely, but its better. - * Steve Whitehouse: ditto for sendmsg(). - * Steve Whitehouse: A selection of bug fixes to various things. - * Steve Whitehouse: Added TIOCOUTQ ioctl. - * Steve Whitehouse: Fixes to username2sockaddr & sockaddr2username. - * Steve Whitehouse: Fixes to connect() error returns. - * Patrick Caulfield: Fixes to delayed acceptance logic. - * David S. Miller: New socket locking - * Steve Whitehouse: Socket list hashing/locking - * Arnaldo C. Melo: use capable, not suser - * Steve Whitehouse: Removed unused code. Fix to use sk->allocation - * when required. - * Patrick Caulfield: /proc/net/decnet now has object name/number - * Steve Whitehouse: Fixed local port allocation, hashed sk list - * Matthew Wilcox: Fixes for dn_ioctl() - * Steve Whitehouse: New connect/accept logic to allow timeouts and - * prepare for sendpage etc. - */ - - -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - -HISTORY: - -Version Kernel Date Author/Comments -------- ------ ---- --------------- -Version 0.0.1 2.0.30 01-dic-97 Eduardo Marcelo Serrat - (emserrat@geocities.com) - - First Development of DECnet Socket La- - yer for Linux. Only supports outgoing - connections. - -Version 0.0.2 2.1.105 20-jun-98 Patrick J. Caulfield - (patrick@pandh.demon.co.uk) - - Port to new kernel development version. - -Version 0.0.3 2.1.106 25-jun-98 Eduardo Marcelo Serrat - (emserrat@geocities.com) - _ - Added support for incoming connections - so we can start developing server apps - on Linux. - - - Module Support -Version 0.0.4 2.1.109 21-jul-98 Eduardo Marcelo Serrat - (emserrat@geocities.com) - _ - Added support for X11R6.4. Now we can - use DECnet transport for X on Linux!!! - - -Version 0.0.5 2.1.110 01-aug-98 Eduardo Marcelo Serrat - (emserrat@geocities.com) - Removed bugs on flow control - Removed bugs on incoming accessdata - order - - -Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat - dn_recvmsg fixes - - Patrick J. Caulfield - dn_bind fixes -*******************************************************************************/ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/sched/signal.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/netdevice.h> -#include <linux/inet.h> -#include <linux/route.h> -#include <linux/netfilter.h> -#include <linux/seq_file.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <net/flow.h> -#include <asm/ioctls.h> -#include <linux/capability.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/proc_fs.h> -#include <linux/stat.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/jiffies.h> -#include <net/net_namespace.h> -#include <net/neighbour.h> -#include <net/dst.h> -#include <net/fib_rules.h> -#include <net/tcp.h> -#include <net/dn.h> -#include <net/dn_nsp.h> -#include <net/dn_dev.h> -#include <net/dn_route.h> -#include <net/dn_fib.h> -#include <net/dn_neigh.h> - -struct dn_sock { - struct sock sk; - struct dn_scp scp; -}; - -static void dn_keepalive(struct sock *sk); - -#define DN_SK_HASH_SHIFT 8 -#define DN_SK_HASH_SIZE (1 << DN_SK_HASH_SHIFT) -#define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1) - - -static const struct proto_ops dn_proto_ops; -static DEFINE_RWLOCK(dn_hash_lock); -static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; -static struct hlist_head dn_wild_sk; -static atomic_long_t decnet_memory_allocated; - -static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags); -static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags); - -static struct hlist_head *dn_find_list(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->addr.sdn_flags & SDF_WILD) - return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL; - - return &dn_sk_hash[le16_to_cpu(scp->addrloc) & DN_SK_HASH_MASK]; -} - -/* - * Valid ports are those greater than zero and not already in use. - */ -static int check_port(__le16 port) -{ - struct sock *sk; - - if (port == 0) - return -1; - - sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { - struct dn_scp *scp = DN_SK(sk); - if (scp->addrloc == port) - return -1; - } - return 0; -} - -static unsigned short port_alloc(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); -static unsigned short port = 0x2000; - unsigned short i_port = port; - - while(check_port(cpu_to_le16(++port)) != 0) { - if (port == i_port) - return 0; - } - - scp->addrloc = cpu_to_le16(port); - - return 1; -} - -/* - * Since this is only ever called from user - * level, we don't need a write_lock() version - * of this. - */ -static int dn_hash_sock(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - struct hlist_head *list; - int rv = -EUSERS; - - BUG_ON(sk_hashed(sk)); - - write_lock_bh(&dn_hash_lock); - - if (!scp->addrloc && !port_alloc(sk)) - goto out; - - rv = -EADDRINUSE; - if ((list = dn_find_list(sk)) == NULL) - goto out; - - sk_add_node(sk, list); - rv = 0; -out: - write_unlock_bh(&dn_hash_lock); - return rv; -} - -static void dn_unhash_sock(struct sock *sk) -{ - write_lock(&dn_hash_lock); - sk_del_node_init(sk); - write_unlock(&dn_hash_lock); -} - -static void dn_unhash_sock_bh(struct sock *sk) -{ - write_lock_bh(&dn_hash_lock); - sk_del_node_init(sk); - write_unlock_bh(&dn_hash_lock); -} - -static struct hlist_head *listen_hash(struct sockaddr_dn *addr) -{ - int i; - unsigned int hash = addr->sdn_objnum; - - if (hash == 0) { - hash = addr->sdn_objnamel; - for(i = 0; i < le16_to_cpu(addr->sdn_objnamel); i++) { - hash ^= addr->sdn_objname[i]; - hash ^= (hash << 3); - } - } - - return &dn_sk_hash[hash & DN_SK_HASH_MASK]; -} - -/* - * Called to transform a socket from bound (i.e. with a local address) - * into a listening socket (doesn't need a local port number) and rehashes - * based upon the object name/number. - */ -static void dn_rehash_sock(struct sock *sk) -{ - struct hlist_head *list; - struct dn_scp *scp = DN_SK(sk); - - if (scp->addr.sdn_flags & SDF_WILD) - return; - - write_lock_bh(&dn_hash_lock); - sk_del_node_init(sk); - DN_SK(sk)->addrloc = 0; - list = listen_hash(&DN_SK(sk)->addr); - sk_add_node(sk, list); - write_unlock_bh(&dn_hash_lock); -} - -int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned char type) -{ - int len = 2; - - *buf++ = type; - - switch (type) { - case 0: - *buf++ = sdn->sdn_objnum; - break; - case 1: - *buf++ = 0; - *buf++ = le16_to_cpu(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); - len = 3 + le16_to_cpu(sdn->sdn_objnamel); - break; - case 2: - memset(buf, 0, 5); - buf += 5; - *buf++ = le16_to_cpu(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); - len = 7 + le16_to_cpu(sdn->sdn_objnamel); - break; - } - - return len; -} - -/* - * On reception of usernames, we handle types 1 and 0 for destination - * addresses only. Types 2 and 4 are used for source addresses, but the - * UIC, GIC are ignored and they are both treated the same way. Type 3 - * is never used as I've no idea what its purpose might be or what its - * format is. - */ -int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, unsigned char *fmt) -{ - unsigned char type; - int size = len; - int namel = 12; - - sdn->sdn_objnum = 0; - sdn->sdn_objnamel = cpu_to_le16(0); - memset(sdn->sdn_objname, 0, DN_MAXOBJL); - - if (len < 2) - return -1; - - len -= 2; - *fmt = *data++; - type = *data++; - - switch (*fmt) { - case 0: - sdn->sdn_objnum = type; - return 2; - case 1: - namel = 16; - break; - case 2: - len -= 4; - data += 4; - break; - case 4: - len -= 8; - data += 8; - break; - default: - return -1; - } - - len -= 1; - - if (len < 0) - return -1; - - sdn->sdn_objnamel = cpu_to_le16(*data++); - len -= le16_to_cpu(sdn->sdn_objnamel); - - if ((len < 0) || (le16_to_cpu(sdn->sdn_objnamel) > namel)) - return -1; - - memcpy(sdn->sdn_objname, data, le16_to_cpu(sdn->sdn_objnamel)); - - return size - len; -} - -struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr) -{ - struct hlist_head *list = listen_hash(addr); - struct sock *sk; - - read_lock(&dn_hash_lock); - sk_for_each(sk, list) { - struct dn_scp *scp = DN_SK(sk); - if (sk->sk_state != TCP_LISTEN) - continue; - if (scp->addr.sdn_objnum) { - if (scp->addr.sdn_objnum != addr->sdn_objnum) - continue; - } else { - if (addr->sdn_objnum) - continue; - if (scp->addr.sdn_objnamel != addr->sdn_objnamel) - continue; - if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, le16_to_cpu(addr->sdn_objnamel)) != 0) - continue; - } - sock_hold(sk); - read_unlock(&dn_hash_lock); - return sk; - } - - sk = sk_head(&dn_wild_sk); - if (sk) { - if (sk->sk_state == TCP_LISTEN) - sock_hold(sk); - else - sk = NULL; - } - - read_unlock(&dn_hash_lock); - return sk; -} - -struct sock *dn_find_by_skb(struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct sock *sk; - struct dn_scp *scp; - - read_lock(&dn_hash_lock); - sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { - scp = DN_SK(sk); - if (cb->src != dn_saddr2dn(&scp->peer)) - continue; - if (cb->dst_port != scp->addrloc) - continue; - if (scp->addrrem && (cb->src_port != scp->addrrem)) - continue; - sock_hold(sk); - goto found; - } - sk = NULL; -found: - read_unlock(&dn_hash_lock); - return sk; -} - - - -static void dn_destruct(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - skb_queue_purge(&scp->data_xmit_queue); - skb_queue_purge(&scp->other_xmit_queue); - skb_queue_purge(&scp->other_receive_queue); - - dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); -} - -static unsigned long dn_memory_pressure; - -static void dn_enter_memory_pressure(struct sock *sk) -{ - if (!dn_memory_pressure) { - dn_memory_pressure = 1; - } -} - -static struct proto dn_proto = { - .name = "NSP", - .owner = THIS_MODULE, - .enter_memory_pressure = dn_enter_memory_pressure, - .memory_pressure = &dn_memory_pressure, - .memory_allocated = &decnet_memory_allocated, - .sysctl_mem = sysctl_decnet_mem, - .sysctl_wmem = sysctl_decnet_wmem, - .sysctl_rmem = sysctl_decnet_rmem, - .max_header = DN_MAX_NSP_DATA_HEADER + 64, - .obj_size = sizeof(struct dn_sock), -}; - -static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern) -{ - struct dn_scp *scp; - struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern); - - if (!sk) - goto out; - - if (sock) - sock->ops = &dn_proto_ops; - sock_init_data(sock, sk); - - sk->sk_backlog_rcv = dn_nsp_backlog_rcv; - sk->sk_destruct = dn_destruct; - sk->sk_no_check_tx = 1; - sk->sk_family = PF_DECnet; - sk->sk_protocol = 0; - sk->sk_allocation = gfp; - sk->sk_sndbuf = sysctl_decnet_wmem[1]; - sk->sk_rcvbuf = sysctl_decnet_rmem[1]; - - /* Initialization of DECnet Session Control Port */ - scp = DN_SK(sk); - scp->state = DN_O; /* Open */ - scp->numdat = 1; /* Next data seg to tx */ - scp->numoth = 1; /* Next oth data to tx */ - scp->ackxmt_dat = 0; /* Last data seg ack'ed */ - scp->ackxmt_oth = 0; /* Last oth data ack'ed */ - scp->ackrcv_dat = 0; /* Highest data ack recv*/ - scp->ackrcv_oth = 0; /* Last oth data ack rec*/ - scp->flowrem_sw = DN_SEND; - scp->flowloc_sw = DN_SEND; - scp->flowrem_dat = 0; - scp->flowrem_oth = 1; - scp->flowloc_dat = 0; - scp->flowloc_oth = 1; - scp->services_rem = 0; - scp->services_loc = 1 | NSP_FC_NONE; - scp->info_rem = 0; - scp->info_loc = 0x03; /* NSP version 4.1 */ - scp->segsize_rem = 230 - DN_MAX_NSP_DATA_HEADER; /* Default: Updated by remote segsize */ - scp->nonagle = 0; - scp->multi_ireq = 1; - scp->accept_mode = ACC_IMMED; - scp->addr.sdn_family = AF_DECnet; - scp->peer.sdn_family = AF_DECnet; - scp->accessdata.acc_accl = 5; - memcpy(scp->accessdata.acc_acc, "LINUX", 5); - - scp->max_window = NSP_MAX_WINDOW; - scp->snd_window = NSP_MIN_WINDOW; - scp->nsp_srtt = NSP_INITIAL_SRTT; - scp->nsp_rttvar = NSP_INITIAL_RTTVAR; - scp->nsp_rxtshift = 0; - - skb_queue_head_init(&scp->data_xmit_queue); - skb_queue_head_init(&scp->other_xmit_queue); - skb_queue_head_init(&scp->other_receive_queue); - - scp->persist = 0; - scp->persist_fxn = NULL; - scp->keepalive = 10 * HZ; - scp->keepalive_fxn = dn_keepalive; - - dn_start_slow_timer(sk); -out: - return sk; -} - -/* - * Keepalive timer. - * FIXME: Should respond to SO_KEEPALIVE etc. - */ -static void dn_keepalive(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - /* - * By checking the other_data transmit queue is empty - * we are double checking that we are not sending too - * many of these keepalive frames. - */ - if (skb_queue_empty(&scp->other_xmit_queue)) - dn_nsp_send_link(sk, DN_NOCHANGE, 0); -} - - -/* - * Timer for shutdown/destroyed sockets. - * When socket is dead & no packets have been sent for a - * certain amount of time, they are removed by this - * routine. Also takes care of sending out DI & DC - * frames at correct times. - */ -int dn_destroy_timer(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - scp->persist = dn_nsp_persist(sk); - - switch (scp->state) { - case DN_DI: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); - if (scp->nsp_rxtshift >= decnet_di_count) - scp->state = DN_CN; - return 0; - - case DN_DR: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); - if (scp->nsp_rxtshift >= decnet_dr_count) - scp->state = DN_DRC; - return 0; - - case DN_DN: - if (scp->nsp_rxtshift < decnet_dn_count) { - /* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */ - dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, - GFP_ATOMIC); - return 0; - } - } - - scp->persist = (HZ * decnet_time_wait); - - if (sk->sk_socket) - return 0; - - if (time_after_eq(jiffies, scp->stamp + HZ * decnet_time_wait)) { - dn_unhash_sock(sk); - sock_put(sk); - return 1; - } - - return 0; -} - -static void dn_destroy_sock(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - scp->nsp_rxtshift = 0; /* reset back off */ - - if (sk->sk_socket) { - if (sk->sk_socket->state != SS_UNCONNECTED) - sk->sk_socket->state = SS_DISCONNECTING; - } - - sk->sk_state = TCP_CLOSE; - - switch (scp->state) { - case DN_DN: - dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, - sk->sk_allocation); - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - break; - case DN_CR: - scp->state = DN_DR; - goto disc_reject; - case DN_RUN: - scp->state = DN_DI; - /* fall through */ - case DN_DI: - case DN_DR: -disc_reject: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation); - /* fall through */ - case DN_NC: - case DN_NR: - case DN_RJ: - case DN_DIC: - case DN_CN: - case DN_DRC: - case DN_CI: - case DN_CD: - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - break; - default: - printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n"); - /* fall through */ - case DN_O: - dn_stop_slow_timer(sk); - - dn_unhash_sock_bh(sk); - sock_put(sk); - - break; - } -} - -char *dn_addr2asc(__u16 addr, char *buf) -{ - unsigned short node, area; - - node = addr & 0x03ff; - area = addr >> 10; - sprintf(buf, "%hd.%hd", area, node); - - return buf; -} - - - -static int dn_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - - if (protocol < 0 || protocol > SK_PROTOCOL_MAX) - return -EINVAL; - - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - - switch (sock->type) { - case SOCK_SEQPACKET: - if (protocol != DNPROTO_NSP) - return -EPROTONOSUPPORT; - break; - case SOCK_STREAM: - break; - default: - return -ESOCKTNOSUPPORT; - } - - - if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL) - return -ENOBUFS; - - sk->sk_protocol = protocol; - - return 0; -} - - -static int -dn_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (sk) { - sock_orphan(sk); - sock_hold(sk); - lock_sock(sk); - dn_destroy_sock(sk); - release_sock(sk); - sock_put(sk); - } - - return 0; -} - -static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr; - struct net_device *dev, *ldev; - int rv; - - if (addr_len != sizeof(struct sockaddr_dn)) - return -EINVAL; - - if (saddr->sdn_family != AF_DECnet) - return -EINVAL; - - if (le16_to_cpu(saddr->sdn_nodeaddrl) && (le16_to_cpu(saddr->sdn_nodeaddrl) != 2)) - return -EINVAL; - - if (le16_to_cpu(saddr->sdn_objnamel) > DN_MAXOBJL) - return -EINVAL; - - if (saddr->sdn_flags & ~SDF_WILD) - return -EINVAL; - - if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum || - (saddr->sdn_flags & SDF_WILD))) - return -EACCES; - - if (!(saddr->sdn_flags & SDF_WILD)) { - if (le16_to_cpu(saddr->sdn_nodeaddrl)) { - rcu_read_lock(); - ldev = NULL; - for_each_netdev_rcu(&init_net, dev) { - if (!dev->dn_ptr) - continue; - if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { - ldev = dev; - break; - } - } - rcu_read_unlock(); - if (ldev == NULL) - return -EADDRNOTAVAIL; - } - } - - rv = -EINVAL; - lock_sock(sk); - if (sock_flag(sk, SOCK_ZAPPED)) { - memcpy(&scp->addr, saddr, addr_len); - sock_reset_flag(sk, SOCK_ZAPPED); - - rv = dn_hash_sock(sk); - if (rv) - sock_set_flag(sk, SOCK_ZAPPED); - } - release_sock(sk); - - return rv; -} - - -static int dn_auto_bind(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - int rv; - - sock_reset_flag(sk, SOCK_ZAPPED); - - scp->addr.sdn_flags = 0; - scp->addr.sdn_objnum = 0; - - /* - * This stuff is to keep compatibility with Eduardo's - * patch. I hope I can dispense with it shortly... - */ - if ((scp->accessdata.acc_accl != 0) && - (scp->accessdata.acc_accl <= 12)) { - - scp->addr.sdn_objnamel = cpu_to_le16(scp->accessdata.acc_accl); - memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, le16_to_cpu(scp->addr.sdn_objnamel)); - - scp->accessdata.acc_accl = 0; - memset(scp->accessdata.acc_acc, 0, 40); - } - /* End of compatibility stuff */ - - scp->addr.sdn_add.a_len = cpu_to_le16(2); - rv = dn_dev_bind_default((__le16 *)scp->addr.sdn_add.a_addr); - if (rv == 0) { - rv = dn_hash_sock(sk); - if (rv) - sock_set_flag(sk, SOCK_ZAPPED); - } - - return rv; -} - -static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) -{ - struct dn_scp *scp = DN_SK(sk); - DEFINE_WAIT_FUNC(wait, woken_wake_function); - int err; - - if (scp->state != DN_CR) - return -EINVAL; - - scp->state = DN_CC; - scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk)); - dn_send_conn_conf(sk, allocation); - - add_wait_queue(sk_sleep(sk), &wait); - for(;;) { - release_sock(sk); - if (scp->state == DN_CC) - *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); - lock_sock(sk); - err = 0; - if (scp->state == DN_RUN) - break; - err = sock_error(sk); - if (err) - break; - err = sock_intr_errno(*timeo); - if (signal_pending(current)) - break; - err = -EAGAIN; - if (!*timeo) - break; - } - remove_wait_queue(sk_sleep(sk), &wait); - if (err == 0) { - sk->sk_socket->state = SS_CONNECTED; - } else if (scp->state != DN_CC) { - sk->sk_socket->state = SS_UNCONNECTED; - } - return err; -} - -static int dn_wait_run(struct sock *sk, long *timeo) -{ - struct dn_scp *scp = DN_SK(sk); - DEFINE_WAIT_FUNC(wait, woken_wake_function); - int err = 0; - - if (scp->state == DN_RUN) - goto out; - - if (!*timeo) - return -EALREADY; - - add_wait_queue(sk_sleep(sk), &wait); - for(;;) { - release_sock(sk); - if (scp->state == DN_CI || scp->state == DN_CC) - *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); - lock_sock(sk); - err = 0; - if (scp->state == DN_RUN) - break; - err = sock_error(sk); - if (err) - break; - err = sock_intr_errno(*timeo); - if (signal_pending(current)) - break; - err = -ETIMEDOUT; - if (!*timeo) - break; - } - remove_wait_queue(sk_sleep(sk), &wait); -out: - if (err == 0) { - sk->sk_socket->state = SS_CONNECTED; - } else if (scp->state != DN_CI && scp->state != DN_CC) { - sk->sk_socket->state = SS_UNCONNECTED; - } - return err; -} - -static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags) -{ - struct socket *sock = sk->sk_socket; - struct dn_scp *scp = DN_SK(sk); - int err = -EISCONN; - struct flowidn fld; - struct dst_entry *dst; - - if (sock->state == SS_CONNECTED) - goto out; - - if (sock->state == SS_CONNECTING) { - err = 0; - if (scp->state == DN_RUN) { - sock->state = SS_CONNECTED; - goto out; - } - err = -ECONNREFUSED; - if (scp->state != DN_CI && scp->state != DN_CC) { - sock->state = SS_UNCONNECTED; - goto out; - } - return dn_wait_run(sk, timeo); - } - - err = -EINVAL; - if (scp->state != DN_O) - goto out; - - if (addr == NULL || addrlen != sizeof(struct sockaddr_dn)) - goto out; - if (addr->sdn_family != AF_DECnet) - goto out; - if (addr->sdn_flags & SDF_WILD) - goto out; - - if (sock_flag(sk, SOCK_ZAPPED)) { - err = dn_auto_bind(sk->sk_socket); - if (err) - goto out; - } - - memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn)); - - err = -EHOSTUNREACH; - memset(&fld, 0, sizeof(fld)); - fld.flowidn_oif = sk->sk_bound_dev_if; - fld.daddr = dn_saddr2dn(&scp->peer); - fld.saddr = dn_saddr2dn(&scp->addr); - dn_sk_ports_copy(&fld, scp); - fld.flowidn_proto = DNPROTO_NSP; - if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0) - goto out; - dst = __sk_dst_get(sk); - sk->sk_route_caps = dst->dev->features; - sock->state = SS_CONNECTING; - scp->state = DN_CI; - scp->segsize_loc = dst_metric_advmss(dst); - - dn_nsp_send_conninit(sk, NSP_CI); - err = -EINPROGRESS; - if (*timeo) { - err = dn_wait_run(sk, timeo); - } -out: - return err; -} - -static int dn_connect(struct socket *sock, struct sockaddr *uaddr, int addrlen, int flags) -{ - struct sockaddr_dn *addr = (struct sockaddr_dn *)uaddr; - struct sock *sk = sock->sk; - int err; - long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - - lock_sock(sk); - err = __dn_connect(sk, addr, addrlen, &timeo, 0); - release_sock(sk); - - return err; -} - -static inline int dn_check_state(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags) -{ - struct dn_scp *scp = DN_SK(sk); - - switch (scp->state) { - case DN_RUN: - return 0; - case DN_CR: - return dn_confirm_accept(sk, timeo, sk->sk_allocation); - case DN_CI: - case DN_CC: - return dn_wait_run(sk, timeo); - case DN_O: - return __dn_connect(sk, addr, addrlen, timeo, flags); - } - - return -EINVAL; -} - - -static void dn_access_copy(struct sk_buff *skb, struct accessdata_dn *acc) -{ - unsigned char *ptr = skb->data; - - acc->acc_userl = *ptr++; - memcpy(&acc->acc_user, ptr, acc->acc_userl); - ptr += acc->acc_userl; - - acc->acc_passl = *ptr++; - memcpy(&acc->acc_pass, ptr, acc->acc_passl); - ptr += acc->acc_passl; - - acc->acc_accl = *ptr++; - memcpy(&acc->acc_acc, ptr, acc->acc_accl); - - skb_pull(skb, acc->acc_accl + acc->acc_passl + acc->acc_userl + 3); - -} - -static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt) -{ - unsigned char *ptr = skb->data; - u16 len = *ptr++; /* yes, it's 8bit on the wire */ - - BUG_ON(len > 16); /* we've checked the contents earlier */ - opt->opt_optl = cpu_to_le16(len); - opt->opt_status = 0; - memcpy(opt->opt_data, ptr, len); - skb_pull(skb, len + 1); -} - -static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - struct sk_buff *skb = NULL; - int err = 0; - - add_wait_queue(sk_sleep(sk), &wait); - for(;;) { - release_sock(sk); - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) { - *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo); - skb = skb_dequeue(&sk->sk_receive_queue); - } - lock_sock(sk); - if (skb != NULL) - break; - err = -EINVAL; - if (sk->sk_state != TCP_LISTEN) - break; - err = sock_intr_errno(*timeo); - if (signal_pending(current)) - break; - err = -EAGAIN; - if (!*timeo) - break; - } - remove_wait_queue(sk_sleep(sk), &wait); - - return skb == NULL ? ERR_PTR(err) : skb; -} - -static int dn_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) -{ - struct sock *sk = sock->sk, *newsk; - struct sk_buff *skb = NULL; - struct dn_skb_cb *cb; - unsigned char menuver; - int err = 0; - unsigned char type; - long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - struct dst_entry *dst; - - lock_sock(sk); - - if (sk->sk_state != TCP_LISTEN || DN_SK(sk)->state != DN_O) { - release_sock(sk); - return -EINVAL; - } - - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) { - skb = dn_wait_for_connect(sk, &timeo); - if (IS_ERR(skb)) { - release_sock(sk); - return PTR_ERR(skb); - } - } - - cb = DN_SKB_CB(skb); - sk->sk_ack_backlog--; - newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern); - if (newsk == NULL) { - release_sock(sk); - kfree_skb(skb); - return -ENOBUFS; - } - release_sock(sk); - - dst = skb_dst(skb); - sk_dst_set(newsk, dst); - skb_dst_set(skb, NULL); - - DN_SK(newsk)->state = DN_CR; - DN_SK(newsk)->addrrem = cb->src_port; - DN_SK(newsk)->services_rem = cb->services; - DN_SK(newsk)->info_rem = cb->info; - DN_SK(newsk)->segsize_rem = cb->segsize; - DN_SK(newsk)->accept_mode = DN_SK(sk)->accept_mode; - - if (DN_SK(newsk)->segsize_rem < 230) - DN_SK(newsk)->segsize_rem = 230; - - if ((DN_SK(newsk)->services_rem & NSP_FC_MASK) == NSP_FC_NONE) - DN_SK(newsk)->max_window = decnet_no_fc_max_cwnd; - - newsk->sk_state = TCP_LISTEN; - memcpy(&(DN_SK(newsk)->addr), &(DN_SK(sk)->addr), sizeof(struct sockaddr_dn)); - - /* - * If we are listening on a wild socket, we don't want - * the newly created socket on the wrong hash queue. - */ - DN_SK(newsk)->addr.sdn_flags &= ~SDF_WILD; - - skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->addr), &type)); - skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->peer), &type)); - *(__le16 *)(DN_SK(newsk)->peer.sdn_add.a_addr) = cb->src; - *(__le16 *)(DN_SK(newsk)->addr.sdn_add.a_addr) = cb->dst; - - menuver = *skb->data; - skb_pull(skb, 1); - - if (menuver & DN_MENUVER_ACC) - dn_access_copy(skb, &(DN_SK(newsk)->accessdata)); - - if (menuver & DN_MENUVER_USR) - dn_user_copy(skb, &(DN_SK(newsk)->conndata_in)); - - if (menuver & DN_MENUVER_PRX) - DN_SK(newsk)->peer.sdn_flags |= SDF_PROXY; - - if (menuver & DN_MENUVER_UIC) - DN_SK(newsk)->peer.sdn_flags |= SDF_UICPROXY; - - kfree_skb(skb); - - memcpy(&(DN_SK(newsk)->conndata_out), &(DN_SK(sk)->conndata_out), - sizeof(struct optdata_dn)); - memcpy(&(DN_SK(newsk)->discdata_out), &(DN_SK(sk)->discdata_out), - sizeof(struct optdata_dn)); - - lock_sock(newsk); - err = dn_hash_sock(newsk); - if (err == 0) { - sock_reset_flag(newsk, SOCK_ZAPPED); - dn_send_conn_ack(newsk); - - /* - * Here we use sk->sk_allocation since although the conn conf is - * for the newsk, the context is the old socket. - */ - if (DN_SK(newsk)->accept_mode == ACC_IMMED) - err = dn_confirm_accept(newsk, &timeo, - sk->sk_allocation); - } - release_sock(newsk); - return err; -} - - -static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer) -{ - struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr; - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - - lock_sock(sk); - - if (peer) { - if ((sock->state != SS_CONNECTED && - sock->state != SS_CONNECTING) && - scp->accept_mode == ACC_IMMED) { - release_sock(sk); - return -ENOTCONN; - } - - memcpy(sa, &scp->peer, sizeof(struct sockaddr_dn)); - } else { - memcpy(sa, &scp->addr, sizeof(struct sockaddr_dn)); - } - - release_sock(sk); - - return sizeof(struct sockaddr_dn); -} - - -static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - __poll_t mask = datagram_poll(file, sock, wait); - - if (!skb_queue_empty_lockless(&scp->other_receive_queue)) - mask |= EPOLLRDBAND; - - return mask; -} - -static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - int err = -EOPNOTSUPP; - long amount = 0; - struct sk_buff *skb; - int val; - - switch(cmd) - { - case SIOCGIFADDR: - case SIOCSIFADDR: - return dn_dev_ioctl(cmd, (void __user *)arg); - - case SIOCATMARK: - lock_sock(sk); - val = !skb_queue_empty(&scp->other_receive_queue); - if (scp->state != DN_RUN) - val = -ENOTCONN; - release_sock(sk); - return val; - - case TIOCOUTQ: - amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amount < 0) - amount = 0; - err = put_user(amount, (int __user *)arg); - break; - - case TIOCINQ: - lock_sock(sk); - skb = skb_peek(&scp->other_receive_queue); - if (skb) { - amount = skb->len; - } else { - skb_queue_walk(&sk->sk_receive_queue, skb) - amount += skb->len; - } - release_sock(sk); - err = put_user(amount, (int __user *)arg); - break; - - default: - err = -ENOIOCTLCMD; - break; - } - - return err; -} - -static int dn_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - int err = -EINVAL; - - lock_sock(sk); - - if (sock_flag(sk, SOCK_ZAPPED)) - goto out; - - if ((DN_SK(sk)->state != DN_O) || (sk->sk_state == TCP_LISTEN)) - goto out; - - sk->sk_max_ack_backlog = backlog; - sk->sk_ack_backlog = 0; - sk->sk_state = TCP_LISTEN; - err = 0; - dn_rehash_sock(sk); - -out: - release_sock(sk); - - return err; -} - - -static int dn_shutdown(struct socket *sock, int how) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - int err = -ENOTCONN; - - lock_sock(sk); - - if (sock->state == SS_UNCONNECTED) - goto out; - - err = 0; - if (sock->state == SS_DISCONNECTING) - goto out; - - err = -EINVAL; - if (scp->state == DN_O) - goto out; - - if (how != SHUT_RDWR) - goto out; - - sk->sk_shutdown = SHUTDOWN_MASK; - dn_destroy_sock(sk); - err = 0; - -out: - release_sock(sk); - - return err; -} - -static int dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - int err; - - lock_sock(sk); - err = __dn_setsockopt(sock, level, optname, optval, optlen, 0); - release_sock(sk); -#ifdef CONFIG_NETFILTER - /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != DSO_LINKINFO && - optname != DSO_STREAM && optname != DSO_SEQPACKET) - err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen); -#endif - - return err; -} - -static int __dn_setsockopt(struct socket *sock, int level,int optname, char __user *optval, unsigned int optlen, int flags) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - long timeo; - union { - struct optdata_dn opt; - struct accessdata_dn acc; - int mode; - unsigned long win; - int val; - unsigned char services; - unsigned char info; - } u; - int err; - - if (optlen && !optval) - return -EINVAL; - - if (optlen > sizeof(u)) - return -EINVAL; - - if (copy_from_user(&u, optval, optlen)) - return -EFAULT; - - switch (optname) { - case DSO_CONDATA: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if ((scp->state != DN_O) && (scp->state != DN_CR)) - return -EINVAL; - - if (optlen != sizeof(struct optdata_dn)) - return -EINVAL; - - if (le16_to_cpu(u.opt.opt_optl) > 16) - return -EINVAL; - - memcpy(&scp->conndata_out, &u.opt, optlen); - break; - - case DSO_DISDATA: - if (sock->state != SS_CONNECTED && - scp->accept_mode == ACC_IMMED) - return -ENOTCONN; - - if (optlen != sizeof(struct optdata_dn)) - return -EINVAL; - - if (le16_to_cpu(u.opt.opt_optl) > 16) - return -EINVAL; - - memcpy(&scp->discdata_out, &u.opt, optlen); - break; - - case DSO_CONACCESS: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if (scp->state != DN_O) - return -EINVAL; - - if (optlen != sizeof(struct accessdata_dn)) - return -EINVAL; - - if ((u.acc.acc_accl > DN_MAXACCL) || - (u.acc.acc_passl > DN_MAXACCL) || - (u.acc.acc_userl > DN_MAXACCL)) - return -EINVAL; - - memcpy(&scp->accessdata, &u.acc, optlen); - break; - - case DSO_ACCEPTMODE: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if (scp->state != DN_O) - return -EINVAL; - - if (optlen != sizeof(int)) - return -EINVAL; - - if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER)) - return -EINVAL; - - scp->accept_mode = (unsigned char)u.mode; - break; - - case DSO_CONACCEPT: - if (scp->state != DN_CR) - return -EINVAL; - timeo = sock_rcvtimeo(sk, 0); - err = dn_confirm_accept(sk, &timeo, sk->sk_allocation); - return err; - - case DSO_CONREJECT: - if (scp->state != DN_CR) - return -EINVAL; - - scp->state = DN_DR; - sk->sk_shutdown = SHUTDOWN_MASK; - dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation); - break; - - case DSO_MAXWINDOW: - if (optlen != sizeof(unsigned long)) - return -EINVAL; - if (u.win > NSP_MAX_WINDOW) - u.win = NSP_MAX_WINDOW; - if (u.win == 0) - return -EINVAL; - scp->max_window = u.win; - if (scp->snd_window > u.win) - scp->snd_window = u.win; - break; - - case DSO_NODELAY: - if (optlen != sizeof(int)) - return -EINVAL; - if (scp->nonagle == TCP_NAGLE_CORK) - return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF; - /* if (scp->nonagle == 1) { Push pending frames } */ - break; - - case DSO_CORK: - if (optlen != sizeof(int)) - return -EINVAL; - if (scp->nonagle == TCP_NAGLE_OFF) - return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK; - /* if (scp->nonagle == 0) { Push pending frames } */ - break; - - case DSO_SERVICES: - if (optlen != sizeof(unsigned char)) - return -EINVAL; - if ((u.services & ~NSP_FC_MASK) != 0x01) - return -EINVAL; - if ((u.services & NSP_FC_MASK) == NSP_FC_MASK) - return -EINVAL; - scp->services_loc = u.services; - break; - - case DSO_INFO: - if (optlen != sizeof(unsigned char)) - return -EINVAL; - if (u.info & 0xfc) - return -EINVAL; - scp->info_loc = u.info; - break; - - case DSO_LINKINFO: - case DSO_STREAM: - case DSO_SEQPACKET: - default: - return -ENOPROTOOPT; - } - - return 0; -} - -static int dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - int err; - - lock_sock(sk); - err = __dn_getsockopt(sock, level, optname, optval, optlen, 0); - release_sock(sk); -#ifdef CONFIG_NETFILTER - if (err == -ENOPROTOOPT && optname != DSO_STREAM && - optname != DSO_SEQPACKET && optname != DSO_CONACCEPT && - optname != DSO_CONREJECT) { - int len; - - if (get_user(len, optlen)) - return -EFAULT; - - err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len); - if (err >= 0) - err = put_user(len, optlen); - } -#endif - - return err; -} - -static int __dn_getsockopt(struct socket *sock, int level,int optname, char __user *optval,int __user *optlen, int flags) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - struct linkinfo_dn link; - unsigned int r_len; - void *r_data = NULL; - unsigned int val; - - if(get_user(r_len , optlen)) - return -EFAULT; - - switch (optname) { - case DSO_CONDATA: - if (r_len > sizeof(struct optdata_dn)) - r_len = sizeof(struct optdata_dn); - r_data = &scp->conndata_in; - break; - - case DSO_DISDATA: - if (r_len > sizeof(struct optdata_dn)) - r_len = sizeof(struct optdata_dn); - r_data = &scp->discdata_in; - break; - - case DSO_CONACCESS: - if (r_len > sizeof(struct accessdata_dn)) - r_len = sizeof(struct accessdata_dn); - r_data = &scp->accessdata; - break; - - case DSO_ACCEPTMODE: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->accept_mode; - break; - - case DSO_LINKINFO: - if (r_len > sizeof(struct linkinfo_dn)) - r_len = sizeof(struct linkinfo_dn); - - memset(&link, 0, sizeof(link)); - - switch (sock->state) { - case SS_CONNECTING: - link.idn_linkstate = LL_CONNECTING; - break; - case SS_DISCONNECTING: - link.idn_linkstate = LL_DISCONNECTING; - break; - case SS_CONNECTED: - link.idn_linkstate = LL_RUNNING; - break; - default: - link.idn_linkstate = LL_INACTIVE; - } - - link.idn_segsize = scp->segsize_rem; - r_data = &link; - break; - - case DSO_MAXWINDOW: - if (r_len > sizeof(unsigned long)) - r_len = sizeof(unsigned long); - r_data = &scp->max_window; - break; - - case DSO_NODELAY: - if (r_len > sizeof(int)) - r_len = sizeof(int); - val = (scp->nonagle == TCP_NAGLE_OFF); - r_data = &val; - break; - - case DSO_CORK: - if (r_len > sizeof(int)) - r_len = sizeof(int); - val = (scp->nonagle == TCP_NAGLE_CORK); - r_data = &val; - break; - - case DSO_SERVICES: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->services_rem; - break; - - case DSO_INFO: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->info_rem; - break; - - case DSO_STREAM: - case DSO_SEQPACKET: - case DSO_CONACCEPT: - case DSO_CONREJECT: - default: - return -ENOPROTOOPT; - } - - if (r_data) { - if (copy_to_user(optval, r_data, r_len)) - return -EFAULT; - if (put_user(r_len, optlen)) - return -EFAULT; - } - - return 0; -} - - -static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int target) -{ - struct sk_buff *skb; - int len = 0; - - if (flags & MSG_OOB) - return !skb_queue_empty(q) ? 1 : 0; - - skb_queue_walk(q, skb) { - struct dn_skb_cb *cb = DN_SKB_CB(skb); - len += skb->len; - - if (cb->nsp_flags & 0x40) { - /* SOCK_SEQPACKET reads to EOM */ - if (sk->sk_type == SOCK_SEQPACKET) - return 1; - /* so does SOCK_STREAM unless WAITALL is specified */ - if (!(flags & MSG_WAITALL)) - return 1; - } - - /* minimum data length for read exceeded */ - if (len >= target) - return 1; - } - - return 0; -} - - -static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, - int flags) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - struct sk_buff_head *queue = &sk->sk_receive_queue; - size_t target = size > 1 ? 1 : 0; - size_t copied = 0; - int rv = 0; - struct sk_buff *skb, *n; - struct dn_skb_cb *cb = NULL; - unsigned char eor = 0; - long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - - lock_sock(sk); - - if (sock_flag(sk, SOCK_ZAPPED)) { - rv = -EADDRNOTAVAIL; - goto out; - } - - if (sk->sk_shutdown & RCV_SHUTDOWN) { - rv = 0; - goto out; - } - - rv = dn_check_state(sk, NULL, 0, &timeo, flags); - if (rv) - goto out; - - if (flags & ~(MSG_CMSG_COMPAT|MSG_PEEK|MSG_OOB|MSG_WAITALL|MSG_DONTWAIT|MSG_NOSIGNAL)) { - rv = -EOPNOTSUPP; - goto out; - } - - if (flags & MSG_OOB) - queue = &scp->other_receive_queue; - - if (flags & MSG_WAITALL) - target = size; - - - /* - * See if there is data ready to read, sleep if there isn't - */ - for(;;) { - DEFINE_WAIT_FUNC(wait, woken_wake_function); - - if (sk->sk_err) - goto out; - - if (!skb_queue_empty(&scp->other_receive_queue)) { - if (!(flags & MSG_OOB)) { - msg->msg_flags |= MSG_OOB; - if (!scp->other_report) { - scp->other_report = 1; - goto out; - } - } - } - - if (scp->state != DN_RUN) - goto out; - - if (signal_pending(current)) { - rv = sock_intr_errno(timeo); - goto out; - } - - if (dn_data_ready(sk, queue, flags, target)) - break; - - if (flags & MSG_DONTWAIT) { - rv = -EWOULDBLOCK; - goto out; - } - - add_wait_queue(sk_sleep(sk), &wait); - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait); - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - remove_wait_queue(sk_sleep(sk), &wait); - } - - skb_queue_walk_safe(queue, skb, n) { - unsigned int chunk = skb->len; - cb = DN_SKB_CB(skb); - - if ((chunk + copied) > size) - chunk = size - copied; - - if (memcpy_to_msg(msg, skb->data, chunk)) { - rv = -EFAULT; - break; - } - copied += chunk; - - if (!(flags & MSG_PEEK)) - skb_pull(skb, chunk); - - eor = cb->nsp_flags & 0x40; - - if (skb->len == 0) { - skb_unlink(skb, queue); - kfree_skb(skb); - /* - * N.B. Don't refer to skb or cb after this point - * in loop. - */ - if ((scp->flowloc_sw == DN_DONTSEND) && !dn_congested(sk)) { - scp->flowloc_sw = DN_SEND; - dn_nsp_send_link(sk, DN_SEND, 0); - } - } - - if (eor) { - if (sk->sk_type == SOCK_SEQPACKET) - break; - if (!(flags & MSG_WAITALL)) - break; - } - - if (flags & MSG_OOB) - break; - - if (copied >= target) - break; - } - - rv = copied; - - - if (eor && (sk->sk_type == SOCK_SEQPACKET)) - msg->msg_flags |= MSG_EOR; - -out: - if (rv == 0) - rv = (flags & MSG_PEEK) ? -sk->sk_err : sock_error(sk); - - if ((rv >= 0) && msg->msg_name) { - __sockaddr_check_size(sizeof(struct sockaddr_dn)); - memcpy(msg->msg_name, &scp->peer, sizeof(struct sockaddr_dn)); - msg->msg_namelen = sizeof(struct sockaddr_dn); - } - - release_sock(sk); - - return rv; -} - - -static inline int dn_queue_too_long(struct dn_scp *scp, struct sk_buff_head *queue, int flags) -{ - unsigned char fctype = scp->services_rem & NSP_FC_MASK; - if (skb_queue_len(queue) >= scp->snd_window) - return 1; - if (fctype != NSP_FC_NONE) { - if (flags & MSG_OOB) { - if (scp->flowrem_oth == 0) - return 1; - } else { - if (scp->flowrem_dat == 0) - return 1; - } - } - return 0; -} - -/* - * The DECnet spec requires that the "routing layer" accepts packets which - * are at least 230 bytes in size. This excludes any headers which the NSP - * layer might add, so we always assume that we'll be using the maximal - * length header on data packets. The variation in length is due to the - * inclusion (or not) of the two 16 bit acknowledgement fields so it doesn't - * make much practical difference. - */ -unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu) -{ - unsigned int mss = 230 - DN_MAX_NSP_DATA_HEADER; - if (dev) { - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - mtu -= LL_RESERVED_SPACE(dev); - if (dn_db->use_long) - mtu -= 21; - else - mtu -= 6; - mtu -= DN_MAX_NSP_DATA_HEADER; - } else { - /* - * 21 = long header, 16 = guess at MAC header length - */ - mtu -= (21 + DN_MAX_NSP_DATA_HEADER + 16); - } - if (mtu > mss) - mss = mtu; - return mss; -} - -static inline unsigned int dn_current_mss(struct sock *sk, int flags) -{ - struct dst_entry *dst = __sk_dst_get(sk); - struct dn_scp *scp = DN_SK(sk); - int mss_now = min_t(int, scp->segsize_loc, scp->segsize_rem); - - /* Other data messages are limited to 16 bytes per packet */ - if (flags & MSG_OOB) - return 16; - - /* This works out the maximum size of segment we can send out */ - if (dst) { - u32 mtu = dst_mtu(dst); - mss_now = min_t(int, dn_mss_from_pmtu(dst->dev, mtu), mss_now); - } - - return mss_now; -} - -/* - * N.B. We get the timeout wrong here, but then we always did get it - * wrong before and this is another step along the road to correcting - * it. It ought to get updated each time we pass through the routine, - * but in practise it probably doesn't matter too much for now. - */ -static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk, - unsigned long datalen, int noblock, - int *errcode) -{ - struct sk_buff *skb = sock_alloc_send_skb(sk, datalen, - noblock, errcode); - if (skb) { - skb->protocol = htons(ETH_P_DNA_RT); - skb->pkt_type = PACKET_OUTGOING; - } - return skb; -} - -static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) -{ - struct sock *sk = sock->sk; - struct dn_scp *scp = DN_SK(sk); - size_t mss; - struct sk_buff_head *queue = &scp->data_xmit_queue; - int flags = msg->msg_flags; - int err = 0; - size_t sent = 0; - int addr_len = msg->msg_namelen; - DECLARE_SOCKADDR(struct sockaddr_dn *, addr, msg->msg_name); - struct sk_buff *skb = NULL; - struct dn_skb_cb *cb; - size_t len; - unsigned char fctype; - long timeo; - - if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT)) - return -EOPNOTSUPP; - - if (addr_len && (addr_len != sizeof(struct sockaddr_dn))) - return -EINVAL; - - lock_sock(sk); - timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - /* - * The only difference between stream sockets and sequenced packet - * sockets is that the stream sockets always behave as if MSG_EOR - * has been set. - */ - if (sock->type == SOCK_STREAM) { - if (flags & MSG_EOR) { - err = -EINVAL; - goto out; - } - flags |= MSG_EOR; - } - - - err = dn_check_state(sk, addr, addr_len, &timeo, flags); - if (err) - goto out_err; - - if (sk->sk_shutdown & SEND_SHUTDOWN) { - err = -EPIPE; - if (!(flags & MSG_NOSIGNAL)) - send_sig(SIGPIPE, current, 0); - goto out_err; - } - - if ((flags & MSG_TRYHARD) && sk->sk_dst_cache) - dst_negative_advice(sk); - - mss = scp->segsize_rem; - fctype = scp->services_rem & NSP_FC_MASK; - - mss = dn_current_mss(sk, flags); - - if (flags & MSG_OOB) { - queue = &scp->other_xmit_queue; - if (size > mss) { - err = -EMSGSIZE; - goto out; - } - } - - scp->persist_fxn = dn_nsp_xmit_timeout; - - while(sent < size) { - err = sock_error(sk); - if (err) - goto out; - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - goto out; - } - - /* - * Calculate size that we wish to send. - */ - len = size - sent; - - if (len > mss) - len = mss; - - /* - * Wait for queue size to go down below the window - * size. - */ - if (dn_queue_too_long(scp, queue, flags)) { - DEFINE_WAIT_FUNC(wait, woken_wake_function); - - if (flags & MSG_DONTWAIT) { - err = -EWOULDBLOCK; - goto out; - } - - add_wait_queue(sk_sleep(sk), &wait); - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, - !dn_queue_too_long(scp, queue, flags), &wait); - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - remove_wait_queue(sk_sleep(sk), &wait); - continue; - } - - /* - * Get a suitably sized skb. - * 64 is a bit of a hack really, but its larger than any - * link-layer headers and has served us well as a good - * guess as to their real length. - */ - skb = dn_alloc_send_pskb(sk, len + 64 + DN_MAX_NSP_DATA_HEADER, - flags & MSG_DONTWAIT, &err); - - if (err) - break; - - if (!skb) - continue; - - cb = DN_SKB_CB(skb); - - skb_reserve(skb, 64 + DN_MAX_NSP_DATA_HEADER); - - if (memcpy_from_msg(skb_put(skb, len), msg, len)) { - err = -EFAULT; - goto out; - } - - if (flags & MSG_OOB) { - cb->nsp_flags = 0x30; - if (fctype != NSP_FC_NONE) - scp->flowrem_oth--; - } else { - cb->nsp_flags = 0x00; - if (scp->seg_total == 0) - cb->nsp_flags |= 0x20; - - scp->seg_total += len; - - if (((sent + len) == size) && (flags & MSG_EOR)) { - cb->nsp_flags |= 0x40; - scp->seg_total = 0; - if (fctype == NSP_FC_SCMC) - scp->flowrem_dat--; - } - if (fctype == NSP_FC_SRC) - scp->flowrem_dat--; - } - - sent += len; - dn_nsp_queue_xmit(sk, skb, sk->sk_allocation, flags & MSG_OOB); - skb = NULL; - - scp->persist = dn_nsp_persist(sk); - - } -out: - - kfree_skb(skb); - - release_sock(sk); - - return sent ? sent : err; - -out_err: - err = sk_stream_error(sk, flags, err); - release_sock(sk); - return err; -} - -static int dn_device_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_UP: - dn_dev_up(dev); - break; - case NETDEV_DOWN: - dn_dev_down(dev); - break; - default: - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block dn_dev_notifier = { - .notifier_call = dn_device_event, -}; - -static struct packet_type dn_dix_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_DNA_RT), - .func = dn_route_rcv, -}; - -#ifdef CONFIG_PROC_FS -struct dn_iter_state { - int bucket; -}; - -static struct sock *dn_socket_get_first(struct seq_file *seq) -{ - struct dn_iter_state *state = seq->private; - struct sock *n = NULL; - - for(state->bucket = 0; - state->bucket < DN_SK_HASH_SIZE; - ++state->bucket) { - n = sk_head(&dn_sk_hash[state->bucket]); - if (n) - break; - } - - return n; -} - -static struct sock *dn_socket_get_next(struct seq_file *seq, - struct sock *n) -{ - struct dn_iter_state *state = seq->private; - - n = sk_next(n); -try_again: - if (n) - goto out; - if (++state->bucket >= DN_SK_HASH_SIZE) - goto out; - n = sk_head(&dn_sk_hash[state->bucket]); - goto try_again; -out: - return n; -} - -static struct sock *socket_get_idx(struct seq_file *seq, loff_t *pos) -{ - struct sock *sk = dn_socket_get_first(seq); - - if (sk) { - while(*pos && (sk = dn_socket_get_next(seq, sk))) - --*pos; - } - return *pos ? NULL : sk; -} - -static void *dn_socket_get_idx(struct seq_file *seq, loff_t pos) -{ - void *rc; - read_lock_bh(&dn_hash_lock); - rc = socket_get_idx(seq, &pos); - if (!rc) { - read_unlock_bh(&dn_hash_lock); - } - return rc; -} - -static void *dn_socket_seq_start(struct seq_file *seq, loff_t *pos) -{ - return *pos ? dn_socket_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; -} - -static void *dn_socket_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - void *rc; - - if (v == SEQ_START_TOKEN) { - rc = dn_socket_get_idx(seq, 0); - goto out; - } - - rc = dn_socket_get_next(seq, v); - if (rc) - goto out; - read_unlock_bh(&dn_hash_lock); -out: - ++*pos; - return rc; -} - -static void dn_socket_seq_stop(struct seq_file *seq, void *v) -{ - if (v && v != SEQ_START_TOKEN) - read_unlock_bh(&dn_hash_lock); -} - -#define IS_NOT_PRINTABLE(x) ((x) < 32 || (x) > 126) - -static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf) -{ - int i; - - switch (le16_to_cpu(dn->sdn_objnamel)) { - case 0: - sprintf(buf, "%d", dn->sdn_objnum); - break; - default: - for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) { - buf[i] = dn->sdn_objname[i]; - if (IS_NOT_PRINTABLE(buf[i])) - buf[i] = '.'; - } - buf[i] = 0; - } -} - -static char *dn_state2asc(unsigned char state) -{ - switch (state) { - case DN_O: - return "OPEN"; - case DN_CR: - return " CR"; - case DN_DR: - return " DR"; - case DN_DRC: - return " DRC"; - case DN_CC: - return " CC"; - case DN_CI: - return " CI"; - case DN_NR: - return " NR"; - case DN_NC: - return " NC"; - case DN_CD: - return " CD"; - case DN_RJ: - return " RJ"; - case DN_RUN: - return " RUN"; - case DN_DI: - return " DI"; - case DN_DIC: - return " DIC"; - case DN_DN: - return " DN"; - case DN_CL: - return " CL"; - case DN_CN: - return " CN"; - } - - return "????"; -} - -static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - char buf1[DN_ASCBUF_LEN]; - char buf2[DN_ASCBUF_LEN]; - char local_object[DN_MAXOBJL+3]; - char remote_object[DN_MAXOBJL+3]; - - dn_printable_object(&scp->addr, local_object); - dn_printable_object(&scp->peer, remote_object); - - seq_printf(seq, - "%6s/%04X %04d:%04d %04d:%04d %01d %-16s " - "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n", - dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->addr)), buf1), - scp->addrloc, - scp->numdat, - scp->numoth, - scp->ackxmt_dat, - scp->ackxmt_oth, - scp->flowloc_sw, - local_object, - dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->peer)), buf2), - scp->addrrem, - scp->numdat_rcv, - scp->numoth_rcv, - scp->ackrcv_dat, - scp->ackrcv_oth, - scp->flowrem_sw, - remote_object, - dn_state2asc(scp->state), - ((scp->accept_mode == ACC_IMMED) ? "IMMED" : "DEFER")); -} - -static int dn_socket_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) { - seq_puts(seq, "Local Remote\n"); - } else { - dn_socket_format_entry(seq, v); - } - return 0; -} - -static const struct seq_operations dn_socket_seq_ops = { - .start = dn_socket_seq_start, - .next = dn_socket_seq_next, - .stop = dn_socket_seq_stop, - .show = dn_socket_seq_show, -}; -#endif - -static const struct net_proto_family dn_family_ops = { - .family = AF_DECnet, - .create = dn_create, - .owner = THIS_MODULE, -}; - -static const struct proto_ops dn_proto_ops = { - .family = AF_DECnet, - .owner = THIS_MODULE, - .release = dn_release, - .bind = dn_bind, - .connect = dn_connect, - .socketpair = sock_no_socketpair, - .accept = dn_accept, - .getname = dn_getname, - .poll = dn_poll, - .ioctl = dn_ioctl, - .listen = dn_listen, - .shutdown = dn_shutdown, - .setsockopt = dn_setsockopt, - .getsockopt = dn_getsockopt, - .sendmsg = dn_sendmsg, - .recvmsg = dn_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -}; - -MODULE_DESCRIPTION("The Linux DECnet Network Protocol"); -MODULE_AUTHOR("Linux DECnet Project Team"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_DECnet); - -static const char banner[] __initconst = KERN_INFO -"NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n"; - -static int __init decnet_init(void) -{ - int rc; - - printk(banner); - - rc = proto_register(&dn_proto, 1); - if (rc != 0) - goto out; - - dn_neigh_init(); - dn_dev_init(); - dn_route_init(); - dn_fib_init(); - - sock_register(&dn_family_ops); - dev_add_pack(&dn_dix_packet_type); - register_netdevice_notifier(&dn_dev_notifier); - - proc_create_seq_private("decnet", 0444, init_net.proc_net, - &dn_socket_seq_ops, sizeof(struct dn_iter_state), - NULL); - dn_register_sysctl(); -out: - return rc; - -} -module_init(decnet_init); - -/* - * Prevent DECnet module unloading until its fixed properly. - * Requires an audit of the code to check for memory leaks and - * initialisation problems etc. - */ -#if 0 -static void __exit decnet_exit(void) -{ - sock_unregister(AF_DECnet); - rtnl_unregister_all(PF_DECnet); - dev_remove_pack(&dn_dix_packet_type); - - dn_unregister_sysctl(); - - unregister_netdevice_notifier(&dn_dev_notifier); - - dn_route_cleanup(); - dn_dev_cleanup(); - dn_neigh_cleanup(); - dn_fib_cleanup(); - - remove_proc_entry("decnet", init_net.proc_net); - - proto_unregister(&dn_proto); - - rcu_barrier_bh(); /* Wait for completion of call_rcu_bh()'s */ -} -module_exit(decnet_exit); -#endif diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c deleted file mode 100644 index 3235540f6adf..000000000000 --- a/net/decnet/dn_dev.c +++ /dev/null @@ -1,1438 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Device Layer - * - * Authors: Steve Whitehouse <SteveW@ACM.org> - * Eduardo Marcelo Serrat <emserrat@geocities.com> - * - * Changes: - * Steve Whitehouse : Devices now see incoming frames so they - * can mark on who it came from. - * Steve Whitehouse : Fixed bug in creating neighbours. Each neighbour - * can now have a device specific setup func. - * Steve Whitehouse : Added /proc/sys/net/decnet/conf/<dev>/ - * Steve Whitehouse : Fixed bug which sometimes killed timer - * Steve Whitehouse : Multiple ifaddr support - * Steve Whitehouse : SIOCGIFCONF is now a compile time option - * Steve Whitehouse : /proc/sys/net/decnet/conf/<sys>/forwarding - * Steve Whitehouse : Removed timer1 - it's a user space issue now - * Patrick Caulfield : Fixed router hello message format - * Steve Whitehouse : Got rid of constant sizes for blksize for - * devices. All mtu based now. - */ - -#include <linux/capability.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/init.h> -#include <linux/net.h> -#include <linux/netdevice.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/if_addr.h> -#include <linux/if_arp.h> -#include <linux/if_ether.h> -#include <linux/skbuff.h> -#include <linux/sysctl.h> -#include <linux/notifier.h> -#include <linux/slab.h> -#include <linux/jiffies.h> -#include <linux/uaccess.h> -#include <net/net_namespace.h> -#include <net/neighbour.h> -#include <net/dst.h> -#include <net/flow.h> -#include <net/fib_rules.h> -#include <net/netlink.h> -#include <net/dn.h> -#include <net/dn_dev.h> -#include <net/dn_route.h> -#include <net/dn_neigh.h> -#include <net/dn_fib.h> - -#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn)) - -static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00}; -static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00}; -static char dn_hiord[ETH_ALEN] = {0xAA,0x00,0x04,0x00,0x00,0x00}; -static unsigned char dn_eco_version[3] = {0x02,0x00,0x00}; - -extern struct neigh_table dn_neigh_table; - -/* - * decnet_address is kept in network order. - */ -__le16 decnet_address = 0; - -static DEFINE_SPINLOCK(dndev_lock); -static struct net_device *decnet_default_device; -static BLOCKING_NOTIFIER_HEAD(dnaddr_chain); - -static struct dn_dev *dn_dev_create(struct net_device *dev, int *err); -static void dn_dev_delete(struct net_device *dev); -static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa); - -static int dn_eth_up(struct net_device *); -static void dn_eth_down(struct net_device *); -static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa); -static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa); - -static struct dn_dev_parms dn_dev_list[] = { -{ - .type = ARPHRD_ETHER, /* Ethernet */ - .mode = DN_DEV_BCAST, - .state = DN_DEV_S_RU, - .t2 = 1, - .t3 = 10, - .name = "ethernet", - .up = dn_eth_up, - .down = dn_eth_down, - .timer3 = dn_send_brd_hello, -}, -{ - .type = ARPHRD_IPGRE, /* DECnet tunneled over GRE in IP */ - .mode = DN_DEV_BCAST, - .state = DN_DEV_S_RU, - .t2 = 1, - .t3 = 10, - .name = "ipgre", - .timer3 = dn_send_brd_hello, -}, -#if 0 -{ - .type = ARPHRD_X25, /* Bog standard X.25 */ - .mode = DN_DEV_UCAST, - .state = DN_DEV_S_DS, - .t2 = 1, - .t3 = 120, - .name = "x25", - .timer3 = dn_send_ptp_hello, -}, -#endif -#if 0 -{ - .type = ARPHRD_PPP, /* DECnet over PPP */ - .mode = DN_DEV_BCAST, - .state = DN_DEV_S_RU, - .t2 = 1, - .t3 = 10, - .name = "ppp", - .timer3 = dn_send_brd_hello, -}, -#endif -{ - .type = ARPHRD_DDCMP, /* DECnet over DDCMP */ - .mode = DN_DEV_UCAST, - .state = DN_DEV_S_DS, - .t2 = 1, - .t3 = 120, - .name = "ddcmp", - .timer3 = dn_send_ptp_hello, -}, -{ - .type = ARPHRD_LOOPBACK, /* Loopback interface - always last */ - .mode = DN_DEV_BCAST, - .state = DN_DEV_S_RU, - .t2 = 1, - .t3 = 10, - .name = "loopback", - .timer3 = dn_send_brd_hello, -} -}; - -#define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list) - -#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x) - -#ifdef CONFIG_SYSCTL - -static int min_t2[] = { 1 }; -static int max_t2[] = { 60 }; /* No max specified, but this seems sensible */ -static int min_t3[] = { 1 }; -static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MULT or T3MULT */ - -static int min_priority[1]; -static int max_priority[] = { 127 }; /* From DECnet spec */ - -static int dn_forwarding_proc(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -static struct dn_dev_sysctl_table { - struct ctl_table_header *sysctl_header; - struct ctl_table dn_dev_vars[5]; -} dn_dev_sysctl = { - NULL, - { - { - .procname = "forwarding", - .data = (void *)DN_DEV_PARMS_OFFSET(forwarding), - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = dn_forwarding_proc, - }, - { - .procname = "priority", - .data = (void *)DN_DEV_PARMS_OFFSET(priority), - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_priority, - .extra2 = &max_priority - }, - { - .procname = "t2", - .data = (void *)DN_DEV_PARMS_OFFSET(t2), - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_t2, - .extra2 = &max_t2 - }, - { - .procname = "t3", - .data = (void *)DN_DEV_PARMS_OFFSET(t3), - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_t3, - .extra2 = &max_t3 - }, - { } - }, -}; - -static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms) -{ - struct dn_dev_sysctl_table *t; - int i; - - char path[sizeof("net/decnet/conf/") + IFNAMSIZ]; - - t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL); - if (t == NULL) - return; - - for(i = 0; i < ARRAY_SIZE(t->dn_dev_vars) - 1; i++) { - long offset = (long)t->dn_dev_vars[i].data; - t->dn_dev_vars[i].data = ((char *)parms) + offset; - } - - snprintf(path, sizeof(path), "net/decnet/conf/%s", - dev? dev->name : parms->name); - - t->dn_dev_vars[0].extra1 = (void *)dev; - - t->sysctl_header = register_net_sysctl(&init_net, path, t->dn_dev_vars); - if (t->sysctl_header == NULL) - kfree(t); - else - parms->sysctl = t; -} - -static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) -{ - if (parms->sysctl) { - struct dn_dev_sysctl_table *t = parms->sysctl; - parms->sysctl = NULL; - unregister_net_sysctl_table(t->sysctl_header); - kfree(t); - } -} - -static int dn_forwarding_proc(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ -#ifdef CONFIG_DECNET_ROUTER - struct net_device *dev = table->extra1; - struct dn_dev *dn_db; - int err; - int tmp, old; - - if (table->extra1 == NULL) - return -EINVAL; - - dn_db = rcu_dereference_raw(dev->dn_ptr); - old = dn_db->parms.forwarding; - - err = proc_dointvec(table, write, buffer, lenp, ppos); - - if ((err >= 0) && write) { - if (dn_db->parms.forwarding < 0) - dn_db->parms.forwarding = 0; - if (dn_db->parms.forwarding > 2) - dn_db->parms.forwarding = 2; - /* - * What an ugly hack this is... its works, just. It - * would be nice if sysctl/proc were just that little - * bit more flexible so I don't have to write a special - * routine, or suffer hacks like this - SJW - */ - tmp = dn_db->parms.forwarding; - dn_db->parms.forwarding = old; - if (dn_db->parms.down) - dn_db->parms.down(dev); - dn_db->parms.forwarding = tmp; - if (dn_db->parms.up) - dn_db->parms.up(dev); - } - - return err; -#else - return -EINVAL; -#endif -} - -#else /* CONFIG_SYSCTL */ -static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) -{ -} -static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms) -{ -} - -#endif /* CONFIG_SYSCTL */ - -static inline __u16 mtu2blksize(struct net_device *dev) -{ - u32 blksize = dev->mtu; - if (blksize > 0xffff) - blksize = 0xffff; - - if (dev->type == ARPHRD_ETHER || - dev->type == ARPHRD_PPP || - dev->type == ARPHRD_IPGRE || - dev->type == ARPHRD_LOOPBACK) - blksize -= 2; - - return (__u16)blksize; -} - -static struct dn_ifaddr *dn_dev_alloc_ifa(void) -{ - struct dn_ifaddr *ifa; - - ifa = kzalloc(sizeof(*ifa), GFP_KERNEL); - - return ifa; -} - -static void dn_dev_free_ifa(struct dn_ifaddr *ifa) -{ - kfree_rcu(ifa, rcu); -} - -static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy) -{ - struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap); - unsigned char mac_addr[6]; - struct net_device *dev = dn_db->dev; - - ASSERT_RTNL(); - - *ifap = ifa1->ifa_next; - - if (dn_db->dev->type == ARPHRD_ETHER) { - if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) { - dn_dn2eth(mac_addr, ifa1->ifa_local); - dev_mc_del(dev, mac_addr); - } - } - - dn_ifaddr_notify(RTM_DELADDR, ifa1); - blocking_notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1); - if (destroy) { - dn_dev_free_ifa(ifa1); - - if (dn_db->ifa_list == NULL) - dn_dev_delete(dn_db->dev); - } -} - -static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa) -{ - struct net_device *dev = dn_db->dev; - struct dn_ifaddr *ifa1; - unsigned char mac_addr[6]; - - ASSERT_RTNL(); - - /* Check for duplicates */ - for (ifa1 = rtnl_dereference(dn_db->ifa_list); - ifa1 != NULL; - ifa1 = rtnl_dereference(ifa1->ifa_next)) { - if (ifa1->ifa_local == ifa->ifa_local) - return -EEXIST; - } - - if (dev->type == ARPHRD_ETHER) { - if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) { - dn_dn2eth(mac_addr, ifa->ifa_local); - dev_mc_add(dev, mac_addr); - } - } - - ifa->ifa_next = dn_db->ifa_list; - rcu_assign_pointer(dn_db->ifa_list, ifa); - - dn_ifaddr_notify(RTM_NEWADDR, ifa); - blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); - - return 0; -} - -static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa) -{ - struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); - int rv; - - if (dn_db == NULL) { - int err; - dn_db = dn_dev_create(dev, &err); - if (dn_db == NULL) - return err; - } - - ifa->ifa_dev = dn_db; - - if (dev->flags & IFF_LOOPBACK) - ifa->ifa_scope = RT_SCOPE_HOST; - - rv = dn_dev_insert_ifa(dn_db, ifa); - if (rv) - dn_dev_free_ifa(ifa); - return rv; -} - - -int dn_dev_ioctl(unsigned int cmd, void __user *arg) -{ - char buffer[DN_IFREQ_SIZE]; - struct ifreq *ifr = (struct ifreq *)buffer; - struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr; - struct dn_dev *dn_db; - struct net_device *dev; - struct dn_ifaddr *ifa = NULL; - struct dn_ifaddr __rcu **ifap = NULL; - int ret = 0; - - if (copy_from_user(ifr, arg, DN_IFREQ_SIZE)) - return -EFAULT; - ifr->ifr_name[IFNAMSIZ-1] = 0; - - dev_load(&init_net, ifr->ifr_name); - - switch (cmd) { - case SIOCGIFADDR: - break; - case SIOCSIFADDR: - if (!capable(CAP_NET_ADMIN)) - return -EACCES; - if (sdn->sdn_family != AF_DECnet) - return -EINVAL; - break; - default: - return -EINVAL; - } - - rtnl_lock(); - - if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) { - ret = -ENODEV; - goto done; - } - - if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) { - for (ifap = &dn_db->ifa_list; - (ifa = rtnl_dereference(*ifap)) != NULL; - ifap = &ifa->ifa_next) - if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0) - break; - } - - if (ifa == NULL && cmd != SIOCSIFADDR) { - ret = -EADDRNOTAVAIL; - goto done; - } - - switch (cmd) { - case SIOCGIFADDR: - *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local; - goto rarok; - - case SIOCSIFADDR: - if (!ifa) { - if ((ifa = dn_dev_alloc_ifa()) == NULL) { - ret = -ENOBUFS; - break; - } - memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - } else { - if (ifa->ifa_local == dn_saddr2dn(sdn)) - break; - dn_dev_del_ifa(dn_db, ifap, 0); - } - - ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn); - - ret = dn_dev_set_ifa(dev, ifa); - } -done: - rtnl_unlock(); - - return ret; -rarok: - if (copy_to_user(arg, ifr, DN_IFREQ_SIZE)) - ret = -EFAULT; - goto done; -} - -struct net_device *dn_dev_get_default(void) -{ - struct net_device *dev; - - spin_lock(&dndev_lock); - dev = decnet_default_device; - if (dev) { - if (dev->dn_ptr) - dev_hold(dev); - else - dev = NULL; - } - spin_unlock(&dndev_lock); - - return dev; -} - -int dn_dev_set_default(struct net_device *dev, int force) -{ - struct net_device *old = NULL; - int rv = -EBUSY; - if (!dev->dn_ptr) - return -ENODEV; - - spin_lock(&dndev_lock); - if (force || decnet_default_device == NULL) { - old = decnet_default_device; - decnet_default_device = dev; - rv = 0; - } - spin_unlock(&dndev_lock); - - if (old) - dev_put(old); - return rv; -} - -static void dn_dev_check_default(struct net_device *dev) -{ - spin_lock(&dndev_lock); - if (dev == decnet_default_device) { - decnet_default_device = NULL; - } else { - dev = NULL; - } - spin_unlock(&dndev_lock); - - if (dev) - dev_put(dev); -} - -/* - * Called with RTNL - */ -static struct dn_dev *dn_dev_by_index(int ifindex) -{ - struct net_device *dev; - struct dn_dev *dn_dev = NULL; - - dev = __dev_get_by_index(&init_net, ifindex); - if (dev) - dn_dev = rtnl_dereference(dev->dn_ptr); - - return dn_dev; -} - -static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { - [IFA_ADDRESS] = { .type = NLA_U16 }, - [IFA_LOCAL] = { .type = NLA_U16 }, - [IFA_LABEL] = { .type = NLA_STRING, - .len = IFNAMSIZ - 1 }, - [IFA_FLAGS] = { .type = NLA_U32 }, -}; - -static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(skb->sk); - struct nlattr *tb[IFA_MAX+1]; - struct dn_dev *dn_db; - struct ifaddrmsg *ifm; - struct dn_ifaddr *ifa; - struct dn_ifaddr __rcu **ifap; - int err = -EINVAL; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if (!net_eq(net, &init_net)) - goto errout; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy, - extack); - if (err < 0) - goto errout; - - err = -ENODEV; - ifm = nlmsg_data(nlh); - if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL) - goto errout; - - err = -EADDRNOTAVAIL; - for (ifap = &dn_db->ifa_list; - (ifa = rtnl_dereference(*ifap)) != NULL; - ifap = &ifa->ifa_next) { - if (tb[IFA_LOCAL] && - nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2)) - continue; - - if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) - continue; - - dn_dev_del_ifa(dn_db, ifap, 1); - return 0; - } - -errout: - return err; -} - -static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(skb->sk); - struct nlattr *tb[IFA_MAX+1]; - struct net_device *dev; - struct dn_dev *dn_db; - struct ifaddrmsg *ifm; - struct dn_ifaddr *ifa; - int err; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if (!net_eq(net, &init_net)) - return -EINVAL; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy, - extack); - if (err < 0) - return err; - - if (tb[IFA_LOCAL] == NULL) - return -EINVAL; - - ifm = nlmsg_data(nlh); - if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) - return -ENODEV; - - if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) { - dn_db = dn_dev_create(dev, &err); - if (!dn_db) - return err; - } - - if ((ifa = dn_dev_alloc_ifa()) == NULL) - return -ENOBUFS; - - if (tb[IFA_ADDRESS] == NULL) - tb[IFA_ADDRESS] = tb[IFA_LOCAL]; - - ifa->ifa_local = nla_get_le16(tb[IFA_LOCAL]); - ifa->ifa_address = nla_get_le16(tb[IFA_ADDRESS]); - ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : - ifm->ifa_flags; - ifa->ifa_scope = ifm->ifa_scope; - ifa->ifa_dev = dn_db; - - if (tb[IFA_LABEL]) - nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); - else - memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - - err = dn_dev_insert_ifa(dn_db, ifa); - if (err) - dn_dev_free_ifa(ifa); - - return err; -} - -static inline size_t dn_ifaddr_nlmsg_size(void) -{ - return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) - + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ - + nla_total_size(2) /* IFA_ADDRESS */ - + nla_total_size(2) /* IFA_LOCAL */ - + nla_total_size(4); /* IFA_FLAGS */ -} - -static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, - u32 portid, u32 seq, int event, unsigned int flags) -{ - struct ifaddrmsg *ifm; - struct nlmsghdr *nlh; - u32 ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT; - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); - if (nlh == NULL) - return -EMSGSIZE; - - ifm = nlmsg_data(nlh); - ifm->ifa_family = AF_DECnet; - ifm->ifa_prefixlen = 16; - ifm->ifa_flags = ifa_flags; - ifm->ifa_scope = ifa->ifa_scope; - ifm->ifa_index = ifa->ifa_dev->dev->ifindex; - - if ((ifa->ifa_address && - nla_put_le16(skb, IFA_ADDRESS, ifa->ifa_address)) || - (ifa->ifa_local && - nla_put_le16(skb, IFA_LOCAL, ifa->ifa_local)) || - (ifa->ifa_label[0] && - nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || - nla_put_u32(skb, IFA_FLAGS, ifa_flags)) - goto nla_put_failure; - nlmsg_end(skb, nlh); - return 0; - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - -static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) -{ - struct sk_buff *skb; - int err = -ENOBUFS; - - skb = alloc_skb(dn_ifaddr_nlmsg_size(), GFP_KERNEL); - if (skb == NULL) - goto errout; - - err = dn_nl_fill_ifaddr(skb, ifa, 0, 0, event, 0); - if (err < 0) { - /* -EMSGSIZE implies BUG in dn_ifaddr_nlmsg_size() */ - WARN_ON(err == -EMSGSIZE); - kfree_skb(skb); - goto errout; - } - rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); - return; -errout: - if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); -} - -static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - int idx, dn_idx = 0, skip_ndevs, skip_naddr; - struct net_device *dev; - struct dn_dev *dn_db; - struct dn_ifaddr *ifa; - - if (!net_eq(net, &init_net)) - return 0; - - skip_ndevs = cb->args[0]; - skip_naddr = cb->args[1]; - - idx = 0; - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if (idx < skip_ndevs) - goto cont; - else if (idx > skip_ndevs) { - /* Only skip over addresses for first dev dumped - * in this iteration (idx == skip_ndevs) */ - skip_naddr = 0; - } - - if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL) - goto cont; - - for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa; - ifa = rcu_dereference(ifa->ifa_next), dn_idx++) { - if (dn_idx < skip_naddr) - continue; - - if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWADDR, - NLM_F_MULTI) < 0) - goto done; - } -cont: - idx++; - } -done: - rcu_read_unlock(); - cb->args[0] = idx; - cb->args[1] = dn_idx; - - return skb->len; -} - -static int dn_dev_get_first(struct net_device *dev, __le16 *addr) -{ - struct dn_dev *dn_db; - struct dn_ifaddr *ifa; - int rv = -ENODEV; - - rcu_read_lock(); - dn_db = rcu_dereference(dev->dn_ptr); - if (dn_db == NULL) - goto out; - - ifa = rcu_dereference(dn_db->ifa_list); - if (ifa != NULL) { - *addr = ifa->ifa_local; - rv = 0; - } -out: - rcu_read_unlock(); - return rv; -} - -/* - * Find a default address to bind to. - * - * This is one of those areas where the initial VMS concepts don't really - * map onto the Linux concepts, and since we introduced multiple addresses - * per interface we have to cope with slightly odd ways of finding out what - * "our address" really is. Mostly it's not a problem; for this we just guess - * a sensible default. Eventually the routing code will take care of all the - * nasties for us I hope. - */ -int dn_dev_bind_default(__le16 *addr) -{ - struct net_device *dev; - int rv; - dev = dn_dev_get_default(); -last_chance: - if (dev) { - rv = dn_dev_get_first(dev, addr); - dev_put(dev); - if (rv == 0 || dev == init_net.loopback_dev) - return rv; - } - dev = init_net.loopback_dev; - dev_hold(dev); - goto last_chance; -} - -static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) -{ - struct endnode_hello_message *msg; - struct sk_buff *skb = NULL; - __le16 *pktlen; - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - - if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL) - return; - - skb->dev = dev; - - msg = skb_put(skb, sizeof(*msg)); - - msg->msgflg = 0x0D; - memcpy(msg->tiver, dn_eco_version, 3); - dn_dn2eth(msg->id, ifa->ifa_local); - msg->iinfo = DN_RT_INFO_ENDN; - msg->blksize = cpu_to_le16(mtu2blksize(dev)); - msg->area = 0x00; - memset(msg->seed, 0, 8); - memcpy(msg->neighbor, dn_hiord, ETH_ALEN); - - if (dn_db->router) { - struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; - dn_dn2eth(msg->neighbor, dn->addr); - } - - msg->timer = cpu_to_le16((unsigned short)dn_db->parms.t3); - msg->mpd = 0x00; - msg->datalen = 0x02; - memset(msg->data, 0xAA, 2); - - pktlen = skb_push(skb, 2); - *pktlen = cpu_to_le16(skb->len - 2); - - skb_reset_network_header(skb); - - dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id); -} - - -#define DRDELAY (5 * HZ) - -static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn_ifaddr *ifa) -{ - /* First check time since device went up */ - if (time_before(jiffies, dn_db->uptime + DRDELAY)) - return 0; - - /* If there is no router, then yes... */ - if (!dn_db->router) - return 1; - - /* otherwise only if we have a higher priority or.. */ - if (dn->priority < dn_db->parms.priority) - return 1; - - /* if we have equal priority and a higher node number */ - if (dn->priority != dn_db->parms.priority) - return 0; - - if (le16_to_cpu(dn->addr) < le16_to_cpu(ifa->ifa_local)) - return 1; - - return 0; -} - -static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) -{ - int n; - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; - struct sk_buff *skb; - size_t size; - unsigned char *ptr; - unsigned char *i1, *i2; - __le16 *pktlen; - char *src; - - if (mtu2blksize(dev) < (26 + 7)) - return; - - n = mtu2blksize(dev) - 26; - n /= 7; - - if (n > 32) - n = 32; - - size = 2 + 26 + 7 * n; - - if ((skb = dn_alloc_skb(NULL, size, GFP_ATOMIC)) == NULL) - return; - - skb->dev = dev; - ptr = skb_put(skb, size); - - *ptr++ = DN_RT_PKT_CNTL | DN_RT_PKT_ERTH; - *ptr++ = 2; /* ECO */ - *ptr++ = 0; - *ptr++ = 0; - dn_dn2eth(ptr, ifa->ifa_local); - src = ptr; - ptr += ETH_ALEN; - *ptr++ = dn_db->parms.forwarding == 1 ? - DN_RT_INFO_L1RT : DN_RT_INFO_L2RT; - *((__le16 *)ptr) = cpu_to_le16(mtu2blksize(dev)); - ptr += 2; - *ptr++ = dn_db->parms.priority; /* Priority */ - *ptr++ = 0; /* Area: Reserved */ - *((__le16 *)ptr) = cpu_to_le16((unsigned short)dn_db->parms.t3); - ptr += 2; - *ptr++ = 0; /* MPD: Reserved */ - i1 = ptr++; - memset(ptr, 0, 7); /* Name: Reserved */ - ptr += 7; - i2 = ptr++; - - n = dn_neigh_elist(dev, ptr, n); - - *i2 = 7 * n; - *i1 = 8 + *i2; - - skb_trim(skb, (27 + *i2)); - - pktlen = skb_push(skb, 2); - *pktlen = cpu_to_le16(skb->len - 2); - - skb_reset_network_header(skb); - - if (dn_am_i_a_router(dn, dn_db, ifa)) { - struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); - if (skb2) { - dn_rt_finish_output(skb2, dn_rt_all_end_mcast, src); - } - } - - dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src); -} - -static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa) -{ - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - - if (dn_db->parms.forwarding == 0) - dn_send_endnode_hello(dev, ifa); - else - dn_send_router_hello(dev, ifa); -} - -static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa) -{ - int tdlen = 16; - int size = dev->hard_header_len + 2 + 4 + tdlen; - struct sk_buff *skb = dn_alloc_skb(NULL, size, GFP_ATOMIC); - int i; - unsigned char *ptr; - char src[ETH_ALEN]; - - if (skb == NULL) - return ; - - skb->dev = dev; - skb_push(skb, dev->hard_header_len); - ptr = skb_put(skb, 2 + 4 + tdlen); - - *ptr++ = DN_RT_PKT_HELO; - *((__le16 *)ptr) = ifa->ifa_local; - ptr += 2; - *ptr++ = tdlen; - - for(i = 0; i < tdlen; i++) - *ptr++ = 0252; - - dn_dn2eth(src, ifa->ifa_local); - dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src); -} - -static int dn_eth_up(struct net_device *dev) -{ - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - - if (dn_db->parms.forwarding == 0) - dev_mc_add(dev, dn_rt_all_end_mcast); - else - dev_mc_add(dev, dn_rt_all_rt_mcast); - - dn_db->use_long = 1; - - return 0; -} - -static void dn_eth_down(struct net_device *dev) -{ - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - - if (dn_db->parms.forwarding == 0) - dev_mc_del(dev, dn_rt_all_end_mcast); - else - dev_mc_del(dev, dn_rt_all_rt_mcast); -} - -static void dn_dev_set_timer(struct net_device *dev); - -static void dn_dev_timer_func(struct timer_list *t) -{ - struct dn_dev *dn_db = from_timer(dn_db, t, timer); - struct net_device *dev; - struct dn_ifaddr *ifa; - - rcu_read_lock(); - dev = dn_db->dev; - if (dn_db->t3 <= dn_db->parms.t2) { - if (dn_db->parms.timer3) { - for (ifa = rcu_dereference(dn_db->ifa_list); - ifa; - ifa = rcu_dereference(ifa->ifa_next)) { - if (!(ifa->ifa_flags & IFA_F_SECONDARY)) - dn_db->parms.timer3(dev, ifa); - } - } - dn_db->t3 = dn_db->parms.t3; - } else { - dn_db->t3 -= dn_db->parms.t2; - } - rcu_read_unlock(); - dn_dev_set_timer(dev); -} - -static void dn_dev_set_timer(struct net_device *dev) -{ - struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr); - - if (dn_db->parms.t2 > dn_db->parms.t3) - dn_db->parms.t2 = dn_db->parms.t3; - - dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ); - - add_timer(&dn_db->timer); -} - -static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) -{ - int i; - struct dn_dev_parms *p = dn_dev_list; - struct dn_dev *dn_db; - - for(i = 0; i < DN_DEV_LIST_SIZE; i++, p++) { - if (p->type == dev->type) - break; - } - - *err = -ENODEV; - if (i == DN_DEV_LIST_SIZE) - return NULL; - - *err = -ENOBUFS; - if ((dn_db = kzalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL) - return NULL; - - memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); - - rcu_assign_pointer(dev->dn_ptr, dn_db); - dn_db->dev = dev; - timer_setup(&dn_db->timer, dn_dev_timer_func, 0); - - dn_db->uptime = jiffies; - - dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); - if (!dn_db->neigh_parms) { - RCU_INIT_POINTER(dev->dn_ptr, NULL); - kfree(dn_db); - return NULL; - } - - if (dn_db->parms.up) { - if (dn_db->parms.up(dev) < 0) { - neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms); - dev->dn_ptr = NULL; - kfree(dn_db); - return NULL; - } - } - - dn_dev_sysctl_register(dev, &dn_db->parms); - - dn_dev_set_timer(dev); - - *err = 0; - return dn_db; -} - - -/* - * This processes a device up event. We only start up - * the loopback device & ethernet devices with correct - * MAC addresses automatically. Others must be started - * specifically. - * - * FIXME: How should we configure the loopback address ? If we could dispense - * with using decnet_address here and for autobind, it will be one less thing - * for users to worry about setting up. - */ - -void dn_dev_up(struct net_device *dev) -{ - struct dn_ifaddr *ifa; - __le16 addr = decnet_address; - int maybe_default = 0; - struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); - - if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) - return; - - /* - * Need to ensure that loopback device has a dn_db attached to it - * to allow creation of neighbours against it, even though it might - * not have a local address of its own. Might as well do the same for - * all autoconfigured interfaces. - */ - if (dn_db == NULL) { - int err; - dn_db = dn_dev_create(dev, &err); - if (dn_db == NULL) - return; - } - - if (dev->type == ARPHRD_ETHER) { - if (memcmp(dev->dev_addr, dn_hiord, 4) != 0) - return; - addr = dn_eth2dn(dev->dev_addr); - maybe_default = 1; - } - - if (addr == 0) - return; - - if ((ifa = dn_dev_alloc_ifa()) == NULL) - return; - - ifa->ifa_local = ifa->ifa_address = addr; - ifa->ifa_flags = 0; - ifa->ifa_scope = RT_SCOPE_UNIVERSE; - strcpy(ifa->ifa_label, dev->name); - - dn_dev_set_ifa(dev, ifa); - - /* - * Automagically set the default device to the first automatically - * configured ethernet card in the system. - */ - if (maybe_default) { - dev_hold(dev); - if (dn_dev_set_default(dev, 0)) - dev_put(dev); - } -} - -static void dn_dev_delete(struct net_device *dev) -{ - struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); - - if (dn_db == NULL) - return; - - del_timer_sync(&dn_db->timer); - dn_dev_sysctl_unregister(&dn_db->parms); - dn_dev_check_default(dev); - neigh_ifdown(&dn_neigh_table, dev); - - if (dn_db->parms.down) - dn_db->parms.down(dev); - - dev->dn_ptr = NULL; - - neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms); - neigh_ifdown(&dn_neigh_table, dev); - - if (dn_db->router) - neigh_release(dn_db->router); - if (dn_db->peer) - neigh_release(dn_db->peer); - - kfree(dn_db); -} - -void dn_dev_down(struct net_device *dev) -{ - struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr); - struct dn_ifaddr *ifa; - - if (dn_db == NULL) - return; - - while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) { - dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0); - dn_dev_free_ifa(ifa); - } - - dn_dev_delete(dev); -} - -void dn_dev_init_pkt(struct sk_buff *skb) -{ -} - -void dn_dev_veri_pkt(struct sk_buff *skb) -{ -} - -void dn_dev_hello(struct sk_buff *skb) -{ -} - -void dn_dev_devices_off(void) -{ - struct net_device *dev; - - rtnl_lock(); - for_each_netdev(&init_net, dev) - dn_dev_down(dev); - rtnl_unlock(); - -} - -void dn_dev_devices_on(void) -{ - struct net_device *dev; - - rtnl_lock(); - for_each_netdev(&init_net, dev) { - if (dev->flags & IFF_UP) - dn_dev_up(dev); - } - rtnl_unlock(); -} - -int register_dnaddr_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&dnaddr_chain, nb); -} - -int unregister_dnaddr_notifier(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&dnaddr_chain, nb); -} - -#ifdef CONFIG_PROC_FS -static inline int is_dn_dev(struct net_device *dev) -{ - return dev->dn_ptr != NULL; -} - -static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - int i; - struct net_device *dev; - - rcu_read_lock(); - - if (*pos == 0) - return SEQ_START_TOKEN; - - i = 1; - for_each_netdev_rcu(&init_net, dev) { - if (!is_dn_dev(dev)) - continue; - - if (i++ == *pos) - return dev; - } - - return NULL; -} - -static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct net_device *dev; - - ++*pos; - - dev = v; - if (v == SEQ_START_TOKEN) - dev = net_device_entry(&init_net.dev_base_head); - - for_each_netdev_continue_rcu(&init_net, dev) { - if (!is_dn_dev(dev)) - continue; - - return dev; - } - - return NULL; -} - -static void dn_dev_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static char *dn_type2asc(char type) -{ - switch (type) { - case DN_DEV_BCAST: - return "B"; - case DN_DEV_UCAST: - return "U"; - case DN_DEV_MPOINT: - return "M"; - } - - return "?"; -} - -static int dn_dev_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Name Flags T1 Timer1 T3 Timer3 BlkSize Pri State DevType Router Peer\n"); - else { - struct net_device *dev = v; - char peer_buf[DN_ASCBUF_LEN]; - char router_buf[DN_ASCBUF_LEN]; - struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr); - - seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu" - " %04hu %03d %02x %-10s %-7s %-7s\n", - dev->name ? dev->name : "???", - dn_type2asc(dn_db->parms.mode), - 0, 0, - dn_db->t3, dn_db->parms.t3, - mtu2blksize(dev), - dn_db->parms.priority, - dn_db->parms.state, dn_db->parms.name, - dn_db->router ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->router->primary_key), router_buf) : "", - dn_db->peer ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->peer->primary_key), peer_buf) : ""); - } - return 0; -} - -static const struct seq_operations dn_dev_seq_ops = { - .start = dn_dev_seq_start, - .next = dn_dev_seq_next, - .stop = dn_dev_seq_stop, - .show = dn_dev_seq_show, -}; -#endif /* CONFIG_PROC_FS */ - -static int addr[2]; -module_param_array(addr, int, NULL, 0444); -MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node"); - -void __init dn_dev_init(void) -{ - if (addr[0] > 63 || addr[0] < 0) { - printk(KERN_ERR "DECnet: Area must be between 0 and 63"); - return; - } - - if (addr[1] > 1023 || addr[1] < 0) { - printk(KERN_ERR "DECnet: Node must be between 0 and 1023"); - return; - } - - decnet_address = cpu_to_le16((addr[0] << 10) | addr[1]); - - dn_dev_devices_on(); - - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR, - dn_nl_newaddr, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR, - dn_nl_deladdr, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR, - NULL, dn_nl_dump_ifaddr, 0); - - proc_create_seq("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_ops); - -#ifdef CONFIG_SYSCTL - { - int i; - for(i = 0; i < DN_DEV_LIST_SIZE; i++) - dn_dev_sysctl_register(NULL, &dn_dev_list[i]); - } -#endif /* CONFIG_SYSCTL */ -} - -void __exit dn_dev_cleanup(void) -{ -#ifdef CONFIG_SYSCTL - { - int i; - for(i = 0; i < DN_DEV_LIST_SIZE; i++) - dn_dev_sysctl_unregister(&dn_dev_list[i]); - } -#endif /* CONFIG_SYSCTL */ - - remove_proc_entry("decnet_dev", init_net.proc_net); - - dn_dev_devices_off(); -} diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c deleted file mode 100644 index f78fe58eafc8..000000000000 --- a/net/decnet/dn_fib.c +++ /dev/null @@ -1,799 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Routing Forwarding Information Base (Glue/Info List) - * - * Author: Steve Whitehouse <SteveW@ACM.org> - * - * - * Changes: - * Alexey Kuznetsov : SMP locking changes - * Steve Whitehouse : Rewrote it... Well to be more correct, I - * copied most of it from the ipv4 fib code. - * Steve Whitehouse : Updated it in style and fixed a few bugs - * which were fixed in the ipv4 code since - * this code was copied from it. - * - */ -#include <linux/string.h> -#include <linux/net.h> -#include <linux/socket.h> -#include <linux/slab.h> -#include <linux/sockios.h> -#include <linux/init.h> -#include <linux/skbuff.h> -#include <linux/netlink.h> -#include <linux/rtnetlink.h> -#include <linux/proc_fs.h> -#include <linux/netdevice.h> -#include <linux/timer.h> -#include <linux/spinlock.h> -#include <linux/atomic.h> -#include <linux/uaccess.h> -#include <net/neighbour.h> -#include <net/dst.h> -#include <net/flow.h> -#include <net/fib_rules.h> -#include <net/dn.h> -#include <net/dn_route.h> -#include <net/dn_fib.h> -#include <net/dn_neigh.h> -#include <net/dn_dev.h> -#include <net/nexthop.h> - -#define RT_MIN_TABLE 1 - -#define for_fib_info() { struct dn_fib_info *fi;\ - for(fi = dn_fib_info_list; fi; fi = fi->fib_next) -#define endfor_fib_info() } - -#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\ - for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) - -#define change_nexthops(fi) { int nhsel; struct dn_fib_nh *nh;\ - for(nhsel = 0, nh = (struct dn_fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) - -#define endfor_nexthops(fi) } - -static DEFINE_SPINLOCK(dn_fib_multipath_lock); -static struct dn_fib_info *dn_fib_info_list; -static DEFINE_SPINLOCK(dn_fib_info_lock); - -static struct -{ - int error; - u8 scope; -} dn_fib_props[RTN_MAX+1] = { - [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE }, - [RTN_UNICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE }, - [RTN_LOCAL] = { .error = 0, .scope = RT_SCOPE_HOST }, - [RTN_BROADCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, - [RTN_ANYCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, - [RTN_MULTICAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, - [RTN_BLACKHOLE] = { .error = -EINVAL, .scope = RT_SCOPE_UNIVERSE }, - [RTN_UNREACHABLE] = { .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE }, - [RTN_PROHIBIT] = { .error = -EACCES, .scope = RT_SCOPE_UNIVERSE }, - [RTN_THROW] = { .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE }, - [RTN_NAT] = { .error = 0, .scope = RT_SCOPE_NOWHERE }, - [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, -}; - -static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force); -static int dn_fib_sync_up(struct net_device *dev); - -void dn_fib_free_info(struct dn_fib_info *fi) -{ - if (fi->fib_dead == 0) { - printk(KERN_DEBUG "DECnet: BUG! Attempt to free alive dn_fib_info\n"); - return; - } - - change_nexthops(fi) { - if (nh->nh_dev) - dev_put(nh->nh_dev); - nh->nh_dev = NULL; - } endfor_nexthops(fi); - kfree(fi); -} - -void dn_fib_release_info(struct dn_fib_info *fi) -{ - spin_lock(&dn_fib_info_lock); - if (fi && --fi->fib_treeref == 0) { - if (fi->fib_next) - fi->fib_next->fib_prev = fi->fib_prev; - if (fi->fib_prev) - fi->fib_prev->fib_next = fi->fib_next; - if (fi == dn_fib_info_list) - dn_fib_info_list = fi->fib_next; - fi->fib_dead = 1; - dn_fib_info_put(fi); - } - spin_unlock(&dn_fib_info_lock); -} - -static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi) -{ - const struct dn_fib_nh *onh = ofi->fib_nh; - - for_nexthops(fi) { - if (nh->nh_oif != onh->nh_oif || - nh->nh_gw != onh->nh_gw || - nh->nh_scope != onh->nh_scope || - nh->nh_weight != onh->nh_weight || - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) - return -1; - onh++; - } endfor_nexthops(fi); - return 0; -} - -static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi) -{ - for_fib_info() { - if (fi->fib_nhs != nfi->fib_nhs) - continue; - if (nfi->fib_protocol == fi->fib_protocol && - nfi->fib_prefsrc == fi->fib_prefsrc && - nfi->fib_priority == fi->fib_priority && - memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 && - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && - (nfi->fib_nhs == 0 || dn_fib_nh_comp(fi, nfi) == 0)) - return fi; - } endfor_fib_info(); - return NULL; -} - -static int dn_fib_count_nhs(const struct nlattr *attr) -{ - struct rtnexthop *nhp = nla_data(attr); - int nhs = 0, nhlen = nla_len(attr); - - while (rtnh_ok(nhp, nhlen)) { - nhs++; - nhp = rtnh_next(nhp, &nhlen); - } - - /* leftover implies invalid nexthop configuration, discard it */ - return nhlen > 0 ? 0 : nhs; -} - -static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, - const struct rtmsg *r) -{ - struct rtnexthop *nhp = nla_data(attr); - int nhlen = nla_len(attr); - - change_nexthops(fi) { - int attrlen; - - if (!rtnh_ok(nhp, nhlen)) - return -EINVAL; - - nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; - nh->nh_oif = nhp->rtnh_ifindex; - nh->nh_weight = nhp->rtnh_hops + 1; - - attrlen = rtnh_attrlen(nhp); - if (attrlen > 0) { - struct nlattr *gw_attr; - - gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); - nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; - } - - nhp = rtnh_next(nhp, &nhlen); - } endfor_nexthops(fi); - - return 0; -} - - -static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct dn_fib_nh *nh) -{ - int err; - - if (nh->nh_gw) { - struct flowidn fld; - struct dn_fib_res res; - - if (nh->nh_flags&RTNH_F_ONLINK) { - struct net_device *dev; - - if (r->rtm_scope >= RT_SCOPE_LINK) - return -EINVAL; - if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST) - return -EINVAL; - if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) - return -ENODEV; - if (!(dev->flags&IFF_UP)) - return -ENETDOWN; - nh->nh_dev = dev; - dev_hold(dev); - nh->nh_scope = RT_SCOPE_LINK; - return 0; - } - - memset(&fld, 0, sizeof(fld)); - fld.daddr = nh->nh_gw; - fld.flowidn_oif = nh->nh_oif; - fld.flowidn_scope = r->rtm_scope + 1; - - if (fld.flowidn_scope < RT_SCOPE_LINK) - fld.flowidn_scope = RT_SCOPE_LINK; - - if ((err = dn_fib_lookup(&fld, &res)) != 0) - return err; - - err = -EINVAL; - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) - goto out; - nh->nh_scope = res.scope; - nh->nh_oif = DN_FIB_RES_OIF(res); - nh->nh_dev = DN_FIB_RES_DEV(res); - if (nh->nh_dev == NULL) - goto out; - dev_hold(nh->nh_dev); - err = -ENETDOWN; - if (!(nh->nh_dev->flags & IFF_UP)) - goto out; - err = 0; -out: - dn_fib_res_put(&res); - return err; - } else { - struct net_device *dev; - - if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) - return -EINVAL; - - dev = __dev_get_by_index(&init_net, nh->nh_oif); - if (dev == NULL || dev->dn_ptr == NULL) - return -ENODEV; - if (!(dev->flags&IFF_UP)) - return -ENETDOWN; - nh->nh_dev = dev; - dev_hold(nh->nh_dev); - nh->nh_scope = RT_SCOPE_HOST; - } - - return 0; -} - - -struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[], - const struct nlmsghdr *nlh, int *errp) -{ - int err; - struct dn_fib_info *fi = NULL; - struct dn_fib_info *ofi; - int nhs = 1; - - if (r->rtm_type > RTN_MAX) - goto err_inval; - - if (dn_fib_props[r->rtm_type].scope > r->rtm_scope) - goto err_inval; - - if (attrs[RTA_MULTIPATH] && - (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) - goto err_inval; - - fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); - err = -ENOBUFS; - if (fi == NULL) - goto failure; - - fi->fib_protocol = r->rtm_protocol; - fi->fib_nhs = nhs; - fi->fib_flags = r->rtm_flags; - - if (attrs[RTA_PRIORITY]) - fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]); - - if (attrs[RTA_METRICS]) { - struct nlattr *attr; - int rem; - - nla_for_each_nested(attr, attrs[RTA_METRICS], rem) { - int type = nla_type(attr); - - if (type) { - if (type > RTAX_MAX || type == RTAX_CC_ALGO || - nla_len(attr) < 4) - goto err_inval; - - fi->fib_metrics[type-1] = nla_get_u32(attr); - } - } - } - - if (attrs[RTA_PREFSRC]) - fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]); - - if (attrs[RTA_MULTIPATH]) { - if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0) - goto failure; - - if (attrs[RTA_OIF] && - fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF])) - goto err_inval; - - if (attrs[RTA_GATEWAY] && - fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY])) - goto err_inval; - } else { - struct dn_fib_nh *nh = fi->fib_nh; - - if (attrs[RTA_OIF]) - nh->nh_oif = nla_get_u32(attrs[RTA_OIF]); - - if (attrs[RTA_GATEWAY]) - nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); - - nh->nh_flags = r->rtm_flags; - nh->nh_weight = 1; - } - - if (r->rtm_type == RTN_NAT) { - if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF]) - goto err_inval; - - fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); - goto link_it; - } - - if (dn_fib_props[r->rtm_type].error) { - if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH]) - goto err_inval; - - goto link_it; - } - - if (r->rtm_scope > RT_SCOPE_HOST) - goto err_inval; - - if (r->rtm_scope == RT_SCOPE_HOST) { - struct dn_fib_nh *nh = fi->fib_nh; - - /* Local address is added */ - if (nhs != 1 || nh->nh_gw) - goto err_inval; - nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); - err = -ENODEV; - if (nh->nh_dev == NULL) - goto failure; - } else { - change_nexthops(fi) { - if ((err = dn_fib_check_nh(r, fi, nh)) != 0) - goto failure; - } endfor_nexthops(fi) - } - - if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] || - fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST])) - if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) - goto err_inval; - } - -link_it: - if ((ofi = dn_fib_find_info(fi)) != NULL) { - fi->fib_dead = 1; - dn_fib_free_info(fi); - ofi->fib_treeref++; - return ofi; - } - - fi->fib_treeref++; - refcount_set(&fi->fib_clntref, 1); - spin_lock(&dn_fib_info_lock); - fi->fib_next = dn_fib_info_list; - fi->fib_prev = NULL; - if (dn_fib_info_list) - dn_fib_info_list->fib_prev = fi; - dn_fib_info_list = fi; - spin_unlock(&dn_fib_info_lock); - return fi; - -err_inval: - err = -EINVAL; - -failure: - *errp = err; - if (fi) { - fi->fib_dead = 1; - dn_fib_free_info(fi); - } - - return NULL; -} - -int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res) -{ - int err = dn_fib_props[type].error; - - if (err == 0) { - if (fi->fib_flags & RTNH_F_DEAD) - return 1; - - res->fi = fi; - - switch (type) { - case RTN_NAT: - DN_FIB_RES_RESET(*res); - refcount_inc(&fi->fib_clntref); - return 0; - case RTN_UNICAST: - case RTN_LOCAL: - for_nexthops(fi) { - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (!fld->flowidn_oif || - fld->flowidn_oif == nh->nh_oif) - break; - } - if (nhsel < fi->fib_nhs) { - res->nh_sel = nhsel; - refcount_inc(&fi->fib_clntref); - return 0; - } - endfor_nexthops(fi); - res->fi = NULL; - return 1; - default: - net_err_ratelimited("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n", - type); - res->fi = NULL; - return -EINVAL; - } - } - return err; -} - -void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res) -{ - struct dn_fib_info *fi = res->fi; - int w; - - spin_lock_bh(&dn_fib_multipath_lock); - if (fi->fib_power <= 0) { - int power = 0; - change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD)) { - power += nh->nh_weight; - nh->nh_power = nh->nh_weight; - } - } endfor_nexthops(fi); - fi->fib_power = power; - if (power < 0) { - spin_unlock_bh(&dn_fib_multipath_lock); - res->nh_sel = 0; - return; - } - } - - w = jiffies % fi->fib_power; - - change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { - if ((w -= nh->nh_power) <= 0) { - nh->nh_power--; - fi->fib_power--; - res->nh_sel = nhsel; - spin_unlock_bh(&dn_fib_multipath_lock); - return; - } - } - } endfor_nexthops(fi); - res->nh_sel = 0; - spin_unlock_bh(&dn_fib_multipath_lock); -} - -static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table) -{ - if (attrs[RTA_TABLE]) - table = nla_get_u32(attrs[RTA_TABLE]); - - return table; -} - -static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(skb->sk); - struct dn_fib_table *tb; - struct rtmsg *r = nlmsg_data(nlh); - struct nlattr *attrs[RTA_MAX+1]; - int err; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if (!net_eq(net, &init_net)) - return -EINVAL; - - err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy, - extack); - if (err < 0) - return err; - - tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0); - if (!tb) - return -ESRCH; - - return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb)); -} - -static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(skb->sk); - struct dn_fib_table *tb; - struct rtmsg *r = nlmsg_data(nlh); - struct nlattr *attrs[RTA_MAX+1]; - int err; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - - if (!net_eq(net, &init_net)) - return -EINVAL; - - err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy, - extack); - if (err < 0) - return err; - - tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1); - if (!tb) - return -ENOBUFS; - - return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb)); -} - -static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) -{ - struct dn_fib_table *tb; - struct { - struct nlmsghdr nlh; - struct rtmsg rtm; - } req; - struct { - struct nlattr hdr; - __le16 dst; - } dst_attr = { - .dst = dst, - }; - struct { - struct nlattr hdr; - __le16 prefsrc; - } prefsrc_attr = { - .prefsrc = ifa->ifa_local, - }; - struct { - struct nlattr hdr; - u32 oif; - } oif_attr = { - .oif = ifa->ifa_dev->dev->ifindex, - }; - struct nlattr *attrs[RTA_MAX+1] = { - [RTA_DST] = (struct nlattr *) &dst_attr, - [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr, - [RTA_OIF] = (struct nlattr *) &oif_attr, - }; - - memset(&req.rtm, 0, sizeof(req.rtm)); - - if (type == RTN_UNICAST) - tb = dn_fib_get_table(RT_MIN_TABLE, 1); - else - tb = dn_fib_get_table(RT_TABLE_LOCAL, 1); - - if (tb == NULL) - return; - - req.nlh.nlmsg_len = sizeof(req); - req.nlh.nlmsg_type = cmd; - req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND; - req.nlh.nlmsg_pid = 0; - req.nlh.nlmsg_seq = 0; - - req.rtm.rtm_dst_len = dst_len; - req.rtm.rtm_table = tb->n; - req.rtm.rtm_protocol = RTPROT_KERNEL; - req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); - req.rtm.rtm_type = type; - - if (cmd == RTM_NEWROUTE) - tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL); - else - tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL); -} - -static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa) -{ - - fib_magic(RTM_NEWROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa); - -#if 0 - if (!(dev->flags&IFF_UP)) - return; - /* In the future, we will want to add default routes here */ - -#endif -} - -static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) -{ - int found_it = 0; - struct net_device *dev; - struct dn_dev *dn_db; - struct dn_ifaddr *ifa2; - - ASSERT_RTNL(); - - /* Scan device list */ - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - dn_db = rcu_dereference(dev->dn_ptr); - if (dn_db == NULL) - continue; - for (ifa2 = rcu_dereference(dn_db->ifa_list); - ifa2 != NULL; - ifa2 = rcu_dereference(ifa2->ifa_next)) { - if (ifa2->ifa_local == ifa->ifa_local) { - found_it = 1; - break; - } - } - } - rcu_read_unlock(); - - if (found_it == 0) { - fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa); - - if (dnet_addr_type(ifa->ifa_local) != RTN_LOCAL) { - if (dn_fib_sync_down(ifa->ifa_local, NULL, 0)) - dn_fib_flush(); - } - } -} - -static void dn_fib_disable_addr(struct net_device *dev, int force) -{ - if (dn_fib_sync_down(0, dev, force)) - dn_fib_flush(); - dn_rt_cache_flush(0); - neigh_ifdown(&dn_neigh_table, dev); -} - -static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct dn_ifaddr *ifa = (struct dn_ifaddr *)ptr; - - switch (event) { - case NETDEV_UP: - dn_fib_add_ifaddr(ifa); - dn_fib_sync_up(ifa->ifa_dev->dev); - dn_rt_cache_flush(-1); - break; - case NETDEV_DOWN: - dn_fib_del_ifaddr(ifa); - if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) { - dn_fib_disable_addr(ifa->ifa_dev->dev, 1); - } else { - dn_rt_cache_flush(-1); - } - break; - } - return NOTIFY_DONE; -} - -static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) -{ - int ret = 0; - int scope = RT_SCOPE_NOWHERE; - - if (force) - scope = -1; - - for_fib_info() { - /* - * This makes no sense for DECnet.... we will almost - * certainly have more than one local address the same - * over all our interfaces. It needs thinking about - * some more. - */ - if (local && fi->fib_prefsrc == local) { - fi->fib_flags |= RTNH_F_DEAD; - ret++; - } else if (dev && fi->fib_nhs) { - int dead = 0; - - change_nexthops(fi) { - if (nh->nh_flags&RTNH_F_DEAD) - dead++; - else if (nh->nh_dev == dev && - nh->nh_scope != scope) { - spin_lock_bh(&dn_fib_multipath_lock); - nh->nh_flags |= RTNH_F_DEAD; - fi->fib_power -= nh->nh_power; - nh->nh_power = 0; - spin_unlock_bh(&dn_fib_multipath_lock); - dead++; - } - } endfor_nexthops(fi) - if (dead == fi->fib_nhs) { - fi->fib_flags |= RTNH_F_DEAD; - ret++; - } - } - } endfor_fib_info(); - return ret; -} - - -static int dn_fib_sync_up(struct net_device *dev) -{ - int ret = 0; - - if (!(dev->flags&IFF_UP)) - return 0; - - for_fib_info() { - int alive = 0; - - change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD)) { - alive++; - continue; - } - if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) - continue; - if (nh->nh_dev != dev || dev->dn_ptr == NULL) - continue; - alive++; - spin_lock_bh(&dn_fib_multipath_lock); - nh->nh_power = 0; - nh->nh_flags &= ~RTNH_F_DEAD; - spin_unlock_bh(&dn_fib_multipath_lock); - } endfor_nexthops(fi); - - if (alive > 0) { - fi->fib_flags &= ~RTNH_F_DEAD; - ret++; - } - } endfor_fib_info(); - return ret; -} - -static struct notifier_block dn_fib_dnaddr_notifier = { - .notifier_call = dn_fib_dnaddr_event, -}; - -void __exit dn_fib_cleanup(void) -{ - dn_fib_table_cleanup(); - dn_fib_rules_cleanup(); - - unregister_dnaddr_notifier(&dn_fib_dnaddr_notifier); -} - - -void __init dn_fib_init(void) -{ - dn_fib_table_init(); - dn_fib_rules_init(); - - register_dnaddr_notifier(&dn_fib_dnaddr_notifier); - - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE, - dn_fib_rtm_newroute, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE, - dn_fib_rtm_delroute, NULL, 0); -} diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c deleted file mode 100644 index 94b306f6d551..000000000000 --- a/net/decnet/dn_neigh.c +++ /dev/null @@ -1,605 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Neighbour Functions (Adjacency Database and - * On-Ethernet Cache) - * - * Author: Steve Whitehouse <SteveW@ACM.org> - * - * - * Changes: - * Steve Whitehouse : Fixed router listing routine - * Steve Whitehouse : Added error_report functions - * Steve Whitehouse : Added default router detection - * Steve Whitehouse : Hop counts in outgoing messages - * Steve Whitehouse : Fixed src/dst in outgoing messages so - * forwarding now stands a good chance of - * working. - * Steve Whitehouse : Fixed neighbour states (for now anyway). - * Steve Whitehouse : Made error_report functions dummies. This - * is not the right place to return skbs. - * Steve Whitehouse : Convert to seq_file - * - */ - -#include <linux/net.h> -#include <linux/module.h> -#include <linux/socket.h> -#include <linux/if_arp.h> -#include <linux/slab.h> -#include <linux/if_ether.h> -#include <linux/init.h> -#include <linux/proc_fs.h> -#include <linux/string.h> -#include <linux/netfilter_decnet.h> -#include <linux/spinlock.h> -#include <linux/seq_file.h> -#include <linux/rcupdate.h> -#include <linux/jhash.h> -#include <linux/atomic.h> -#include <net/net_namespace.h> -#include <net/neighbour.h> -#include <net/dst.h> -#include <net/flow.h> -#include <net/dn.h> -#include <net/dn_dev.h> -#include <net/dn_neigh.h> -#include <net/dn_route.h> - -static int dn_neigh_construct(struct neighbour *); -static void dn_neigh_error_report(struct neighbour *, struct sk_buff *); -static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb); - -/* - * Operations for adding the link layer header. - */ -static const struct neigh_ops dn_neigh_ops = { - .family = AF_DECnet, - .error_report = dn_neigh_error_report, - .output = dn_neigh_output, - .connected_output = dn_neigh_output, -}; - -static u32 dn_neigh_hash(const void *pkey, - const struct net_device *dev, - __u32 *hash_rnd) -{ - return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]); -} - -static bool dn_key_eq(const struct neighbour *neigh, const void *pkey) -{ - return neigh_key_eq16(neigh, pkey); -} - -struct neigh_table dn_neigh_table = { - .family = PF_DECnet, - .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)), - .key_len = sizeof(__le16), - .protocol = cpu_to_be16(ETH_P_DNA_RT), - .hash = dn_neigh_hash, - .key_eq = dn_key_eq, - .constructor = dn_neigh_construct, - .id = "dn_neigh_cache", - .parms ={ - .tbl = &dn_neigh_table, - .reachable_time = 30 * HZ, - .data = { - [NEIGH_VAR_MCAST_PROBES] = 0, - [NEIGH_VAR_UCAST_PROBES] = 0, - [NEIGH_VAR_APP_PROBES] = 0, - [NEIGH_VAR_RETRANS_TIME] = 1 * HZ, - [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, - [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, - [NEIGH_VAR_GC_STALETIME] = 60 * HZ, - [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, - [NEIGH_VAR_PROXY_QLEN] = 0, - [NEIGH_VAR_ANYCAST_DELAY] = 0, - [NEIGH_VAR_PROXY_DELAY] = 0, - [NEIGH_VAR_LOCKTIME] = 1 * HZ, - }, - }, - .gc_interval = 30 * HZ, - .gc_thresh1 = 128, - .gc_thresh2 = 512, - .gc_thresh3 = 1024, -}; - -static int dn_neigh_construct(struct neighbour *neigh) -{ - struct net_device *dev = neigh->dev; - struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n); - struct dn_dev *dn_db; - struct neigh_parms *parms; - - rcu_read_lock(); - dn_db = rcu_dereference(dev->dn_ptr); - if (dn_db == NULL) { - rcu_read_unlock(); - return -EINVAL; - } - - parms = dn_db->neigh_parms; - if (!parms) { - rcu_read_unlock(); - return -EINVAL; - } - - __neigh_parms_put(neigh->parms); - neigh->parms = neigh_parms_clone(parms); - rcu_read_unlock(); - - neigh->ops = &dn_neigh_ops; - neigh->nud_state = NUD_NOARP; - neigh->output = neigh->ops->connected_output; - - if ((dev->type == ARPHRD_IPGRE) || (dev->flags & IFF_POINTOPOINT)) - memcpy(neigh->ha, dev->broadcast, dev->addr_len); - else if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK)) - dn_dn2eth(neigh->ha, dn->addr); - else { - net_dbg_ratelimited("Trying to create neigh for hw %d\n", - dev->type); - return -EINVAL; - } - - /* - * Make an estimate of the remote block size by assuming that its - * two less then the device mtu, which it true for ethernet (and - * other things which support long format headers) since there is - * an extra length field (of 16 bits) which isn't part of the - * ethernet headers and which the DECnet specs won't admit is part - * of the DECnet routing headers either. - * - * If we over estimate here its no big deal, the NSP negotiations - * will prevent us from sending packets which are too large for the - * remote node to handle. In any case this figure is normally updated - * by a hello message in most cases. - */ - dn->blksize = dev->mtu - 2; - - return 0; -} - -static void dn_neigh_error_report(struct neighbour *neigh, struct sk_buff *skb) -{ - printk(KERN_DEBUG "dn_neigh_error_report: called\n"); - kfree_skb(skb); -} - -static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct dn_route *rt = (struct dn_route *)dst; - struct net_device *dev = neigh->dev; - char mac_addr[ETH_ALEN]; - unsigned int seq; - int err; - - dn_dn2eth(mac_addr, rt->rt_local_src); - do { - seq = read_seqbegin(&neigh->ha_lock); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, mac_addr, skb->len); - } while (read_seqretry(&neigh->ha_lock, seq)); - - if (err >= 0) - err = dev_queue_xmit(skb); - else { - kfree_skb(skb); - err = -EINVAL; - } - return err; -} - -static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = rt->n; - - return neigh->output(neigh, skb); -} - -/* - * For talking to broadcast devices: Ethernet & PPP - */ -static int dn_long_output(struct neighbour *neigh, struct sock *sk, - struct sk_buff *skb) -{ - struct net_device *dev = neigh->dev; - int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; - unsigned char *data; - struct dn_long_packet *lp; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - - if (skb_headroom(skb) < headroom) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom); - if (skb2 == NULL) { - net_crit_ratelimited("dn_long_output: no memory\n"); - kfree_skb(skb); - return -ENOBUFS; - } - consume_skb(skb); - skb = skb2; - net_info_ratelimited("dn_long_output: Increasing headroom\n"); - } - - data = skb_push(skb, sizeof(struct dn_long_packet) + 3); - lp = (struct dn_long_packet *)(data+3); - - *((__le16 *)data) = cpu_to_le16(skb->len - 2); - *(data + 2) = 1 | DN_RT_F_PF; /* Padding */ - - lp->msgflg = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS)); - lp->d_area = lp->d_subarea = 0; - dn_dn2eth(lp->d_id, cb->dst); - lp->s_area = lp->s_subarea = 0; - dn_dn2eth(lp->s_id, cb->src); - lp->nl2 = 0; - lp->visit_ct = cb->hops & 0x3f; - lp->s_class = 0; - lp->pt = 0; - - skb_reset_network_header(skb); - - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, - &init_net, sk, skb, NULL, neigh->dev, - dn_neigh_output_packet); -} - -/* - * For talking to pointopoint and multidrop devices: DDCMP and X.25 - */ -static int dn_short_output(struct neighbour *neigh, struct sock *sk, - struct sk_buff *skb) -{ - struct net_device *dev = neigh->dev; - int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; - struct dn_short_packet *sp; - unsigned char *data; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - - if (skb_headroom(skb) < headroom) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom); - if (skb2 == NULL) { - net_crit_ratelimited("dn_short_output: no memory\n"); - kfree_skb(skb); - return -ENOBUFS; - } - consume_skb(skb); - skb = skb2; - net_info_ratelimited("dn_short_output: Increasing headroom\n"); - } - - data = skb_push(skb, sizeof(struct dn_short_packet) + 2); - *((__le16 *)data) = cpu_to_le16(skb->len - 2); - sp = (struct dn_short_packet *)(data+2); - - sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS)); - sp->dstnode = cb->dst; - sp->srcnode = cb->src; - sp->forward = cb->hops & 0x3f; - - skb_reset_network_header(skb); - - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, - &init_net, sk, skb, NULL, neigh->dev, - dn_neigh_output_packet); -} - -/* - * For talking to DECnet phase III nodes - * Phase 3 output is the same as short output, execpt that - * it clears the area bits before transmission. - */ -static int dn_phase3_output(struct neighbour *neigh, struct sock *sk, - struct sk_buff *skb) -{ - struct net_device *dev = neigh->dev; - int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; - struct dn_short_packet *sp; - unsigned char *data; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - if (skb_headroom(skb) < headroom) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom); - if (skb2 == NULL) { - net_crit_ratelimited("dn_phase3_output: no memory\n"); - kfree_skb(skb); - return -ENOBUFS; - } - consume_skb(skb); - skb = skb2; - net_info_ratelimited("dn_phase3_output: Increasing headroom\n"); - } - - data = skb_push(skb, sizeof(struct dn_short_packet) + 2); - *((__le16 *)data) = cpu_to_le16(skb->len - 2); - sp = (struct dn_short_packet *)(data + 2); - - sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS)); - sp->dstnode = cb->dst & cpu_to_le16(0x03ff); - sp->srcnode = cb->src & cpu_to_le16(0x03ff); - sp->forward = cb->hops & 0x3f; - - skb_reset_network_header(skb); - - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, - &init_net, sk, skb, NULL, neigh->dev, - dn_neigh_output_packet); -} - -int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct dn_route *rt = (struct dn_route *) dst; - struct neighbour *neigh = rt->n; - struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n); - struct dn_dev *dn_db; - bool use_long; - - rcu_read_lock(); - dn_db = rcu_dereference(neigh->dev->dn_ptr); - if (dn_db == NULL) { - rcu_read_unlock(); - return -EINVAL; - } - use_long = dn_db->use_long; - rcu_read_unlock(); - - if (dn->flags & DN_NDFLAG_P3) - return dn_phase3_output(neigh, sk, skb); - if (use_long) - return dn_long_output(neigh, sk, skb); - else - return dn_short_output(neigh, sk, skb); -} - -/* - * Unfortunately, the neighbour code uses the device in its hash - * function, so we don't get any advantage from it. This function - * basically does a neigh_lookup(), but without comparing the device - * field. This is required for the On-Ethernet cache - */ - -/* - * Pointopoint link receives a hello message - */ -void dn_neigh_pointopoint_hello(struct sk_buff *skb) -{ - kfree_skb(skb); -} - -/* - * Ethernet router hello message received - */ -int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data; - - struct neighbour *neigh; - struct dn_neigh *dn; - struct dn_dev *dn_db; - __le16 src; - - src = dn_eth2dn(msg->id); - - neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1); - - dn = container_of(neigh, struct dn_neigh, n); - - if (neigh) { - write_lock(&neigh->lock); - - neigh->used = jiffies; - dn_db = rcu_dereference(neigh->dev->dn_ptr); - - if (!(neigh->nud_state & NUD_PERMANENT)) { - neigh->updated = jiffies; - - if (neigh->dev->type == ARPHRD_ETHER) - memcpy(neigh->ha, ð_hdr(skb)->h_source, ETH_ALEN); - - dn->blksize = le16_to_cpu(msg->blksize); - dn->priority = msg->priority; - - dn->flags &= ~DN_NDFLAG_P3; - - switch (msg->iinfo & DN_RT_INFO_TYPE) { - case DN_RT_INFO_L1RT: - dn->flags &=~DN_NDFLAG_R2; - dn->flags |= DN_NDFLAG_R1; - break; - case DN_RT_INFO_L2RT: - dn->flags |= DN_NDFLAG_R2; - } - } - - /* Only use routers in our area */ - if ((le16_to_cpu(src)>>10) == (le16_to_cpu((decnet_address))>>10)) { - if (!dn_db->router) { - dn_db->router = neigh_clone(neigh); - } else { - if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority) - neigh_release(xchg(&dn_db->router, neigh_clone(neigh))); - } - } - write_unlock(&neigh->lock); - neigh_release(neigh); - } - - kfree_skb(skb); - return 0; -} - -/* - * Endnode hello message received - */ -int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data; - struct neighbour *neigh; - struct dn_neigh *dn; - __le16 src; - - src = dn_eth2dn(msg->id); - - neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1); - - dn = container_of(neigh, struct dn_neigh, n); - - if (neigh) { - write_lock(&neigh->lock); - - neigh->used = jiffies; - - if (!(neigh->nud_state & NUD_PERMANENT)) { - neigh->updated = jiffies; - - if (neigh->dev->type == ARPHRD_ETHER) - memcpy(neigh->ha, ð_hdr(skb)->h_source, ETH_ALEN); - dn->flags &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2); - dn->blksize = le16_to_cpu(msg->blksize); - dn->priority = 0; - } - - write_unlock(&neigh->lock); - neigh_release(neigh); - } - - kfree_skb(skb); - return 0; -} - -static char *dn_find_slot(char *base, int max, int priority) -{ - int i; - unsigned char *min = NULL; - - base += 6; /* skip first id */ - - for(i = 0; i < max; i++) { - if (!min || (*base < *min)) - min = base; - base += 7; /* find next priority */ - } - - if (!min) - return NULL; - - return (*min < priority) ? (min - 6) : NULL; -} - -struct elist_cb_state { - struct net_device *dev; - unsigned char *ptr; - unsigned char *rs; - int t, n; -}; - -static void neigh_elist_cb(struct neighbour *neigh, void *_info) -{ - struct elist_cb_state *s = _info; - struct dn_neigh *dn; - - if (neigh->dev != s->dev) - return; - - dn = container_of(neigh, struct dn_neigh, n); - if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2))) - return; - - if (s->t == s->n) - s->rs = dn_find_slot(s->ptr, s->n, dn->priority); - else - s->t++; - if (s->rs == NULL) - return; - - dn_dn2eth(s->rs, dn->addr); - s->rs += 6; - *(s->rs) = neigh->nud_state & NUD_CONNECTED ? 0x80 : 0x0; - *(s->rs) |= dn->priority; - s->rs++; -} - -int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n) -{ - struct elist_cb_state state; - - state.dev = dev; - state.t = 0; - state.n = n; - state.ptr = ptr; - state.rs = ptr; - - neigh_for_each(&dn_neigh_table, neigh_elist_cb, &state); - - return state.t; -} - - -#ifdef CONFIG_PROC_FS - -static inline void dn_neigh_format_entry(struct seq_file *seq, - struct neighbour *n) -{ - struct dn_neigh *dn = container_of(n, struct dn_neigh, n); - char buf[DN_ASCBUF_LEN]; - - read_lock(&n->lock); - seq_printf(seq, "%-7s %s%s%s %02x %02d %07ld %-8s\n", - dn_addr2asc(le16_to_cpu(dn->addr), buf), - (dn->flags&DN_NDFLAG_R1) ? "1" : "-", - (dn->flags&DN_NDFLAG_R2) ? "2" : "-", - (dn->flags&DN_NDFLAG_P3) ? "3" : "-", - dn->n.nud_state, - refcount_read(&dn->n.refcnt), - dn->blksize, - (dn->n.dev) ? dn->n.dev->name : "?"); - read_unlock(&n->lock); -} - -static int dn_neigh_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) { - seq_puts(seq, "Addr Flags State Use Blksize Dev\n"); - } else { - dn_neigh_format_entry(seq, v); - } - - return 0; -} - -static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos) -{ - return neigh_seq_start(seq, pos, &dn_neigh_table, - NEIGH_SEQ_NEIGH_ONLY); -} - -static const struct seq_operations dn_neigh_seq_ops = { - .start = dn_neigh_seq_start, - .next = neigh_seq_next, - .stop = neigh_seq_stop, - .show = dn_neigh_seq_show, -}; -#endif - -void __init dn_neigh_init(void) -{ - neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table); - proc_create_net("decnet_neigh", 0444, init_net.proc_net, - &dn_neigh_seq_ops, sizeof(struct neigh_seq_state)); -} - -void __exit dn_neigh_cleanup(void) -{ - remove_proc_entry("decnet_neigh", init_net.proc_net); - neigh_table_clear(NEIGH_DN_TABLE, &dn_neigh_table); -} diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c deleted file mode 100644 index 2fb5e055ba25..000000000000 --- a/net/decnet/dn_nsp_in.c +++ /dev/null @@ -1,914 +0,0 @@ -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Network Services Protocol (Input) - * - * Author: Eduardo Marcelo Serrat <emserrat@geocities.com> - * - * Changes: - * - * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from - * original dn_nsp.c. - * Steve Whitehouse: Updated to work with my new routing architecture. - * Steve Whitehouse: Add changes from Eduardo Serrat's patches. - * Steve Whitehouse: Put all ack handling code in a common routine. - * Steve Whitehouse: Put other common bits into dn_nsp_rx() - * Steve Whitehouse: More checks on skb->len to catch bogus packets - * Fixed various race conditions and possible nasties. - * Steve Whitehouse: Now handles returned conninit frames. - * David S. Miller: New socket locking - * Steve Whitehouse: Fixed lockup when socket filtering was enabled. - * Paul Koning: Fix to push CC sockets into RUN when acks are - * received. - * Steve Whitehouse: - * Patrick Caulfield: Checking conninits for correctness & sending of error - * responses. - * Steve Whitehouse: Added backlog congestion level return codes. - * Patrick Caulfield: - * Steve Whitehouse: Added flow control support (outbound) - * Steve Whitehouse: Prepare for nonlinear skbs - */ - -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. -*******************************************************************************/ - -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/netdevice.h> -#include <linux/inet.h> -#include <linux/route.h> -#include <linux/slab.h> -#include <net/sock.h> -#include <net/tcp_states.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/termios.h> -#include <linux/interrupt.h> -#include <linux/proc_fs.h> -#include <linux/stat.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/netfilter_decnet.h> -#include <net/neighbour.h> -#include <net/dst.h> -#include <net/dn.h> -#include <net/dn_nsp.h> -#include <net/dn_dev.h> -#include <net/dn_route.h> - -extern int decnet_log_martians; - -static void dn_log_martian(struct sk_buff *skb, const char *msg) -{ - if (decnet_log_martians) { - char *devname = skb->dev ? skb->dev->name : "???"; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - net_info_ratelimited("DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", - msg, devname, - le16_to_cpu(cb->src), - le16_to_cpu(cb->dst), - le16_to_cpu(cb->src_port), - le16_to_cpu(cb->dst_port)); - } -} - -/* - * For this function we've flipped the cross-subchannel bit - * if the message is an otherdata or linkservice message. Thus - * we can use it to work out what to update. - */ -static void dn_ack(struct sock *sk, struct sk_buff *skb, unsigned short ack) -{ - struct dn_scp *scp = DN_SK(sk); - unsigned short type = ((ack >> 12) & 0x0003); - int wakeup = 0; - - switch (type) { - case 0: /* ACK - Data */ - if (dn_after(ack, scp->ackrcv_dat)) { - scp->ackrcv_dat = ack & 0x0fff; - wakeup |= dn_nsp_check_xmit_queue(sk, skb, - &scp->data_xmit_queue, - ack); - } - break; - case 1: /* NAK - Data */ - break; - case 2: /* ACK - OtherData */ - if (dn_after(ack, scp->ackrcv_oth)) { - scp->ackrcv_oth = ack & 0x0fff; - wakeup |= dn_nsp_check_xmit_queue(sk, skb, - &scp->other_xmit_queue, - ack); - } - break; - case 3: /* NAK - OtherData */ - break; - } - - if (wakeup && !sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); -} - -/* - * This function is a universal ack processor. - */ -static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth) -{ - __le16 *ptr = (__le16 *)skb->data; - int len = 0; - unsigned short ack; - - if (skb->len < 2) - return len; - - if ((ack = le16_to_cpu(*ptr)) & 0x8000) { - skb_pull(skb, 2); - ptr++; - len += 2; - if ((ack & 0x4000) == 0) { - if (oth) - ack ^= 0x2000; - dn_ack(sk, skb, ack); - } - } - - if (skb->len < 2) - return len; - - if ((ack = le16_to_cpu(*ptr)) & 0x8000) { - skb_pull(skb, 2); - len += 2; - if ((ack & 0x4000) == 0) { - if (oth) - ack ^= 0x2000; - dn_ack(sk, skb, ack); - } - } - - return len; -} - - -/** - * dn_check_idf - Check an image data field format is correct. - * @pptr: Pointer to pointer to image data - * @len: Pointer to length of image data - * @max: The maximum allowed length of the data in the image data field - * @follow_on: Check that this many bytes exist beyond the end of the image data - * - * Returns: 0 if ok, -1 on error - */ -static inline int dn_check_idf(unsigned char **pptr, int *len, unsigned char max, unsigned char follow_on) -{ - unsigned char *ptr = *pptr; - unsigned char flen = *ptr++; - - (*len)--; - if (flen > max) - return -1; - if ((flen + follow_on) > *len) - return -1; - - *len -= flen; - *pptr = ptr + flen; - return 0; -} - -/* - * Table of reason codes to pass back to node which sent us a badly - * formed message, plus text messages for the log. A zero entry in - * the reason field means "don't reply" otherwise a disc init is sent with - * the specified reason code. - */ -static struct { - unsigned short reason; - const char *text; -} ci_err_table[] = { - { 0, "CI: Truncated message" }, - { NSP_REASON_ID, "CI: Destination username error" }, - { NSP_REASON_ID, "CI: Destination username type" }, - { NSP_REASON_US, "CI: Source username error" }, - { 0, "CI: Truncated at menuver" }, - { 0, "CI: Truncated before access or user data" }, - { NSP_REASON_IO, "CI: Access data format error" }, - { NSP_REASON_IO, "CI: User data format error" } -}; - -/* - * This function uses a slightly different lookup method - * to find its sockets, since it searches on object name/number - * rather than port numbers. Various tests are done to ensure that - * the incoming data is in the correct format before it is queued to - * a socket. - */ -static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct nsp_conn_init_msg *msg = (struct nsp_conn_init_msg *)skb->data; - struct sockaddr_dn dstaddr; - struct sockaddr_dn srcaddr; - unsigned char type = 0; - int dstlen; - int srclen; - unsigned char *ptr; - int len; - int err = 0; - unsigned char menuver; - - memset(&dstaddr, 0, sizeof(struct sockaddr_dn)); - memset(&srcaddr, 0, sizeof(struct sockaddr_dn)); - - /* - * 1. Decode & remove message header - */ - cb->src_port = msg->srcaddr; - cb->dst_port = msg->dstaddr; - cb->services = msg->services; - cb->info = msg->info; - cb->segsize = le16_to_cpu(msg->segsize); - - if (!pskb_may_pull(skb, sizeof(*msg))) - goto err_out; - - skb_pull(skb, sizeof(*msg)); - - len = skb->len; - ptr = skb->data; - - /* - * 2. Check destination end username format - */ - dstlen = dn_username2sockaddr(ptr, len, &dstaddr, &type); - err++; - if (dstlen < 0) - goto err_out; - - err++; - if (type > 1) - goto err_out; - - len -= dstlen; - ptr += dstlen; - - /* - * 3. Check source end username format - */ - srclen = dn_username2sockaddr(ptr, len, &srcaddr, &type); - err++; - if (srclen < 0) - goto err_out; - - len -= srclen; - ptr += srclen; - err++; - if (len < 1) - goto err_out; - - menuver = *ptr; - ptr++; - len--; - - /* - * 4. Check that optional data actually exists if menuver says it does - */ - err++; - if ((menuver & (DN_MENUVER_ACC | DN_MENUVER_USR)) && (len < 1)) - goto err_out; - - /* - * 5. Check optional access data format - */ - err++; - if (menuver & DN_MENUVER_ACC) { - if (dn_check_idf(&ptr, &len, 39, 1)) - goto err_out; - if (dn_check_idf(&ptr, &len, 39, 1)) - goto err_out; - if (dn_check_idf(&ptr, &len, 39, (menuver & DN_MENUVER_USR) ? 1 : 0)) - goto err_out; - } - - /* - * 6. Check optional user data format - */ - err++; - if (menuver & DN_MENUVER_USR) { - if (dn_check_idf(&ptr, &len, 16, 0)) - goto err_out; - } - - /* - * 7. Look up socket based on destination end username - */ - return dn_sklist_find_listener(&dstaddr); -err_out: - dn_log_martian(skb, ci_err_table[err].text); - *reason = ci_err_table[err].reason; - return NULL; -} - - -static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb) -{ - if (sk_acceptq_is_full(sk)) { - kfree_skb(skb); - return; - } - - sk->sk_ack_backlog++; - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_state_change(sk); -} - -static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct dn_scp *scp = DN_SK(sk); - unsigned char *ptr; - - if (skb->len < 4) - goto out; - - ptr = skb->data; - cb->services = *ptr++; - cb->info = *ptr++; - cb->segsize = le16_to_cpu(*(__le16 *)ptr); - - if ((scp->state == DN_CI) || (scp->state == DN_CD)) { - scp->persist = 0; - scp->addrrem = cb->src_port; - sk->sk_state = TCP_ESTABLISHED; - scp->state = DN_RUN; - scp->services_rem = cb->services; - scp->info_rem = cb->info; - scp->segsize_rem = cb->segsize; - - if ((scp->services_rem & NSP_FC_MASK) == NSP_FC_NONE) - scp->max_window = decnet_no_fc_max_cwnd; - - if (skb->len > 0) { - u16 dlen = *skb->data; - if ((dlen <= 16) && (dlen <= skb->len)) { - scp->conndata_in.opt_optl = cpu_to_le16(dlen); - skb_copy_from_linear_data_offset(skb, 1, - scp->conndata_in.opt_data, dlen); - } - } - dn_nsp_send_link(sk, DN_NOCHANGE, 0); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - } - -out: - kfree_skb(skb); -} - -static void dn_nsp_conn_ack(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->state == DN_CI) { - scp->state = DN_CD; - scp->persist = 0; - } - - kfree_skb(skb); -} - -static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - struct dn_skb_cb *cb = DN_SKB_CB(skb); - unsigned short reason; - - if (skb->len < 2) - goto out; - - reason = le16_to_cpu(*(__le16 *)skb->data); - skb_pull(skb, 2); - - scp->discdata_in.opt_status = cpu_to_le16(reason); - scp->discdata_in.opt_optl = 0; - memset(scp->discdata_in.opt_data, 0, 16); - - if (skb->len > 0) { - u16 dlen = *skb->data; - if ((dlen <= 16) && (dlen <= skb->len)) { - scp->discdata_in.opt_optl = cpu_to_le16(dlen); - skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen); - } - } - - scp->addrrem = cb->src_port; - sk->sk_state = TCP_CLOSE; - - switch (scp->state) { - case DN_CI: - case DN_CD: - scp->state = DN_RJ; - sk->sk_err = ECONNREFUSED; - break; - case DN_RUN: - sk->sk_shutdown |= SHUTDOWN_MASK; - scp->state = DN_DN; - break; - case DN_DI: - scp->state = DN_DIC; - break; - } - - if (!sock_flag(sk, SOCK_DEAD)) { - if (sk->sk_socket->state != SS_UNCONNECTED) - sk->sk_socket->state = SS_DISCONNECTING; - sk->sk_state_change(sk); - } - - /* - * It appears that its possible for remote machines to send disc - * init messages with no port identifier if we are in the CI and - * possibly also the CD state. Obviously we shouldn't reply with - * a message if we don't know what the end point is. - */ - if (scp->addrrem) { - dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, GFP_ATOMIC); - } - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - -out: - kfree_skb(skb); -} - -/* - * disc_conf messages are also called no_resources or no_link - * messages depending upon the "reason" field. - */ -static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - unsigned short reason; - - if (skb->len != 2) - goto out; - - reason = le16_to_cpu(*(__le16 *)skb->data); - - sk->sk_state = TCP_CLOSE; - - switch (scp->state) { - case DN_CI: - scp->state = DN_NR; - break; - case DN_DR: - if (reason == NSP_REASON_DC) - scp->state = DN_DRC; - if (reason == NSP_REASON_NL) - scp->state = DN_CN; - break; - case DN_DI: - scp->state = DN_DIC; - break; - case DN_RUN: - sk->sk_shutdown |= SHUTDOWN_MASK; - /* fall through */ - case DN_CC: - scp->state = DN_CN; - } - - if (!sock_flag(sk, SOCK_DEAD)) { - if (sk->sk_socket->state != SS_UNCONNECTED) - sk->sk_socket->state = SS_DISCONNECTING; - sk->sk_state_change(sk); - } - - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - -out: - kfree_skb(skb); -} - -static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - unsigned short segnum; - unsigned char lsflags; - signed char fcval; - int wake_up = 0; - char *ptr = skb->data; - unsigned char fctype = scp->services_rem & NSP_FC_MASK; - - if (skb->len != 4) - goto out; - - segnum = le16_to_cpu(*(__le16 *)ptr); - ptr += 2; - lsflags = *(unsigned char *)ptr++; - fcval = *ptr; - - /* - * Here we ignore erronous packets which should really - * should cause a connection abort. It is not critical - * for now though. - */ - if (lsflags & 0xf8) - goto out; - - if (seq_next(scp->numoth_rcv, segnum)) { - seq_add(&scp->numoth_rcv, 1); - switch(lsflags & 0x04) { /* FCVAL INT */ - case 0x00: /* Normal Request */ - switch(lsflags & 0x03) { /* FCVAL MOD */ - case 0x00: /* Request count */ - if (fcval < 0) { - unsigned char p_fcval = -fcval; - if ((scp->flowrem_dat > p_fcval) && - (fctype == NSP_FC_SCMC)) { - scp->flowrem_dat -= p_fcval; - } - } else if (fcval > 0) { - scp->flowrem_dat += fcval; - wake_up = 1; - } - break; - case 0x01: /* Stop outgoing data */ - scp->flowrem_sw = DN_DONTSEND; - break; - case 0x02: /* Ok to start again */ - scp->flowrem_sw = DN_SEND; - dn_nsp_output(sk); - wake_up = 1; - } - break; - case 0x04: /* Interrupt Request */ - if (fcval > 0) { - scp->flowrem_oth += fcval; - wake_up = 1; - } - break; - } - if (wake_up && !sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - } - - dn_nsp_send_oth_ack(sk); - -out: - kfree_skb(skb); -} - -/* - * Copy of sock_queue_rcv_skb (from sock.h) without - * bh_lock_sock() (its already held when this is called) which - * also allows data and other data to be queued to a socket. - */ -static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig, struct sk_buff_head *queue) -{ - int err; - - /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces - number of warnings when compiling with -W --ANK - */ - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned int)sk->sk_rcvbuf) { - err = -ENOMEM; - goto out; - } - - err = sk_filter(sk, skb); - if (err) - goto out; - - skb_set_owner_r(skb, sk); - skb_queue_tail(queue, skb); - - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); -out: - return err; -} - -static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - unsigned short segnum; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - int queued = 0; - - if (skb->len < 2) - goto out; - - cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data); - skb_pull(skb, 2); - - if (seq_next(scp->numoth_rcv, segnum)) { - - if (dn_queue_skb(sk, skb, SIGURG, &scp->other_receive_queue) == 0) { - seq_add(&scp->numoth_rcv, 1); - scp->other_report = 0; - queued = 1; - } - } - - dn_nsp_send_oth_ack(sk); -out: - if (!queued) - kfree_skb(skb); -} - -static void dn_nsp_data(struct sock *sk, struct sk_buff *skb) -{ - int queued = 0; - unsigned short segnum; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct dn_scp *scp = DN_SK(sk); - - if (skb->len < 2) - goto out; - - cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data); - skb_pull(skb, 2); - - if (seq_next(scp->numdat_rcv, segnum)) { - if (dn_queue_skb(sk, skb, SIGIO, &sk->sk_receive_queue) == 0) { - seq_add(&scp->numdat_rcv, 1); - queued = 1; - } - - if ((scp->flowloc_sw == DN_SEND) && dn_congested(sk)) { - scp->flowloc_sw = DN_DONTSEND; - dn_nsp_send_link(sk, DN_DONTSEND, 0); - } - } - - dn_nsp_send_data_ack(sk); -out: - if (!queued) - kfree_skb(skb); -} - -/* - * If one of our conninit messages is returned, this function - * deals with it. It puts the socket into the NO_COMMUNICATION - * state. - */ -static void dn_returned_conn_init(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->state == DN_CI) { - scp->state = DN_NC; - sk->sk_state = TCP_CLOSE; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - } - - kfree_skb(skb); -} - -static int dn_nsp_no_socket(struct sk_buff *skb, unsigned short reason) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - int ret = NET_RX_DROP; - - /* Must not reply to returned packets */ - if (cb->rt_flags & DN_RT_F_RTS) - goto out; - - if ((reason != NSP_REASON_OK) && ((cb->nsp_flags & 0x0c) == 0x08)) { - switch (cb->nsp_flags & 0x70) { - case 0x10: - case 0x60: /* (Retransmitted) Connect Init */ - dn_nsp_return_disc(skb, NSP_DISCINIT, reason); - ret = NET_RX_SUCCESS; - break; - case 0x20: /* Connect Confirm */ - dn_nsp_return_disc(skb, NSP_DISCCONF, reason); - ret = NET_RX_SUCCESS; - break; - } - } - -out: - kfree_skb(skb); - return ret; -} - -static int dn_nsp_rx_packet(struct net *net, struct sock *sk2, - struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct sock *sk = NULL; - unsigned char *ptr = (unsigned char *)skb->data; - unsigned short reason = NSP_REASON_NL; - - if (!pskb_may_pull(skb, 2)) - goto free_out; - - skb_reset_transport_header(skb); - cb->nsp_flags = *ptr++; - - if (decnet_debug_level & 2) - printk(KERN_DEBUG "dn_nsp_rx: Message type 0x%02x\n", (int)cb->nsp_flags); - - if (cb->nsp_flags & 0x83) - goto free_out; - - /* - * Filter out conninits and useless packet types - */ - if ((cb->nsp_flags & 0x0c) == 0x08) { - switch (cb->nsp_flags & 0x70) { - case 0x00: /* NOP */ - case 0x70: /* Reserved */ - case 0x50: /* Reserved, Phase II node init */ - goto free_out; - case 0x10: - case 0x60: - if (unlikely(cb->rt_flags & DN_RT_F_RTS)) - goto free_out; - sk = dn_find_listener(skb, &reason); - goto got_it; - } - } - - if (!pskb_may_pull(skb, 3)) - goto free_out; - - /* - * Grab the destination address. - */ - cb->dst_port = *(__le16 *)ptr; - cb->src_port = 0; - ptr += 2; - - /* - * If not a connack, grab the source address too. - */ - if (pskb_may_pull(skb, 5)) { - cb->src_port = *(__le16 *)ptr; - ptr += 2; - skb_pull(skb, 5); - } - - /* - * Returned packets... - * Swap src & dst and look up in the normal way. - */ - if (unlikely(cb->rt_flags & DN_RT_F_RTS)) { - swap(cb->dst_port, cb->src_port); - swap(cb->dst, cb->src); - } - - /* - * Find the socket to which this skb is destined. - */ - sk = dn_find_by_skb(skb); -got_it: - if (sk != NULL) { - struct dn_scp *scp = DN_SK(sk); - - /* Reset backoff */ - scp->nsp_rxtshift = 0; - - /* - * We linearize everything except data segments here. - */ - if (cb->nsp_flags & ~0x60) { - if (unlikely(skb_linearize(skb))) - goto free_out; - } - - return sk_receive_skb(sk, skb, 0); - } - - return dn_nsp_no_socket(skb, reason); - -free_out: - kfree_skb(skb); - return NET_RX_DROP; -} - -int dn_nsp_rx(struct sk_buff *skb) -{ - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, - &init_net, NULL, skb, skb->dev, NULL, - dn_nsp_rx_packet); -} - -/* - * This is the main receive routine for sockets. It is called - * from the above when the socket is not busy, and also from - * sock_release() when there is a backlog queued up. - */ -int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb) -{ - struct dn_scp *scp = DN_SK(sk); - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - if (cb->rt_flags & DN_RT_F_RTS) { - if (cb->nsp_flags == 0x18 || cb->nsp_flags == 0x68) - dn_returned_conn_init(sk, skb); - else - kfree_skb(skb); - return NET_RX_SUCCESS; - } - - /* - * Control packet. - */ - if ((cb->nsp_flags & 0x0c) == 0x08) { - switch (cb->nsp_flags & 0x70) { - case 0x10: - case 0x60: - dn_nsp_conn_init(sk, skb); - break; - case 0x20: - dn_nsp_conn_conf(sk, skb); - break; - case 0x30: - dn_nsp_disc_init(sk, skb); - break; - case 0x40: - dn_nsp_disc_conf(sk, skb); - break; - } - - } else if (cb->nsp_flags == 0x24) { - /* - * Special for connacks, 'cos they don't have - * ack data or ack otherdata info. - */ - dn_nsp_conn_ack(sk, skb); - } else { - int other = 1; - - /* both data and ack frames can kick a CC socket into RUN */ - if ((scp->state == DN_CC) && !sock_flag(sk, SOCK_DEAD)) { - scp->state = DN_RUN; - sk->sk_state = TCP_ESTABLISHED; - sk->sk_state_change(sk); - } - - if ((cb->nsp_flags & 0x1c) == 0) - other = 0; - if (cb->nsp_flags == 0x04) - other = 0; - - /* - * Read out ack data here, this applies equally - * to data, other data, link serivce and both - * ack data and ack otherdata. - */ - dn_process_ack(sk, skb, other); - - /* - * If we've some sort of data here then call a - * suitable routine for dealing with it, otherwise - * the packet is an ack and can be discarded. - */ - if ((cb->nsp_flags & 0x0c) == 0) { - - if (scp->state != DN_RUN) - goto free_out; - - switch (cb->nsp_flags) { - case 0x10: /* LS */ - dn_nsp_linkservice(sk, skb); - break; - case 0x30: /* OD */ - dn_nsp_otherdata(sk, skb); - break; - default: - dn_nsp_data(sk, skb); - } - - } else { /* Ack, chuck it out here */ -free_out: - kfree_skb(skb); - } - } - - return NET_RX_SUCCESS; -} diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c deleted file mode 100644 index a1779de6bd9c..000000000000 --- a/net/decnet/dn_nsp_out.c +++ /dev/null @@ -1,703 +0,0 @@ -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Network Services Protocol (Output) - * - * Author: Eduardo Marcelo Serrat <emserrat@geocities.com> - * - * Changes: - * - * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from - * original dn_nsp.c. - * Steve Whitehouse: Updated to work with my new routing architecture. - * Steve Whitehouse: Added changes from Eduardo Serrat's patches. - * Steve Whitehouse: Now conninits have the "return" bit set. - * Steve Whitehouse: Fixes to check alloc'd skbs are non NULL! - * Moved output state machine into one function - * Steve Whitehouse: New output state machine - * Paul Koning: Connect Confirm message fix. - * Eduardo Serrat: Fix to stop dn_nsp_do_disc() sending malformed packets. - * Steve Whitehouse: dn_nsp_output() and friends needed a spring clean - * Steve Whitehouse: Moved dn_nsp_send() in here from route.h - */ - -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. -*******************************************************************************/ - -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/string.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/netdevice.h> -#include <linux/inet.h> -#include <linux/route.h> -#include <linux/slab.h> -#include <net/sock.h> -#include <linux/fcntl.h> -#include <linux/mm.h> -#include <linux/termios.h> -#include <linux/interrupt.h> -#include <linux/proc_fs.h> -#include <linux/stat.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/if_packet.h> -#include <net/neighbour.h> -#include <net/dst.h> -#include <net/flow.h> -#include <net/dn.h> -#include <net/dn_nsp.h> -#include <net/dn_dev.h> -#include <net/dn_route.h> - - -static int nsp_backoff[NSP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; - -static void dn_nsp_send(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - struct dn_scp *scp = DN_SK(sk); - struct dst_entry *dst; - struct flowidn fld; - - skb_reset_transport_header(skb); - scp->stamp = jiffies; - - dst = sk_dst_check(sk, 0); - if (dst) { -try_again: - skb_dst_set(skb, dst); - dst_output(&init_net, skb->sk, skb); - return; - } - - memset(&fld, 0, sizeof(fld)); - fld.flowidn_oif = sk->sk_bound_dev_if; - fld.saddr = dn_saddr2dn(&scp->addr); - fld.daddr = dn_saddr2dn(&scp->peer); - dn_sk_ports_copy(&fld, scp); - fld.flowidn_proto = DNPROTO_NSP; - if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) { - dst = sk_dst_get(sk); - sk->sk_route_caps = dst->dev->features; - goto try_again; - } - - sk->sk_err = EHOSTUNREACH; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); -} - - -/* - * If sk == NULL, then we assume that we are supposed to be making - * a routing layer skb. If sk != NULL, then we are supposed to be - * creating an skb for the NSP layer. - * - * The eventual aim is for each socket to have a cached header size - * for its outgoing packets, and to set hdr from this when sk != NULL. - */ -struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri) -{ - struct sk_buff *skb; - int hdr = 64; - - if ((skb = alloc_skb(size + hdr, pri)) == NULL) - return NULL; - - skb->protocol = htons(ETH_P_DNA_RT); - skb->pkt_type = PACKET_OUTGOING; - - if (sk) - skb_set_owner_w(skb, sk); - - skb_reserve(skb, hdr); - - return skb; -} - -/* - * Calculate persist timer based upon the smoothed round - * trip time and the variance. Backoff according to the - * nsp_backoff[] array. - */ -unsigned long dn_nsp_persist(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1; - - t *= nsp_backoff[scp->nsp_rxtshift]; - - if (t < HZ) t = HZ; - if (t > (600*HZ)) t = (600*HZ); - - if (scp->nsp_rxtshift < NSP_MAXRXTSHIFT) - scp->nsp_rxtshift++; - - /* printk(KERN_DEBUG "rxtshift %lu, t=%lu\n", scp->nsp_rxtshift, t); */ - - return t; -} - -/* - * This is called each time we get an estimate for the rtt - * on the link. - */ -static void dn_nsp_rtt(struct sock *sk, long rtt) -{ - struct dn_scp *scp = DN_SK(sk); - long srtt = (long)scp->nsp_srtt; - long rttvar = (long)scp->nsp_rttvar; - long delta; - - /* - * If the jiffies clock flips over in the middle of timestamp - * gathering this value might turn out negative, so we make sure - * that is it always positive here. - */ - if (rtt < 0) - rtt = -rtt; - /* - * Add new rtt to smoothed average - */ - delta = ((rtt << 3) - srtt); - srtt += (delta >> 3); - if (srtt >= 1) - scp->nsp_srtt = (unsigned long)srtt; - else - scp->nsp_srtt = 1; - - /* - * Add new rtt varience to smoothed varience - */ - delta >>= 1; - rttvar += ((((delta>0)?(delta):(-delta)) - rttvar) >> 2); - if (rttvar >= 1) - scp->nsp_rttvar = (unsigned long)rttvar; - else - scp->nsp_rttvar = 1; - - /* printk(KERN_DEBUG "srtt=%lu rttvar=%lu\n", scp->nsp_srtt, scp->nsp_rttvar); */ -} - -/** - * dn_nsp_clone_and_send - Send a data packet by cloning it - * @skb: The packet to clone and transmit - * @gfp: memory allocation flag - * - * Clone a queued data or other data packet and transmit it. - * - * Returns: The number of times the packet has been sent previously - */ -static inline unsigned int dn_nsp_clone_and_send(struct sk_buff *skb, - gfp_t gfp) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct sk_buff *skb2; - int ret = 0; - - if ((skb2 = skb_clone(skb, gfp)) != NULL) { - ret = cb->xmit_count; - cb->xmit_count++; - cb->stamp = jiffies; - skb2->sk = skb->sk; - dn_nsp_send(skb2); - } - - return ret; -} - -/** - * dn_nsp_output - Try and send something from socket queues - * @sk: The socket whose queues are to be investigated - * - * Try and send the packet on the end of the data and other data queues. - * Other data gets priority over data, and if we retransmit a packet we - * reduce the window by dividing it in two. - * - */ -void dn_nsp_output(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - struct sk_buff *skb; - unsigned int reduce_win = 0; - - /* - * First we check for otherdata/linkservice messages - */ - if ((skb = skb_peek(&scp->other_xmit_queue)) != NULL) - reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC); - - /* - * If we may not send any data, we don't. - * If we are still trying to get some other data down the - * channel, we don't try and send any data. - */ - if (reduce_win || (scp->flowrem_sw != DN_SEND)) - goto recalc_window; - - if ((skb = skb_peek(&scp->data_xmit_queue)) != NULL) - reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC); - - /* - * If we've sent any frame more than once, we cut the - * send window size in half. There is always a minimum - * window size of one available. - */ -recalc_window: - if (reduce_win) { - scp->snd_window >>= 1; - if (scp->snd_window < NSP_MIN_WINDOW) - scp->snd_window = NSP_MIN_WINDOW; - } -} - -int dn_nsp_xmit_timeout(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - dn_nsp_output(sk); - - if (!skb_queue_empty(&scp->data_xmit_queue) || - !skb_queue_empty(&scp->other_xmit_queue)) - scp->persist = dn_nsp_persist(sk); - - return 0; -} - -static inline __le16 *dn_mk_common_header(struct dn_scp *scp, struct sk_buff *skb, unsigned char msgflag, int len) -{ - unsigned char *ptr = skb_push(skb, len); - - BUG_ON(len < 5); - - *ptr++ = msgflag; - *((__le16 *)ptr) = scp->addrrem; - ptr += 2; - *((__le16 *)ptr) = scp->addrloc; - ptr += 2; - return (__le16 __force *)ptr; -} - -static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned char msgflag, int hlen, int other) -{ - struct dn_scp *scp = DN_SK(sk); - unsigned short acknum = scp->numdat_rcv & 0x0FFF; - unsigned short ackcrs = scp->numoth_rcv & 0x0FFF; - __le16 *ptr; - - BUG_ON(hlen < 9); - - scp->ackxmt_dat = acknum; - scp->ackxmt_oth = ackcrs; - acknum |= 0x8000; - ackcrs |= 0x8000; - - /* If this is an "other data/ack" message, swap acknum and ackcrs */ - if (other) - swap(acknum, ackcrs); - - /* Set "cross subchannel" bit in ackcrs */ - ackcrs |= 0x2000; - - ptr = dn_mk_common_header(scp, skb, msgflag, hlen); - - *ptr++ = cpu_to_le16(acknum); - *ptr++ = cpu_to_le16(ackcrs); - - return ptr; -} - -static __le16 *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int oth) -{ - struct dn_scp *scp = DN_SK(sk); - struct dn_skb_cb *cb = DN_SKB_CB(skb); - __le16 *ptr = dn_mk_ack_header(sk, skb, cb->nsp_flags, 11, oth); - - if (unlikely(oth)) { - cb->segnum = scp->numoth; - seq_add(&scp->numoth, 1); - } else { - cb->segnum = scp->numdat; - seq_add(&scp->numdat, 1); - } - *(ptr++) = cpu_to_le16(cb->segnum); - - return ptr; -} - -void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb, - gfp_t gfp, int oth) -{ - struct dn_scp *scp = DN_SK(sk); - struct dn_skb_cb *cb = DN_SKB_CB(skb); - unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1; - - cb->xmit_count = 0; - dn_nsp_mk_data_header(sk, skb, oth); - - /* - * Slow start: If we have been idle for more than - * one RTT, then reset window to min size. - */ - if ((jiffies - scp->stamp) > t) - scp->snd_window = NSP_MIN_WINDOW; - - if (oth) - skb_queue_tail(&scp->other_xmit_queue, skb); - else - skb_queue_tail(&scp->data_xmit_queue, skb); - - if (scp->flowrem_sw != DN_SEND) - return; - - dn_nsp_clone_and_send(skb, gfp); -} - - -int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *q, unsigned short acknum) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct dn_scp *scp = DN_SK(sk); - struct sk_buff *skb2, *n, *ack = NULL; - int wakeup = 0; - int try_retrans = 0; - unsigned long reftime = cb->stamp; - unsigned long pkttime; - unsigned short xmit_count; - unsigned short segnum; - - skb_queue_walk_safe(q, skb2, n) { - struct dn_skb_cb *cb2 = DN_SKB_CB(skb2); - - if (dn_before_or_equal(cb2->segnum, acknum)) - ack = skb2; - - /* printk(KERN_DEBUG "ack: %s %04x %04x\n", ack ? "ACK" : "SKIP", (int)cb2->segnum, (int)acknum); */ - - if (ack == NULL) - continue; - - /* printk(KERN_DEBUG "check_xmit_queue: %04x, %d\n", acknum, cb2->xmit_count); */ - - /* Does _last_ packet acked have xmit_count > 1 */ - try_retrans = 0; - /* Remember to wake up the sending process */ - wakeup = 1; - /* Keep various statistics */ - pkttime = cb2->stamp; - xmit_count = cb2->xmit_count; - segnum = cb2->segnum; - /* Remove and drop ack'ed packet */ - skb_unlink(ack, q); - kfree_skb(ack); - ack = NULL; - - /* - * We don't expect to see acknowledgements for packets we - * haven't sent yet. - */ - WARN_ON(xmit_count == 0); - - /* - * If the packet has only been sent once, we can use it - * to calculate the RTT and also open the window a little - * further. - */ - if (xmit_count == 1) { - if (dn_equal(segnum, acknum)) - dn_nsp_rtt(sk, (long)(pkttime - reftime)); - - if (scp->snd_window < scp->max_window) - scp->snd_window++; - } - - /* - * Packet has been sent more than once. If this is the last - * packet to be acknowledged then we want to send the next - * packet in the send queue again (assumes the remote host does - * go-back-N error control). - */ - if (xmit_count > 1) - try_retrans = 1; - } - - if (try_retrans) - dn_nsp_output(sk); - - return wakeup; -} - -void dn_nsp_send_data_ack(struct sock *sk) -{ - struct sk_buff *skb = NULL; - - if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL) - return; - - skb_reserve(skb, 9); - dn_mk_ack_header(sk, skb, 0x04, 9, 0); - dn_nsp_send(skb); -} - -void dn_nsp_send_oth_ack(struct sock *sk) -{ - struct sk_buff *skb = NULL; - - if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL) - return; - - skb_reserve(skb, 9); - dn_mk_ack_header(sk, skb, 0x14, 9, 1); - dn_nsp_send(skb); -} - - -void dn_send_conn_ack (struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - struct sk_buff *skb = NULL; - struct nsp_conn_ack_msg *msg; - - if ((skb = dn_alloc_skb(sk, 3, sk->sk_allocation)) == NULL) - return; - - msg = skb_put(skb, 3); - msg->msgflg = 0x24; - msg->dstaddr = scp->addrrem; - - dn_nsp_send(skb); -} - -static int dn_nsp_retrans_conn_conf(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->state == DN_CC) - dn_send_conn_conf(sk, GFP_ATOMIC); - - return 0; -} - -void dn_send_conn_conf(struct sock *sk, gfp_t gfp) -{ - struct dn_scp *scp = DN_SK(sk); - struct sk_buff *skb = NULL; - struct nsp_conn_init_msg *msg; - __u8 len = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); - - if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL) - return; - - msg = skb_put(skb, sizeof(*msg)); - msg->msgflg = 0x28; - msg->dstaddr = scp->addrrem; - msg->srcaddr = scp->addrloc; - msg->services = scp->services_loc; - msg->info = scp->info_loc; - msg->segsize = cpu_to_le16(scp->segsize_loc); - - skb_put_u8(skb, len); - - if (len > 0) - skb_put_data(skb, scp->conndata_out.opt_data, len); - - - dn_nsp_send(skb); - - scp->persist = dn_nsp_persist(sk); - scp->persist_fxn = dn_nsp_retrans_conn_conf; -} - - -static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, gfp_t gfp, - struct dst_entry *dst, - int ddl, unsigned char *dd, __le16 rem, __le16 loc) -{ - struct sk_buff *skb = NULL; - int size = 7 + ddl + ((msgflg == NSP_DISCINIT) ? 1 : 0); - unsigned char *msg; - - if ((dst == NULL) || (rem == 0)) { - net_dbg_ratelimited("DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n", - le16_to_cpu(rem), dst); - return; - } - - if ((skb = dn_alloc_skb(sk, size, gfp)) == NULL) - return; - - msg = skb_put(skb, size); - *msg++ = msgflg; - *(__le16 *)msg = rem; - msg += 2; - *(__le16 *)msg = loc; - msg += 2; - *(__le16 *)msg = cpu_to_le16(reason); - msg += 2; - if (msgflg == NSP_DISCINIT) - *msg++ = ddl; - - if (ddl) { - memcpy(msg, dd, ddl); - } - - /* - * This doesn't go via the dn_nsp_send() function since we need - * to be able to send disc packets out which have no socket - * associations. - */ - skb_dst_set(skb, dst_clone(dst)); - dst_output(&init_net, skb->sk, skb); -} - - -void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, - unsigned short reason, gfp_t gfp) -{ - struct dn_scp *scp = DN_SK(sk); - int ddl = 0; - - if (msgflg == NSP_DISCINIT) - ddl = le16_to_cpu(scp->discdata_out.opt_optl); - - if (reason == 0) - reason = le16_to_cpu(scp->discdata_out.opt_status); - - dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl, - scp->discdata_out.opt_data, scp->addrrem, scp->addrloc); -} - - -void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg, - unsigned short reason) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - int ddl = 0; - gfp_t gfp = GFP_ATOMIC; - - dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb_dst(skb), ddl, - NULL, cb->src_port, cb->dst_port); -} - - -void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval) -{ - struct dn_scp *scp = DN_SK(sk); - struct sk_buff *skb; - unsigned char *ptr; - gfp_t gfp = GFP_ATOMIC; - - if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL) - return; - - skb_reserve(skb, DN_MAX_NSP_DATA_HEADER); - ptr = skb_put(skb, 2); - DN_SKB_CB(skb)->nsp_flags = 0x10; - *ptr++ = lsflags; - *ptr = fcval; - - dn_nsp_queue_xmit(sk, skb, gfp, 1); - - scp->persist = dn_nsp_persist(sk); - scp->persist_fxn = dn_nsp_xmit_timeout; -} - -static int dn_nsp_retrans_conninit(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->state == DN_CI) - dn_nsp_send_conninit(sk, NSP_RCI); - - return 0; -} - -void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) -{ - struct dn_scp *scp = DN_SK(sk); - struct nsp_conn_init_msg *msg; - unsigned char aux; - unsigned char menuver; - struct dn_skb_cb *cb; - unsigned char type = 1; - gfp_t allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC; - struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation); - - if (!skb) - return; - - cb = DN_SKB_CB(skb); - msg = skb_put(skb, sizeof(*msg)); - - msg->msgflg = msgflg; - msg->dstaddr = 0x0000; /* Remote Node will assign it*/ - - msg->srcaddr = scp->addrloc; - msg->services = scp->services_loc; /* Requested flow control */ - msg->info = scp->info_loc; /* Version Number */ - msg->segsize = cpu_to_le16(scp->segsize_loc); /* Max segment size */ - - if (scp->peer.sdn_objnum) - type = 0; - - skb_put(skb, dn_sockaddr2username(&scp->peer, - skb_tail_pointer(skb), type)); - skb_put(skb, dn_sockaddr2username(&scp->addr, - skb_tail_pointer(skb), 2)); - - menuver = DN_MENUVER_ACC | DN_MENUVER_USR; - if (scp->peer.sdn_flags & SDF_PROXY) - menuver |= DN_MENUVER_PRX; - if (scp->peer.sdn_flags & SDF_UICPROXY) - menuver |= DN_MENUVER_UIC; - - skb_put_u8(skb, menuver); /* Menu Version */ - - aux = scp->accessdata.acc_userl; - skb_put_u8(skb, aux); - if (aux > 0) - skb_put_data(skb, scp->accessdata.acc_user, aux); - - aux = scp->accessdata.acc_passl; - skb_put_u8(skb, aux); - if (aux > 0) - skb_put_data(skb, scp->accessdata.acc_pass, aux); - - aux = scp->accessdata.acc_accl; - skb_put_u8(skb, aux); - if (aux > 0) - skb_put_data(skb, scp->accessdata.acc_acc, aux); - - aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); - skb_put_u8(skb, aux); - if (aux > 0) - skb_put_data(skb, scp->conndata_out.opt_data, aux); - - scp->persist = dn_nsp_persist(sk); - scp->persist_fxn = dn_nsp_retrans_conninit; - - cb->rt_flags = DN_RT_F_RQR; - - dn_nsp_send(skb); -} diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c deleted file mode 100644 index 658191fba94e..000000000000 --- a/net/decnet/dn_route.c +++ /dev/null @@ -1,1929 +0,0 @@ -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Routing Functions (Endnode and Router) - * - * Authors: Steve Whitehouse <SteveW@ACM.org> - * Eduardo Marcelo Serrat <emserrat@geocities.com> - * - * Changes: - * Steve Whitehouse : Fixes to allow "intra-ethernet" and - * "return-to-sender" bits on outgoing - * packets. - * Steve Whitehouse : Timeouts for cached routes. - * Steve Whitehouse : Use dst cache for input routes too. - * Steve Whitehouse : Fixed error values in dn_send_skb. - * Steve Whitehouse : Rework routing functions to better fit - * DECnet routing design - * Alexey Kuznetsov : New SMP locking - * Steve Whitehouse : More SMP locking changes & dn_cache_dump() - * Steve Whitehouse : Prerouting NF hook, now really is prerouting. - * Fixed possible skb leak in rtnetlink funcs. - * Steve Whitehouse : Dave Miller's dynamic hash table sizing and - * Alexey Kuznetsov's finer grained locking - * from ipv4/route.c. - * Steve Whitehouse : Routing is now starting to look like a - * sensible set of code now, mainly due to - * my copying the IPv4 routing code. The - * hooks here are modified and will continue - * to evolve for a while. - * Steve Whitehouse : Real SMP at last :-) Also new netfilter - * stuff. Look out raw sockets your days - * are numbered! - * Steve Whitehouse : Added return-to-sender functions. Added - * backlog congestion level return codes. - * Steve Whitehouse : Fixed bug where routes were set up with - * no ref count on net devices. - * Steve Whitehouse : RCU for the route cache - * Steve Whitehouse : Preparations for the flow cache - * Steve Whitehouse : Prepare for nonlinear skbs - */ - -/****************************************************************************** - (c) 1995-1998 E.M. Serrat emserrat@geocities.com - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. -*******************************************************************************/ - -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/sockios.h> -#include <linux/net.h> -#include <linux/netdevice.h> -#include <linux/inet.h> -#include <linux/route.h> -#include <linux/in_route.h> -#include <linux/slab.h> -#include <net/sock.h> -#include <linux/mm.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/init.h> -#include <linux/rtnetlink.h> -#include <linux/string.h> -#include <linux/netfilter_decnet.h> -#include <linux/rcupdate.h> -#include <linux/times.h> -#include <linux/export.h> -#include <asm/errno.h> -#include <net/net_namespace.h> -#include <net/netlink.h> -#include <net/neighbour.h> -#include <net/dst.h> -#include <net/flow.h> -#include <net/fib_rules.h> -#include <net/dn.h> -#include <net/dn_dev.h> -#include <net/dn_nsp.h> -#include <net/dn_route.h> -#include <net/dn_neigh.h> -#include <net/dn_fib.h> - -struct dn_rt_hash_bucket -{ - struct dn_route __rcu *chain; - spinlock_t lock; -}; - -extern struct neigh_table dn_neigh_table; - - -static unsigned char dn_hiord_addr[6] = {0xAA,0x00,0x04,0x00,0x00,0x00}; - -static const int dn_rt_min_delay = 2 * HZ; -static const int dn_rt_max_delay = 10 * HZ; -static const int dn_rt_mtu_expires = 10 * 60 * HZ; - -static unsigned long dn_rt_deadline; - -static int dn_dst_gc(struct dst_ops *ops); -static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); -static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); -static unsigned int dn_dst_mtu(const struct dst_entry *dst); -static void dn_dst_destroy(struct dst_entry *); -static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); -static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); -static void dn_dst_link_failure(struct sk_buff *); -static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb , u32 mtu, - bool confirm_neigh); -static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb); -static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, - struct sk_buff *skb, - const void *daddr); -static int dn_route_input(struct sk_buff *); -static void dn_run_flush(struct timer_list *unused); - -static struct dn_rt_hash_bucket *dn_rt_hash_table; -static unsigned int dn_rt_hash_mask; - -static struct timer_list dn_route_timer; -static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush); -int decnet_dst_gc_interval = 2; - -static struct dst_ops dn_dst_ops = { - .family = PF_DECnet, - .gc_thresh = 128, - .gc = dn_dst_gc, - .check = dn_dst_check, - .default_advmss = dn_dst_default_advmss, - .mtu = dn_dst_mtu, - .cow_metrics = dst_cow_metrics_generic, - .destroy = dn_dst_destroy, - .ifdown = dn_dst_ifdown, - .negative_advice = dn_dst_negative_advice, - .link_failure = dn_dst_link_failure, - .update_pmtu = dn_dst_update_pmtu, - .redirect = dn_dst_redirect, - .neigh_lookup = dn_dst_neigh_lookup, -}; - -static void dn_dst_destroy(struct dst_entry *dst) -{ - struct dn_route *rt = (struct dn_route *) dst; - - if (rt->n) - neigh_release(rt->n); - dst_destroy_metrics_generic(dst); -} - -static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) -{ - if (how) { - struct dn_route *rt = (struct dn_route *) dst; - struct neighbour *n = rt->n; - - if (n && n->dev == dev) { - n->dev = dev_net(dev)->loopback_dev; - dev_hold(n->dev); - dev_put(dev); - } - } -} - -static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) -{ - __u16 tmp = (__u16 __force)(src ^ dst); - tmp ^= (tmp >> 3); - tmp ^= (tmp >> 5); - tmp ^= (tmp >> 10); - return dn_rt_hash_mask & (unsigned int)tmp; -} - -static void dn_dst_check_expire(struct timer_list *unused) -{ - int i; - struct dn_route *rt; - struct dn_route __rcu **rtp; - unsigned long now = jiffies; - unsigned long expire = 120 * HZ; - - for (i = 0; i <= dn_rt_hash_mask; i++) { - rtp = &dn_rt_hash_table[i].chain; - - spin_lock(&dn_rt_hash_table[i].lock); - while ((rt = rcu_dereference_protected(*rtp, - lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { - if (atomic_read(&rt->dst.__refcnt) > 1 || - (now - rt->dst.lastuse) < expire) { - rtp = &rt->dn_next; - continue; - } - *rtp = rt->dn_next; - rt->dn_next = NULL; - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } - spin_unlock(&dn_rt_hash_table[i].lock); - - if ((jiffies - now) > 0) - break; - } - - mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ); -} - -static int dn_dst_gc(struct dst_ops *ops) -{ - struct dn_route *rt; - struct dn_route __rcu **rtp; - int i; - unsigned long now = jiffies; - unsigned long expire = 10 * HZ; - - for (i = 0; i <= dn_rt_hash_mask; i++) { - - spin_lock_bh(&dn_rt_hash_table[i].lock); - rtp = &dn_rt_hash_table[i].chain; - - while ((rt = rcu_dereference_protected(*rtp, - lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) { - if (atomic_read(&rt->dst.__refcnt) > 1 || - (now - rt->dst.lastuse) < expire) { - rtp = &rt->dn_next; - continue; - } - *rtp = rt->dn_next; - rt->dn_next = NULL; - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - break; - } - spin_unlock_bh(&dn_rt_hash_table[i].lock); - } - - return 0; -} - -/* - * The decnet standards don't impose a particular minimum mtu, what they - * do insist on is that the routing layer accepts a datagram of at least - * 230 bytes long. Here we have to subtract the routing header length from - * 230 to get the minimum acceptable mtu. If there is no neighbour, then we - * assume the worst and use a long header size. - * - * We update both the mtu and the advertised mss (i.e. the segment size we - * advertise to the other end). - */ -static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu, - bool confirm_neigh) -{ - struct dn_route *rt = (struct dn_route *) dst; - struct neighbour *n = rt->n; - u32 min_mtu = 230; - struct dn_dev *dn; - - dn = n ? rcu_dereference_raw(n->dev->dn_ptr) : NULL; - - if (dn && dn->use_long == 0) - min_mtu -= 6; - else - min_mtu -= 21; - - if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) { - if (!(dst_metric_locked(dst, RTAX_MTU))) { - dst_metric_set(dst, RTAX_MTU, mtu); - dst_set_expires(dst, dn_rt_mtu_expires); - } - if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { - u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; - u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS); - if (!existing_mss || existing_mss > mss) - dst_metric_set(dst, RTAX_ADVMSS, mss); - } - } -} - -static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb) -{ -} - -/* - * When a route has been marked obsolete. (e.g. routing cache flush) - */ -static struct dst_entry *dn_dst_check(struct dst_entry *dst, __u32 cookie) -{ - return NULL; -} - -static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst) -{ - dst_release(dst); - return NULL; -} - -static void dn_dst_link_failure(struct sk_buff *skb) -{ -} - -static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2) -{ - return ((fl1->daddr ^ fl2->daddr) | - (fl1->saddr ^ fl2->saddr) | - (fl1->flowidn_mark ^ fl2->flowidn_mark) | - (fl1->flowidn_scope ^ fl2->flowidn_scope) | - (fl1->flowidn_oif ^ fl2->flowidn_oif) | - (fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0; -} - -static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_route **rp) -{ - struct dn_route *rth; - struct dn_route __rcu **rthp; - unsigned long now = jiffies; - - rthp = &dn_rt_hash_table[hash].chain; - - spin_lock_bh(&dn_rt_hash_table[hash].lock); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { - if (compare_keys(&rth->fld, &rt->fld)) { - /* Put it first */ - *rthp = rth->dn_next; - rcu_assign_pointer(rth->dn_next, - dn_rt_hash_table[hash].chain); - rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); - - dst_hold_and_use(&rth->dst, now); - spin_unlock_bh(&dn_rt_hash_table[hash].lock); - - dst_release_immediate(&rt->dst); - *rp = rth; - return 0; - } - rthp = &rth->dn_next; - } - - rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain); - rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); - - dst_hold_and_use(&rt->dst, now); - spin_unlock_bh(&dn_rt_hash_table[hash].lock); - *rp = rt; - return 0; -} - -static void dn_run_flush(struct timer_list *unused) -{ - int i; - struct dn_route *rt, *next; - - for (i = 0; i < dn_rt_hash_mask; i++) { - spin_lock_bh(&dn_rt_hash_table[i].lock); - - if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL) - goto nothing_to_declare; - - for(; rt; rt = next) { - next = rcu_dereference_raw(rt->dn_next); - RCU_INIT_POINTER(rt->dn_next, NULL); - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } - -nothing_to_declare: - spin_unlock_bh(&dn_rt_hash_table[i].lock); - } -} - -static DEFINE_SPINLOCK(dn_rt_flush_lock); - -void dn_rt_cache_flush(int delay) -{ - unsigned long now = jiffies; - int user_mode = !in_interrupt(); - - if (delay < 0) - delay = dn_rt_min_delay; - - spin_lock_bh(&dn_rt_flush_lock); - - if (del_timer(&dn_rt_flush_timer) && delay > 0 && dn_rt_deadline) { - long tmo = (long)(dn_rt_deadline - now); - - if (user_mode && tmo < dn_rt_max_delay - dn_rt_min_delay) - tmo = 0; - - if (delay > tmo) - delay = tmo; - } - - if (delay <= 0) { - spin_unlock_bh(&dn_rt_flush_lock); - dn_run_flush(NULL); - return; - } - - if (dn_rt_deadline == 0) - dn_rt_deadline = now + dn_rt_max_delay; - - dn_rt_flush_timer.expires = now + delay; - add_timer(&dn_rt_flush_timer); - spin_unlock_bh(&dn_rt_flush_lock); -} - -/** - * dn_return_short - Return a short packet to its sender - * @skb: The packet to return - * - */ -static int dn_return_short(struct sk_buff *skb) -{ - struct dn_skb_cb *cb; - unsigned char *ptr; - __le16 *src; - __le16 *dst; - - /* Add back headers */ - skb_push(skb, skb->data - skb_network_header(skb)); - - if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL) - return NET_RX_DROP; - - cb = DN_SKB_CB(skb); - /* Skip packet length and point to flags */ - ptr = skb->data + 2; - *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS; - - dst = (__le16 *)ptr; - ptr += 2; - src = (__le16 *)ptr; - ptr += 2; - *ptr = 0; /* Zero hop count */ - - swap(*src, *dst); - - skb->pkt_type = PACKET_OUTGOING; - dn_rt_finish_output(skb, NULL, NULL); - return NET_RX_SUCCESS; -} - -/** - * dn_return_long - Return a long packet to its sender - * @skb: The long format packet to return - * - */ -static int dn_return_long(struct sk_buff *skb) -{ - struct dn_skb_cb *cb; - unsigned char *ptr; - unsigned char *src_addr, *dst_addr; - unsigned char tmp[ETH_ALEN]; - - /* Add back all headers */ - skb_push(skb, skb->data - skb_network_header(skb)); - - if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL) - return NET_RX_DROP; - - cb = DN_SKB_CB(skb); - /* Ignore packet length and point to flags */ - ptr = skb->data + 2; - - /* Skip padding */ - if (*ptr & DN_RT_F_PF) { - char padlen = (*ptr & ~DN_RT_F_PF); - ptr += padlen; - } - - *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS; - ptr += 2; - dst_addr = ptr; - ptr += 8; - src_addr = ptr; - ptr += 6; - *ptr = 0; /* Zero hop count */ - - /* Swap source and destination */ - memcpy(tmp, src_addr, ETH_ALEN); - memcpy(src_addr, dst_addr, ETH_ALEN); - memcpy(dst_addr, tmp, ETH_ALEN); - - skb->pkt_type = PACKET_OUTGOING; - dn_rt_finish_output(skb, dst_addr, src_addr); - return NET_RX_SUCCESS; -} - -/** - * dn_route_rx_packet - Try and find a route for an incoming packet - * @skb: The packet to find a route for - * - * Returns: result of input function if route is found, error code otherwise - */ -static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dn_skb_cb *cb; - int err; - - if ((err = dn_route_input(skb)) == 0) - return dst_input(skb); - - cb = DN_SKB_CB(skb); - if (decnet_debug_level & 4) { - char *devname = skb->dev ? skb->dev->name : "???"; - - printk(KERN_DEBUG - "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n", - (int)cb->rt_flags, devname, skb->len, - le16_to_cpu(cb->src), le16_to_cpu(cb->dst), - err, skb->pkt_type); - } - - if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) { - switch (cb->rt_flags & DN_RT_PKT_MSK) { - case DN_RT_PKT_SHORT: - return dn_return_short(skb); - case DN_RT_PKT_LONG: - return dn_return_long(skb); - } - } - - kfree_skb(skb); - return NET_RX_DROP; -} - -static int dn_route_rx_long(struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - unsigned char *ptr = skb->data; - - if (!pskb_may_pull(skb, 21)) /* 20 for long header, 1 for shortest nsp */ - goto drop_it; - - skb_pull(skb, 20); - skb_reset_transport_header(skb); - - /* Destination info */ - ptr += 2; - cb->dst = dn_eth2dn(ptr); - if (memcmp(ptr, dn_hiord_addr, 4) != 0) - goto drop_it; - ptr += 6; - - - /* Source info */ - ptr += 2; - cb->src = dn_eth2dn(ptr); - if (memcmp(ptr, dn_hiord_addr, 4) != 0) - goto drop_it; - ptr += 6; - /* Other junk */ - ptr++; - cb->hops = *ptr++; /* Visit Count */ - - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, - &init_net, NULL, skb, skb->dev, NULL, - dn_route_rx_packet); - -drop_it: - kfree_skb(skb); - return NET_RX_DROP; -} - - - -static int dn_route_rx_short(struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - unsigned char *ptr = skb->data; - - if (!pskb_may_pull(skb, 6)) /* 5 for short header + 1 for shortest nsp */ - goto drop_it; - - skb_pull(skb, 5); - skb_reset_transport_header(skb); - - cb->dst = *(__le16 *)ptr; - ptr += 2; - cb->src = *(__le16 *)ptr; - ptr += 2; - cb->hops = *ptr & 0x3f; - - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, - &init_net, NULL, skb, skb->dev, NULL, - dn_route_rx_packet); - -drop_it: - kfree_skb(skb); - return NET_RX_DROP; -} - -static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - /* - * I know we drop the packet here, but thats considered success in - * this case - */ - kfree_skb(skb); - return NET_RX_SUCCESS; -} - -static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - dn_dev_hello(skb); - dn_neigh_pointopoint_hello(skb); - return NET_RX_SUCCESS; -} - -int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) -{ - struct dn_skb_cb *cb; - unsigned char flags = 0; - __u16 len = le16_to_cpu(*(__le16 *)skb->data); - struct dn_dev *dn = rcu_dereference(dev->dn_ptr); - unsigned char padlen = 0; - - if (!net_eq(dev_net(dev), &init_net)) - goto dump_it; - - if (dn == NULL) - goto dump_it; - - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) - goto out; - - if (!pskb_may_pull(skb, 3)) - goto dump_it; - - skb_pull(skb, 2); - - if (len > skb->len) - goto dump_it; - - skb_trim(skb, len); - - flags = *skb->data; - - cb = DN_SKB_CB(skb); - cb->stamp = jiffies; - cb->iif = dev->ifindex; - - /* - * If we have padding, remove it. - */ - if (flags & DN_RT_F_PF) { - padlen = flags & ~DN_RT_F_PF; - if (!pskb_may_pull(skb, padlen + 1)) - goto dump_it; - skb_pull(skb, padlen); - flags = *skb->data; - } - - skb_reset_network_header(skb); - - /* - * Weed out future version DECnet - */ - if (flags & DN_RT_F_VER) - goto dump_it; - - cb->rt_flags = flags; - - if (decnet_debug_level & 1) - printk(KERN_DEBUG - "dn_route_rcv: got 0x%02x from %s [%d %d %d]\n", - (int)flags, (dev) ? dev->name : "???", len, skb->len, - padlen); - - if (flags & DN_RT_PKT_CNTL) { - if (unlikely(skb_linearize(skb))) - goto dump_it; - - switch (flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_INIT: - dn_dev_init_pkt(skb); - break; - case DN_RT_PKT_VERI: - dn_dev_veri_pkt(skb); - break; - } - - if (dn->parms.state != DN_DEV_S_RU) - goto dump_it; - - switch (flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_HELO: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - &init_net, NULL, skb, skb->dev, NULL, - dn_route_ptp_hello); - - case DN_RT_PKT_L1RT: - case DN_RT_PKT_L2RT: - return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, - &init_net, NULL, skb, skb->dev, NULL, - dn_route_discard); - case DN_RT_PKT_ERTH: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - &init_net, NULL, skb, skb->dev, NULL, - dn_neigh_router_hello); - - case DN_RT_PKT_EEDH: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - &init_net, NULL, skb, skb->dev, NULL, - dn_neigh_endnode_hello); - } - } else { - if (dn->parms.state != DN_DEV_S_RU) - goto dump_it; - - skb_pull(skb, 1); /* Pull flags */ - - switch (flags & DN_RT_PKT_MSK) { - case DN_RT_PKT_LONG: - return dn_route_rx_long(skb); - case DN_RT_PKT_SHORT: - return dn_route_rx_short(skb); - } - } - -dump_it: - kfree_skb(skb); -out: - return NET_RX_DROP; -} - -static int dn_output(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct dn_route *rt = (struct dn_route *)dst; - struct net_device *dev = dst->dev; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - int err = -EINVAL; - - if (rt->n == NULL) - goto error; - - skb->dev = dev; - - cb->src = rt->rt_saddr; - cb->dst = rt->rt_daddr; - - /* - * Always set the Intra-Ethernet bit on all outgoing packets - * originated on this node. Only valid flag from upper layers - * is return-to-sender-requested. Set hop count to 0 too. - */ - cb->rt_flags &= ~DN_RT_F_RQR; - cb->rt_flags |= DN_RT_F_IE; - cb->hops = 0; - - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, - &init_net, sk, skb, NULL, dev, - dn_to_neigh_output); - -error: - net_dbg_ratelimited("dn_output: This should not happen\n"); - - kfree_skb(skb); - - return err; -} - -static int dn_forward(struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct dst_entry *dst = skb_dst(skb); - struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); - struct dn_route *rt; - int header_len; - struct net_device *dev = skb->dev; - - if (skb->pkt_type != PACKET_HOST) - goto drop; - - /* Ensure that we have enough space for headers */ - rt = (struct dn_route *)skb_dst(skb); - header_len = dn_db->use_long ? 21 : 6; - if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len)) - goto drop; - - /* - * Hop count exceeded. - */ - if (++cb->hops > 30) - goto drop; - - skb->dev = rt->dst.dev; - - /* - * If packet goes out same interface it came in on, then set - * the Intra-Ethernet bit. This has no effect for short - * packets, so we don't need to test for them here. - */ - cb->rt_flags &= ~DN_RT_F_IE; - if (rt->rt_flags & RTCF_DOREDIRECT) - cb->rt_flags |= DN_RT_F_IE; - - return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, - &init_net, NULL, skb, dev, skb->dev, - dn_to_neigh_output); - -drop: - kfree_skb(skb); - return NET_RX_DROP; -} - -/* - * Used to catch bugs. This should never normally get - * called. - */ -static int dn_rt_bug_out(struct net *net, struct sock *sk, struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n", - le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); - - kfree_skb(skb); - - return NET_RX_DROP; -} - -static int dn_rt_bug(struct sk_buff *skb) -{ - struct dn_skb_cb *cb = DN_SKB_CB(skb); - - net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n", - le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); - - kfree_skb(skb); - - return NET_RX_DROP; -} - -static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) -{ - return dn_mss_from_pmtu(dst->dev, dst_mtu(dst)); -} - -static unsigned int dn_dst_mtu(const struct dst_entry *dst) -{ - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - - return mtu ? : dst->dev->mtu; -} - -static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, - struct sk_buff *skb, - const void *daddr) -{ - return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev); -} - -static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) -{ - struct dn_fib_info *fi = res->fi; - struct net_device *dev = rt->dst.dev; - unsigned int mss_metric; - struct neighbour *n; - - if (fi) { - if (DN_FIB_RES_GW(*res) && - DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - rt->rt_gateway = DN_FIB_RES_GW(*res); - dst_init_metrics(&rt->dst, fi->fib_metrics, true); - } - rt->rt_type = res->type; - - if (dev != NULL && rt->n == NULL) { - n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); - if (IS_ERR(n)) - return PTR_ERR(n); - rt->n = n; - } - - if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) - dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); - mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); - if (mss_metric) { - unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); - if (mss_metric > mss) - dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); - } - return 0; -} - -static inline int dn_match_addr(__le16 addr1, __le16 addr2) -{ - __u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2); - int match = 16; - while(tmp) { - tmp >>= 1; - match--; - } - return match; -} - -static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope) -{ - __le16 saddr = 0; - struct dn_dev *dn_db; - struct dn_ifaddr *ifa; - int best_match = 0; - int ret; - - rcu_read_lock(); - dn_db = rcu_dereference(dev->dn_ptr); - for (ifa = rcu_dereference(dn_db->ifa_list); - ifa != NULL; - ifa = rcu_dereference(ifa->ifa_next)) { - if (ifa->ifa_scope > scope) - continue; - if (!daddr) { - saddr = ifa->ifa_local; - break; - } - ret = dn_match_addr(daddr, ifa->ifa_local); - if (ret > best_match) - saddr = ifa->ifa_local; - if (best_match == 0) - saddr = ifa->ifa_local; - } - rcu_read_unlock(); - - return saddr; -} - -static inline __le16 __dn_fib_res_prefsrc(struct dn_fib_res *res) -{ - return dnet_select_source(DN_FIB_RES_DEV(*res), DN_FIB_RES_GW(*res), res->scope); -} - -static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_res *res) -{ - __le16 mask = dnet_make_mask(res->prefixlen); - return (daddr&~mask)|res->fi->fib_nh->nh_gw; -} - -static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard) -{ - struct flowidn fld = { - .daddr = oldflp->daddr, - .saddr = oldflp->saddr, - .flowidn_scope = RT_SCOPE_UNIVERSE, - .flowidn_mark = oldflp->flowidn_mark, - .flowidn_iif = LOOPBACK_IFINDEX, - .flowidn_oif = oldflp->flowidn_oif, - }; - struct dn_route *rt = NULL; - struct net_device *dev_out = NULL, *dev; - struct neighbour *neigh = NULL; - unsigned int hash; - unsigned int flags = 0; - struct dn_fib_res res = { .fi = NULL, .type = RTN_UNICAST }; - int err; - int free_res = 0; - __le16 gateway = 0; - - if (decnet_debug_level & 16) - printk(KERN_DEBUG - "dn_route_output_slow: dst=%04x src=%04x mark=%d" - " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr), - le16_to_cpu(oldflp->saddr), - oldflp->flowidn_mark, LOOPBACK_IFINDEX, - oldflp->flowidn_oif); - - /* If we have an output interface, verify its a DECnet device */ - if (oldflp->flowidn_oif) { - dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif); - err = -ENODEV; - if (dev_out && dev_out->dn_ptr == NULL) { - dev_put(dev_out); - dev_out = NULL; - } - if (dev_out == NULL) - goto out; - } - - /* If we have a source address, verify that its a local address */ - if (oldflp->saddr) { - err = -EADDRNOTAVAIL; - - if (dev_out) { - if (dn_dev_islocal(dev_out, oldflp->saddr)) - goto source_ok; - dev_put(dev_out); - goto out; - } - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if (!dev->dn_ptr) - continue; - if (!dn_dev_islocal(dev, oldflp->saddr)) - continue; - if ((dev->flags & IFF_LOOPBACK) && - oldflp->daddr && - !dn_dev_islocal(dev, oldflp->daddr)) - continue; - - dev_out = dev; - break; - } - rcu_read_unlock(); - if (dev_out == NULL) - goto out; - dev_hold(dev_out); -source_ok: - ; - } - - /* No destination? Assume its local */ - if (!fld.daddr) { - fld.daddr = fld.saddr; - - if (dev_out) - dev_put(dev_out); - err = -EINVAL; - dev_out = init_net.loopback_dev; - if (!dev_out->dn_ptr) - goto out; - err = -EADDRNOTAVAIL; - dev_hold(dev_out); - if (!fld.daddr) { - fld.daddr = - fld.saddr = dnet_select_source(dev_out, 0, - RT_SCOPE_HOST); - if (!fld.daddr) - goto out; - } - fld.flowidn_oif = LOOPBACK_IFINDEX; - res.type = RTN_LOCAL; - goto make_route; - } - - if (decnet_debug_level & 16) - printk(KERN_DEBUG - "dn_route_output_slow: initial checks complete." - " dst=%04x src=%04x oif=%d try_hard=%d\n", - le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr), - fld.flowidn_oif, try_hard); - - /* - * N.B. If the kernel is compiled without router support then - * dn_fib_lookup() will evaluate to non-zero so this if () block - * will always be executed. - */ - err = -ESRCH; - if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) { - struct dn_dev *dn_db; - if (err != -ESRCH) - goto out; - /* - * Here the fallback is basically the standard algorithm for - * routing in endnodes which is described in the DECnet routing - * docs - * - * If we are not trying hard, look in neighbour cache. - * The result is tested to ensure that if a specific output - * device/source address was requested, then we honour that - * here - */ - if (!try_hard) { - neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr); - if (neigh) { - if ((oldflp->flowidn_oif && - (neigh->dev->ifindex != oldflp->flowidn_oif)) || - (oldflp->saddr && - (!dn_dev_islocal(neigh->dev, - oldflp->saddr)))) { - neigh_release(neigh); - neigh = NULL; - } else { - if (dev_out) - dev_put(dev_out); - if (dn_dev_islocal(neigh->dev, fld.daddr)) { - dev_out = init_net.loopback_dev; - res.type = RTN_LOCAL; - } else { - dev_out = neigh->dev; - } - dev_hold(dev_out); - goto select_source; - } - } - } - - /* Not there? Perhaps its a local address */ - if (dev_out == NULL) - dev_out = dn_dev_get_default(); - err = -ENODEV; - if (dev_out == NULL) - goto out; - dn_db = rcu_dereference_raw(dev_out->dn_ptr); - if (!dn_db) - goto e_inval; - /* Possible improvement - check all devices for local addr */ - if (dn_dev_islocal(dev_out, fld.daddr)) { - dev_put(dev_out); - dev_out = init_net.loopback_dev; - dev_hold(dev_out); - res.type = RTN_LOCAL; - goto select_source; - } - /* Not local either.... try sending it to the default router */ - neigh = neigh_clone(dn_db->router); - BUG_ON(neigh && neigh->dev != dev_out); - - /* Ok then, we assume its directly connected and move on */ -select_source: - if (neigh) - gateway = ((struct dn_neigh *)neigh)->addr; - if (gateway == 0) - gateway = fld.daddr; - if (fld.saddr == 0) { - fld.saddr = dnet_select_source(dev_out, gateway, - res.type == RTN_LOCAL ? - RT_SCOPE_HOST : - RT_SCOPE_LINK); - if (fld.saddr == 0 && res.type != RTN_LOCAL) - goto e_addr; - } - fld.flowidn_oif = dev_out->ifindex; - goto make_route; - } - free_res = 1; - - if (res.type == RTN_NAT) - goto e_inval; - - if (res.type == RTN_LOCAL) { - if (!fld.saddr) - fld.saddr = fld.daddr; - if (dev_out) - dev_put(dev_out); - dev_out = init_net.loopback_dev; - dev_hold(dev_out); - if (!dev_out->dn_ptr) - goto e_inval; - fld.flowidn_oif = dev_out->ifindex; - if (res.fi) - dn_fib_info_put(res.fi); - res.fi = NULL; - goto make_route; - } - - if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0) - dn_fib_select_multipath(&fld, &res); - - /* - * We could add some logic to deal with default routes here and - * get rid of some of the special casing above. - */ - - if (!fld.saddr) - fld.saddr = DN_FIB_RES_PREFSRC(res); - - if (dev_out) - dev_put(dev_out); - dev_out = DN_FIB_RES_DEV(res); - dev_hold(dev_out); - fld.flowidn_oif = dev_out->ifindex; - gateway = DN_FIB_RES_GW(res); - -make_route: - if (dev_out->flags & IFF_LOOPBACK) - flags |= RTCF_LOCAL; - - rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, DST_HOST); - if (rt == NULL) - goto e_nobufs; - - rt->dn_next = NULL; - memset(&rt->fld, 0, sizeof(rt->fld)); - rt->fld.saddr = oldflp->saddr; - rt->fld.daddr = oldflp->daddr; - rt->fld.flowidn_oif = oldflp->flowidn_oif; - rt->fld.flowidn_iif = 0; - rt->fld.flowidn_mark = oldflp->flowidn_mark; - - rt->rt_saddr = fld.saddr; - rt->rt_daddr = fld.daddr; - rt->rt_gateway = gateway ? gateway : fld.daddr; - rt->rt_local_src = fld.saddr; - - rt->rt_dst_map = fld.daddr; - rt->rt_src_map = fld.saddr; - - rt->n = neigh; - neigh = NULL; - - rt->dst.lastuse = jiffies; - rt->dst.output = dn_output; - rt->dst.input = dn_rt_bug; - rt->rt_flags = flags; - if (flags & RTCF_LOCAL) - rt->dst.input = dn_nsp_rx; - - err = dn_rt_set_next_hop(rt, &res); - if (err) - goto e_neighbour; - - hash = dn_hash(rt->fld.saddr, rt->fld.daddr); - /* dn_insert_route() increments dst->__refcnt */ - dn_insert_route(rt, hash, (struct dn_route **)pprt); - -done: - if (neigh) - neigh_release(neigh); - if (free_res) - dn_fib_res_put(&res); - if (dev_out) - dev_put(dev_out); -out: - return err; - -e_addr: - err = -EADDRNOTAVAIL; - goto done; -e_inval: - err = -EINVAL; - goto done; -e_nobufs: - err = -ENOBUFS; - goto done; -e_neighbour: - dst_release_immediate(&rt->dst); - goto e_nobufs; -} - - -/* - * N.B. The flags may be moved into the flowi at some future stage. - */ -static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags) -{ - unsigned int hash = dn_hash(flp->saddr, flp->daddr); - struct dn_route *rt = NULL; - - if (!(flags & MSG_TRYHARD)) { - rcu_read_lock_bh(); - for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; - rt = rcu_dereference_bh(rt->dn_next)) { - if ((flp->daddr == rt->fld.daddr) && - (flp->saddr == rt->fld.saddr) && - (flp->flowidn_mark == rt->fld.flowidn_mark) && - dn_is_output_route(rt) && - (rt->fld.flowidn_oif == flp->flowidn_oif)) { - dst_hold_and_use(&rt->dst, jiffies); - rcu_read_unlock_bh(); - *pprt = &rt->dst; - return 0; - } - } - rcu_read_unlock_bh(); - } - - return dn_route_output_slow(pprt, flp, flags); -} - -static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags) -{ - int err; - - err = __dn_route_output_key(pprt, flp, flags); - if (err == 0 && flp->flowidn_proto) { - *pprt = xfrm_lookup(&init_net, *pprt, - flowidn_to_flowi(flp), NULL, 0); - if (IS_ERR(*pprt)) { - err = PTR_ERR(*pprt); - *pprt = NULL; - } - } - return err; -} - -int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags) -{ - int err; - - err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); - if (err == 0 && fl->flowidn_proto) { - *pprt = xfrm_lookup(&init_net, *pprt, - flowidn_to_flowi(fl), sk, 0); - if (IS_ERR(*pprt)) { - err = PTR_ERR(*pprt); - *pprt = NULL; - } - } - return err; -} - -static int dn_route_input_slow(struct sk_buff *skb) -{ - struct dn_route *rt = NULL; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - struct net_device *in_dev = skb->dev; - struct net_device *out_dev = NULL; - struct dn_dev *dn_db; - struct neighbour *neigh = NULL; - unsigned int hash; - int flags = 0; - __le16 gateway = 0; - __le16 local_src = 0; - struct flowidn fld = { - .daddr = cb->dst, - .saddr = cb->src, - .flowidn_scope = RT_SCOPE_UNIVERSE, - .flowidn_mark = skb->mark, - .flowidn_iif = skb->dev->ifindex, - }; - struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; - int err = -EINVAL; - int free_res = 0; - - dev_hold(in_dev); - - if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL) - goto out; - - /* Zero source addresses are not allowed */ - if (fld.saddr == 0) - goto out; - - /* - * In this case we've just received a packet from a source - * outside ourselves pretending to come from us. We don't - * allow it any further to prevent routing loops, spoofing and - * other nasties. Loopback packets already have the dst attached - * so this only affects packets which have originated elsewhere. - */ - err = -ENOTUNIQ; - if (dn_dev_islocal(in_dev, cb->src)) - goto out; - - err = dn_fib_lookup(&fld, &res); - if (err) { - if (err != -ESRCH) - goto out; - /* - * Is the destination us ? - */ - if (!dn_dev_islocal(in_dev, cb->dst)) - goto e_inval; - - res.type = RTN_LOCAL; - } else { - __le16 src_map = fld.saddr; - free_res = 1; - - out_dev = DN_FIB_RES_DEV(res); - if (out_dev == NULL) { - net_crit_ratelimited("Bug in dn_route_input_slow() No output device\n"); - goto e_inval; - } - dev_hold(out_dev); - - if (res.r) - src_map = fld.saddr; /* no NAT support for now */ - - gateway = DN_FIB_RES_GW(res); - if (res.type == RTN_NAT) { - fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res); - dn_fib_res_put(&res); - free_res = 0; - if (dn_fib_lookup(&fld, &res)) - goto e_inval; - free_res = 1; - if (res.type != RTN_UNICAST) - goto e_inval; - flags |= RTCF_DNAT; - gateway = fld.daddr; - } - fld.saddr = src_map; - } - - switch(res.type) { - case RTN_UNICAST: - /* - * Forwarding check here, we only check for forwarding - * being turned off, if you want to only forward intra - * area, its up to you to set the routing tables up - * correctly. - */ - if (dn_db->parms.forwarding == 0) - goto e_inval; - - if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0) - dn_fib_select_multipath(&fld, &res); - - /* - * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT - * flag as a hint to set the intra-ethernet bit when - * forwarding. If we've got NAT in operation, we don't do - * this optimisation. - */ - if (out_dev == in_dev && !(flags & RTCF_NAT)) - flags |= RTCF_DOREDIRECT; - - local_src = DN_FIB_RES_PREFSRC(res); - - case RTN_BLACKHOLE: - case RTN_UNREACHABLE: - break; - case RTN_LOCAL: - flags |= RTCF_LOCAL; - fld.saddr = cb->dst; - fld.daddr = cb->src; - - /* Routing tables gave us a gateway */ - if (gateway) - goto make_route; - - /* Packet was intra-ethernet, so we know its on-link */ - if (cb->rt_flags & DN_RT_F_IE) { - gateway = cb->src; - goto make_route; - } - - /* Use the default router if there is one */ - neigh = neigh_clone(dn_db->router); - if (neigh) { - gateway = ((struct dn_neigh *)neigh)->addr; - goto make_route; - } - - /* Close eyes and pray */ - gateway = cb->src; - goto make_route; - default: - goto e_inval; - } - -make_route: - rt = dst_alloc(&dn_dst_ops, out_dev, 1, DST_OBSOLETE_NONE, DST_HOST); - if (rt == NULL) - goto e_nobufs; - - rt->dn_next = NULL; - memset(&rt->fld, 0, sizeof(rt->fld)); - rt->rt_saddr = fld.saddr; - rt->rt_daddr = fld.daddr; - rt->rt_gateway = fld.daddr; - if (gateway) - rt->rt_gateway = gateway; - rt->rt_local_src = local_src ? local_src : rt->rt_saddr; - - rt->rt_dst_map = fld.daddr; - rt->rt_src_map = fld.saddr; - - rt->fld.saddr = cb->src; - rt->fld.daddr = cb->dst; - rt->fld.flowidn_oif = 0; - rt->fld.flowidn_iif = in_dev->ifindex; - rt->fld.flowidn_mark = fld.flowidn_mark; - - rt->n = neigh; - rt->dst.lastuse = jiffies; - rt->dst.output = dn_rt_bug_out; - switch (res.type) { - case RTN_UNICAST: - rt->dst.input = dn_forward; - break; - case RTN_LOCAL: - rt->dst.output = dn_output; - rt->dst.input = dn_nsp_rx; - rt->dst.dev = in_dev; - flags |= RTCF_LOCAL; - break; - default: - case RTN_UNREACHABLE: - case RTN_BLACKHOLE: - rt->dst.input = dst_discard; - } - rt->rt_flags = flags; - - err = dn_rt_set_next_hop(rt, &res); - if (err) - goto e_neighbour; - - hash = dn_hash(rt->fld.saddr, rt->fld.daddr); - /* dn_insert_route() increments dst->__refcnt */ - dn_insert_route(rt, hash, &rt); - skb_dst_set(skb, &rt->dst); - -done: - if (neigh) - neigh_release(neigh); - if (free_res) - dn_fib_res_put(&res); - dev_put(in_dev); - if (out_dev) - dev_put(out_dev); -out: - return err; - -e_inval: - err = -EINVAL; - goto done; - -e_nobufs: - err = -ENOBUFS; - goto done; - -e_neighbour: - dst_release_immediate(&rt->dst); - goto done; -} - -static int dn_route_input(struct sk_buff *skb) -{ - struct dn_route *rt; - struct dn_skb_cb *cb = DN_SKB_CB(skb); - unsigned int hash = dn_hash(cb->src, cb->dst); - - if (skb_dst(skb)) - return 0; - - rcu_read_lock(); - for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; - rt = rcu_dereference(rt->dn_next)) { - if ((rt->fld.saddr == cb->src) && - (rt->fld.daddr == cb->dst) && - (rt->fld.flowidn_oif == 0) && - (rt->fld.flowidn_mark == skb->mark) && - (rt->fld.flowidn_iif == cb->iif)) { - dst_hold_and_use(&rt->dst, jiffies); - rcu_read_unlock(); - skb_dst_set(skb, (struct dst_entry *)rt); - return 0; - } - } - rcu_read_unlock(); - - return dn_route_input_slow(skb); -} - -static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, - int event, int nowait, unsigned int flags) -{ - struct dn_route *rt = (struct dn_route *)skb_dst(skb); - struct rtmsg *r; - struct nlmsghdr *nlh; - long expires; - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); - if (!nlh) - return -EMSGSIZE; - - r = nlmsg_data(nlh); - r->rtm_family = AF_DECnet; - r->rtm_dst_len = 16; - r->rtm_src_len = 0; - r->rtm_tos = 0; - r->rtm_table = RT_TABLE_MAIN; - r->rtm_type = rt->rt_type; - r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; - r->rtm_scope = RT_SCOPE_UNIVERSE; - r->rtm_protocol = RTPROT_UNSPEC; - - if (rt->rt_flags & RTCF_NOTIFY) - r->rtm_flags |= RTM_F_NOTIFY; - - if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN) < 0 || - nla_put_le16(skb, RTA_DST, rt->rt_daddr) < 0) - goto errout; - - if (rt->fld.saddr) { - r->rtm_src_len = 16; - if (nla_put_le16(skb, RTA_SRC, rt->fld.saddr) < 0) - goto errout; - } - if (rt->dst.dev && - nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex) < 0) - goto errout; - - /* - * Note to self - change this if input routes reverse direction when - * they deal only with inputs and not with replies like they do - * currently. - */ - if (nla_put_le16(skb, RTA_PREFSRC, rt->rt_local_src) < 0) - goto errout; - - if (rt->rt_daddr != rt->rt_gateway && - nla_put_le16(skb, RTA_GATEWAY, rt->rt_gateway) < 0) - goto errout; - - if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) - goto errout; - - expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, - rt->dst.error) < 0) - goto errout; - - if (dn_is_input_route(rt) && - nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0) - goto errout; - - nlmsg_end(skb, nlh); - return 0; - -errout: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - -const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = { - [RTA_DST] = { .type = NLA_U16 }, - [RTA_SRC] = { .type = NLA_U16 }, - [RTA_IIF] = { .type = NLA_U32 }, - [RTA_OIF] = { .type = NLA_U32 }, - [RTA_GATEWAY] = { .type = NLA_U16 }, - [RTA_PRIORITY] = { .type = NLA_U32 }, - [RTA_PREFSRC] = { .type = NLA_U16 }, - [RTA_METRICS] = { .type = NLA_NESTED }, - [RTA_MULTIPATH] = { .type = NLA_NESTED }, - [RTA_TABLE] = { .type = NLA_U32 }, - [RTA_MARK] = { .type = NLA_U32 }, -}; - -/* - * This is called by both endnodes and routers now. - */ -static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(in_skb->sk); - struct rtmsg *rtm = nlmsg_data(nlh); - struct dn_route *rt = NULL; - struct dn_skb_cb *cb; - int err; - struct sk_buff *skb; - struct flowidn fld; - struct nlattr *tb[RTA_MAX+1]; - - if (!net_eq(net, &init_net)) - return -EINVAL; - - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy, - extack); - if (err < 0) - return err; - - memset(&fld, 0, sizeof(fld)); - fld.flowidn_proto = DNPROTO_NSP; - - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb == NULL) - return -ENOBUFS; - skb_reset_mac_header(skb); - cb = DN_SKB_CB(skb); - - if (tb[RTA_SRC]) - fld.saddr = nla_get_le16(tb[RTA_SRC]); - - if (tb[RTA_DST]) - fld.daddr = nla_get_le16(tb[RTA_DST]); - - if (tb[RTA_IIF]) - fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]); - - if (fld.flowidn_iif) { - struct net_device *dev; - dev = __dev_get_by_index(&init_net, fld.flowidn_iif); - if (!dev || !dev->dn_ptr) { - kfree_skb(skb); - return -ENODEV; - } - skb->protocol = htons(ETH_P_DNA_RT); - skb->dev = dev; - cb->src = fld.saddr; - cb->dst = fld.daddr; - local_bh_disable(); - err = dn_route_input(skb); - local_bh_enable(); - memset(cb, 0, sizeof(struct dn_skb_cb)); - rt = (struct dn_route *)skb_dst(skb); - if (!err && -rt->dst.error) - err = rt->dst.error; - } else { - if (tb[RTA_OIF]) - fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]); - - err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); - } - - skb->dev = NULL; - if (err) - goto out_free; - skb_dst_set(skb, &rt->dst); - if (rtm->rtm_flags & RTM_F_NOTIFY) - rt->rt_flags |= RTCF_NOTIFY; - - err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (err < 0) { - err = -EMSGSIZE; - goto out_free; - } - - return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid); - -out_free: - kfree_skb(skb); - return err; -} - -/* - * For routers, this is called from dn_fib_dump, but for endnodes its - * called directly from the rtnetlink dispatch table. - */ -int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - struct dn_route *rt; - int h, s_h; - int idx, s_idx; - struct rtmsg *rtm; - - if (!net_eq(net, &init_net)) - return 0; - - if (nlmsg_len(cb->nlh) < sizeof(struct rtmsg)) - return -EINVAL; - - rtm = nlmsg_data(cb->nlh); - if (!(rtm->rtm_flags & RTM_F_CLONED)) - return 0; - - s_h = cb->args[0]; - s_idx = idx = cb->args[1]; - for(h = 0; h <= dn_rt_hash_mask; h++) { - if (h < s_h) - continue; - if (h > s_h) - s_idx = 0; - rcu_read_lock_bh(); - for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; - rt; - rt = rcu_dereference_bh(rt->dn_next), idx++) { - if (idx < s_idx) - continue; - skb_dst_set(skb, dst_clone(&rt->dst)); - if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE, - 1, NLM_F_MULTI) < 0) { - skb_dst_drop(skb); - rcu_read_unlock_bh(); - goto done; - } - skb_dst_drop(skb); - } - rcu_read_unlock_bh(); - } - -done: - cb->args[0] = h; - cb->args[1] = idx; - return skb->len; -} - -#ifdef CONFIG_PROC_FS -struct dn_rt_cache_iter_state { - int bucket; -}; - -static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq) -{ - struct dn_route *rt = NULL; - struct dn_rt_cache_iter_state *s = seq->private; - - for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) { - rcu_read_lock_bh(); - rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain); - if (rt) - break; - rcu_read_unlock_bh(); - } - return rt; -} - -static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt) -{ - struct dn_rt_cache_iter_state *s = seq->private; - - rt = rcu_dereference_bh(rt->dn_next); - while (!rt) { - rcu_read_unlock_bh(); - if (--s->bucket < 0) - break; - rcu_read_lock_bh(); - rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain); - } - return rt; -} - -static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct dn_route *rt = dn_rt_cache_get_first(seq); - - if (rt) { - while(*pos && (rt = dn_rt_cache_get_next(seq, rt))) - --*pos; - } - return *pos ? NULL : rt; -} - -static void *dn_rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct dn_route *rt = dn_rt_cache_get_next(seq, v); - ++*pos; - return rt; -} - -static void dn_rt_cache_seq_stop(struct seq_file *seq, void *v) -{ - if (v) - rcu_read_unlock_bh(); -} - -static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) -{ - struct dn_route *rt = v; - char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN]; - - seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", - rt->dst.dev ? rt->dst.dev->name : "*", - dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), - dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), - atomic_read(&rt->dst.__refcnt), - rt->dst.__use, 0); - return 0; -} - -static const struct seq_operations dn_rt_cache_seq_ops = { - .start = dn_rt_cache_seq_start, - .next = dn_rt_cache_seq_next, - .stop = dn_rt_cache_seq_stop, - .show = dn_rt_cache_seq_show, -}; -#endif /* CONFIG_PROC_FS */ - -void __init dn_route_init(void) -{ - int i, goal, order; - - dn_dst_ops.kmem_cachep = - kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - dst_entries_init(&dn_dst_ops); - timer_setup(&dn_route_timer, dn_dst_check_expire, 0); - dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; - add_timer(&dn_route_timer); - - goal = totalram_pages >> (26 - PAGE_SHIFT); - - for(order = 0; (1UL << order) < goal; order++) - /* NOTHING */; - - /* - * Only want 1024 entries max, since the table is very, very unlikely - * to be larger than that. - */ - while(order && ((((1UL << order) * PAGE_SIZE) / - sizeof(struct dn_rt_hash_bucket)) >= 2048)) - order--; - - do { - dn_rt_hash_mask = (1UL << order) * PAGE_SIZE / - sizeof(struct dn_rt_hash_bucket); - while(dn_rt_hash_mask & (dn_rt_hash_mask - 1)) - dn_rt_hash_mask--; - dn_rt_hash_table = (struct dn_rt_hash_bucket *) - __get_free_pages(GFP_ATOMIC, order); - } while (dn_rt_hash_table == NULL && --order > 0); - - if (!dn_rt_hash_table) - panic("Failed to allocate DECnet route cache hash table\n"); - - printk(KERN_INFO - "DECnet: Routing cache hash table of %u buckets, %ldKbytes\n", - dn_rt_hash_mask, - (long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024); - - dn_rt_hash_mask--; - for(i = 0; i <= dn_rt_hash_mask; i++) { - spin_lock_init(&dn_rt_hash_table[i].lock); - dn_rt_hash_table[i].chain = NULL; - } - - dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - - proc_create_seq_private("decnet_cache", 0444, init_net.proc_net, - &dn_rt_cache_seq_ops, - sizeof(struct dn_rt_cache_iter_state), NULL); - -#ifdef CONFIG_DECNET_ROUTER - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE, - dn_cache_getroute, dn_fib_dump, 0); -#else - rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE, - dn_cache_getroute, dn_cache_dump, 0); -#endif -} - -void __exit dn_route_cleanup(void) -{ - del_timer(&dn_route_timer); - dn_run_flush(NULL); - - remove_proc_entry("decnet_cache", init_net.proc_net); - dst_entries_destroy(&dn_dst_ops); -} diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c deleted file mode 100644 index 4a4e3c17740c..000000000000 --- a/net/decnet/dn_rules.c +++ /dev/null @@ -1,258 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Routing Forwarding Information Base (Rules) - * - * Author: Steve Whitehouse <SteveW@ACM.org> - * Mostly copied from Alexey Kuznetsov's ipv4/fib_rules.c - * - * - * Changes: - * Steve Whitehouse <steve@chygwyn.com> - * Updated for Thomas Graf's generic rules - * - */ -#include <linux/net.h> -#include <linux/init.h> -#include <linux/netlink.h> -#include <linux/rtnetlink.h> -#include <linux/netdevice.h> -#include <linux/spinlock.h> -#include <linux/list.h> -#include <linux/rcupdate.h> -#include <linux/export.h> -#include <net/neighbour.h> -#include <net/dst.h> -#include <net/flow.h> -#include <net/fib_rules.h> -#include <net/dn.h> -#include <net/dn_fib.h> -#include <net/dn_neigh.h> -#include <net/dn_dev.h> -#include <net/dn_route.h> - -static struct fib_rules_ops *dn_fib_rules_ops; - -struct dn_fib_rule -{ - struct fib_rule common; - unsigned char dst_len; - unsigned char src_len; - __le16 src; - __le16 srcmask; - __le16 dst; - __le16 dstmask; - __le16 srcmap; - u8 flags; -}; - - -int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res) -{ - struct fib_lookup_arg arg = { - .result = res, - }; - int err; - - err = fib_rules_lookup(dn_fib_rules_ops, - flowidn_to_flowi(flp), 0, &arg); - res->r = arg.rule; - - return err; -} - -static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, - int flags, struct fib_lookup_arg *arg) -{ - struct flowidn *fld = &flp->u.dn; - int err = -EAGAIN; - struct dn_fib_table *tbl; - - switch(rule->action) { - case FR_ACT_TO_TBL: - break; - - case FR_ACT_UNREACHABLE: - err = -ENETUNREACH; - goto errout; - - case FR_ACT_PROHIBIT: - err = -EACCES; - goto errout; - - case FR_ACT_BLACKHOLE: - default: - err = -EINVAL; - goto errout; - } - - tbl = dn_fib_get_table(rule->table, 0); - if (tbl == NULL) - goto errout; - - err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result); - if (err > 0) - err = -EAGAIN; -errout: - return err; -} - -static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = { - FRA_GENERIC_POLICY, -}; - -static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) -{ - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - struct flowidn *fld = &fl->u.dn; - __le16 daddr = fld->daddr; - __le16 saddr = fld->saddr; - - if (((saddr ^ r->src) & r->srcmask) || - ((daddr ^ r->dst) & r->dstmask)) - return 0; - - return 1; -} - -static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct fib_rule_hdr *frh, - struct nlattr **tb, - struct netlink_ext_ack *extack) -{ - int err = -EINVAL; - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - - if (frh->tos) { - NL_SET_ERR_MSG(extack, "Invalid tos value"); - goto errout; - } - - if (rule->table == RT_TABLE_UNSPEC) { - if (rule->action == FR_ACT_TO_TBL) { - struct dn_fib_table *table; - - table = dn_fib_empty_table(); - if (table == NULL) { - err = -ENOBUFS; - goto errout; - } - - rule->table = table->n; - } - } - - if (frh->src_len) - r->src = nla_get_le16(tb[FRA_SRC]); - - if (frh->dst_len) - r->dst = nla_get_le16(tb[FRA_DST]); - - r->src_len = frh->src_len; - r->srcmask = dnet_make_mask(r->src_len); - r->dst_len = frh->dst_len; - r->dstmask = dnet_make_mask(r->dst_len); - err = 0; -errout: - return err; -} - -static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, - struct nlattr **tb) -{ - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - - if (frh->src_len && (r->src_len != frh->src_len)) - return 0; - - if (frh->dst_len && (r->dst_len != frh->dst_len)) - return 0; - - if (frh->src_len && (r->src != nla_get_le16(tb[FRA_SRC]))) - return 0; - - if (frh->dst_len && (r->dst != nla_get_le16(tb[FRA_DST]))) - return 0; - - return 1; -} - -unsigned int dnet_addr_type(__le16 addr) -{ - struct flowidn fld = { .daddr = addr }; - struct dn_fib_res res; - unsigned int ret = RTN_UNICAST; - struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); - - res.r = NULL; - - if (tb) { - if (!tb->lookup(tb, &fld, &res)) { - ret = res.type; - dn_fib_res_put(&res); - } - } - return ret; -} - -static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, - struct fib_rule_hdr *frh) -{ - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - - frh->dst_len = r->dst_len; - frh->src_len = r->src_len; - frh->tos = 0; - - if ((r->dst_len && - nla_put_le16(skb, FRA_DST, r->dst)) || - (r->src_len && - nla_put_le16(skb, FRA_SRC, r->src))) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -ENOBUFS; -} - -static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) -{ - dn_rt_cache_flush(-1); -} - -static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = { - .family = AF_DECnet, - .rule_size = sizeof(struct dn_fib_rule), - .addr_size = sizeof(u16), - .action = dn_fib_rule_action, - .match = dn_fib_rule_match, - .configure = dn_fib_rule_configure, - .compare = dn_fib_rule_compare, - .fill = dn_fib_rule_fill, - .flush_cache = dn_fib_rule_flush_cache, - .nlgroup = RTNLGRP_DECnet_RULE, - .policy = dn_fib_rule_policy, - .owner = THIS_MODULE, - .fro_net = &init_net, -}; - -void __init dn_fib_rules_init(void) -{ - dn_fib_rules_ops = - fib_rules_register(&dn_fib_rules_ops_template, &init_net); - BUG_ON(IS_ERR(dn_fib_rules_ops)); - BUG_ON(fib_default_rule_add(dn_fib_rules_ops, 0x7fff, - RT_TABLE_MAIN, 0)); -} - -void __exit dn_fib_rules_cleanup(void) -{ - rtnl_lock(); - fib_rules_unregister(dn_fib_rules_ops); - rtnl_unlock(); - rcu_barrier(); -} diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c deleted file mode 100644 index f0710b5d037d..000000000000 --- a/net/decnet/dn_table.c +++ /dev/null @@ -1,928 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Routing Forwarding Information Base (Routing Tables) - * - * Author: Steve Whitehouse <SteveW@ACM.org> - * Mostly copied from the IPv4 routing code - * - * - * Changes: - * - */ -#include <linux/string.h> -#include <linux/net.h> -#include <linux/socket.h> -#include <linux/slab.h> -#include <linux/sockios.h> -#include <linux/init.h> -#include <linux/skbuff.h> -#include <linux/rtnetlink.h> -#include <linux/proc_fs.h> -#include <linux/netdevice.h> -#include <linux/timer.h> -#include <linux/spinlock.h> -#include <linux/atomic.h> -#include <linux/uaccess.h> -#include <linux/route.h> /* RTF_xxx */ -#include <net/neighbour.h> -#include <net/netlink.h> -#include <net/tcp.h> -#include <net/dst.h> -#include <net/flow.h> -#include <net/fib_rules.h> -#include <net/dn.h> -#include <net/dn_route.h> -#include <net/dn_fib.h> -#include <net/dn_neigh.h> -#include <net/dn_dev.h> - -struct dn_zone -{ - struct dn_zone *dz_next; - struct dn_fib_node **dz_hash; - int dz_nent; - int dz_divisor; - u32 dz_hashmask; -#define DZ_HASHMASK(dz) ((dz)->dz_hashmask) - int dz_order; - __le16 dz_mask; -#define DZ_MASK(dz) ((dz)->dz_mask) -}; - -struct dn_hash -{ - struct dn_zone *dh_zones[17]; - struct dn_zone *dh_zone_list; -}; - -#define dz_key_0(key) ((key).datum = 0) - -#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\ - for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) - -#define endfor_nexthops(fi) } - -#define DN_MAX_DIVISOR 1024 -#define DN_S_ZOMBIE 1 -#define DN_S_ACCESSED 2 - -#define DN_FIB_SCAN(f, fp) \ -for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next) - -#define DN_FIB_SCAN_KEY(f, fp, key) \ -for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) - -#define RT_TABLE_MIN 1 -#define DN_FIB_TABLE_HASHSZ 256 -static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ]; -static DEFINE_RWLOCK(dn_fib_tables_lock); - -static struct kmem_cache *dn_hash_kmem __read_mostly; -static int dn_fib_hash_zombies; - -static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) -{ - u16 h = le16_to_cpu(key.datum)>>(16 - dz->dz_order); - h ^= (h >> 10); - h ^= (h >> 6); - h &= DZ_HASHMASK(dz); - return *(dn_fib_idx_t *)&h; -} - -static inline dn_fib_key_t dz_key(__le16 dst, struct dn_zone *dz) -{ - dn_fib_key_t k; - k.datum = dst & DZ_MASK(dz); - return k; -} - -static inline struct dn_fib_node **dn_chain_p(dn_fib_key_t key, struct dn_zone *dz) -{ - return &dz->dz_hash[dn_hash(key, dz).datum]; -} - -static inline struct dn_fib_node *dz_chain(dn_fib_key_t key, struct dn_zone *dz) -{ - return dz->dz_hash[dn_hash(key, dz).datum]; -} - -static inline int dn_key_eq(dn_fib_key_t a, dn_fib_key_t b) -{ - return a.datum == b.datum; -} - -static inline int dn_key_leq(dn_fib_key_t a, dn_fib_key_t b) -{ - return a.datum <= b.datum; -} - -static inline void dn_rebuild_zone(struct dn_zone *dz, - struct dn_fib_node **old_ht, - int old_divisor) -{ - struct dn_fib_node *f, **fp, *next; - int i; - - for(i = 0; i < old_divisor; i++) { - for(f = old_ht[i]; f; f = next) { - next = f->fn_next; - for(fp = dn_chain_p(f->fn_key, dz); - *fp && dn_key_leq((*fp)->fn_key, f->fn_key); - fp = &(*fp)->fn_next) - /* NOTHING */; - f->fn_next = *fp; - *fp = f; - } - } -} - -static void dn_rehash_zone(struct dn_zone *dz) -{ - struct dn_fib_node **ht, **old_ht; - int old_divisor, new_divisor; - u32 new_hashmask; - - old_divisor = dz->dz_divisor; - - switch (old_divisor) { - case 16: - new_divisor = 256; - new_hashmask = 0xFF; - break; - default: - printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", - old_divisor); - /* fall through */ - case 256: - new_divisor = 1024; - new_hashmask = 0x3FF; - break; - } - - ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL); - if (ht == NULL) - return; - - write_lock_bh(&dn_fib_tables_lock); - old_ht = dz->dz_hash; - dz->dz_hash = ht; - dz->dz_hashmask = new_hashmask; - dz->dz_divisor = new_divisor; - dn_rebuild_zone(dz, old_ht, old_divisor); - write_unlock_bh(&dn_fib_tables_lock); - kfree(old_ht); -} - -static void dn_free_node(struct dn_fib_node *f) -{ - dn_fib_release_info(DN_FIB_INFO(f)); - kmem_cache_free(dn_hash_kmem, f); -} - - -static struct dn_zone *dn_new_zone(struct dn_hash *table, int z) -{ - int i; - struct dn_zone *dz = kzalloc(sizeof(struct dn_zone), GFP_KERNEL); - if (!dz) - return NULL; - - if (z) { - dz->dz_divisor = 16; - dz->dz_hashmask = 0x0F; - } else { - dz->dz_divisor = 1; - dz->dz_hashmask = 0; - } - - dz->dz_hash = kcalloc(dz->dz_divisor, sizeof(struct dn_fib_node *), GFP_KERNEL); - if (!dz->dz_hash) { - kfree(dz); - return NULL; - } - - dz->dz_order = z; - dz->dz_mask = dnet_make_mask(z); - - for(i = z + 1; i <= 16; i++) - if (table->dh_zones[i]) - break; - - write_lock_bh(&dn_fib_tables_lock); - if (i>16) { - dz->dz_next = table->dh_zone_list; - table->dh_zone_list = dz; - } else { - dz->dz_next = table->dh_zones[i]->dz_next; - table->dh_zones[i]->dz_next = dz; - } - table->dh_zones[z] = dz; - write_unlock_bh(&dn_fib_tables_lock); - return dz; -} - - -static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi) -{ - struct rtnexthop *nhp; - int nhlen; - - if (attrs[RTA_PRIORITY] && - nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority) - return 1; - - if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) { - if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) && - (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw)) - return 0; - return 1; - } - - if (!attrs[RTA_MULTIPATH]) - return 0; - - nhp = nla_data(attrs[RTA_MULTIPATH]); - nhlen = nla_len(attrs[RTA_MULTIPATH]); - - for_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - __le16 gw; - - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) - return -EINVAL; - if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) - return 1; - if (attrlen) { - struct nlattr *gw_attr; - - gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); - gw = gw_attr ? nla_get_le16(gw_attr) : 0; - - if (gw && gw != nh->nh_gw) - return 1; - } - nhp = RTNH_NEXT(nhp); - } endfor_nexthops(fi); - - return 0; -} - -static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) -{ - size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) - + nla_total_size(4) /* RTA_TABLE */ - + nla_total_size(2) /* RTA_DST */ - + nla_total_size(4) /* RTA_PRIORITY */ - + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ - - /* space for nested metrics */ - payload += nla_total_size((RTAX_MAX * nla_total_size(4))); - - if (fi->fib_nhs) { - /* Also handles the special case fib_nhs == 1 */ - - /* each nexthop is packed in an attribute */ - size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); - - /* may contain a gateway attribute */ - nhsize += nla_total_size(4); - - /* all nexthops are packed in a nested attribute */ - payload += nla_total_size(fi->fib_nhs * nhsize); - } - - return payload; -} - -static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, - struct dn_fib_info *fi, unsigned int flags) -{ - struct rtmsg *rtm; - struct nlmsghdr *nlh; - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); - if (!nlh) - return -EMSGSIZE; - - rtm = nlmsg_data(nlh); - rtm->rtm_family = AF_DECnet; - rtm->rtm_dst_len = dst_len; - rtm->rtm_src_len = 0; - rtm->rtm_tos = 0; - rtm->rtm_table = tb_id; - rtm->rtm_flags = fi->fib_flags; - rtm->rtm_scope = scope; - rtm->rtm_type = type; - rtm->rtm_protocol = fi->fib_protocol; - - if (nla_put_u32(skb, RTA_TABLE, tb_id) < 0) - goto errout; - - if (rtm->rtm_dst_len && - nla_put(skb, RTA_DST, 2, dst) < 0) - goto errout; - - if (fi->fib_priority && - nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority) < 0) - goto errout; - - if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) - goto errout; - - if (fi->fib_nhs == 1) { - if (fi->fib_nh->nh_gw && - nla_put_le16(skb, RTA_GATEWAY, fi->fib_nh->nh_gw) < 0) - goto errout; - - if (fi->fib_nh->nh_oif && - nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif) < 0) - goto errout; - } - - if (fi->fib_nhs > 1) { - struct rtnexthop *nhp; - struct nlattr *mp_head; - - if (!(mp_head = nla_nest_start(skb, RTA_MULTIPATH))) - goto errout; - - for_nexthops(fi) { - if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) - goto errout; - - nhp->rtnh_flags = nh->nh_flags & 0xFF; - nhp->rtnh_hops = nh->nh_weight - 1; - nhp->rtnh_ifindex = nh->nh_oif; - - if (nh->nh_gw && - nla_put_le16(skb, RTA_GATEWAY, nh->nh_gw) < 0) - goto errout; - - nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp; - } endfor_nexthops(fi); - - nla_nest_end(skb, mp_head); - } - - nlmsg_end(skb, nlh); - return 0; - -errout: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; -} - - -static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, - struct nlmsghdr *nlh, struct netlink_skb_parms *req) -{ - struct sk_buff *skb; - u32 portid = req ? req->portid : 0; - int err = -ENOBUFS; - - skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL); - if (skb == NULL) - goto errout; - - err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id, - f->fn_type, f->fn_scope, &f->fn_key, z, - DN_FIB_INFO(f), 0); - if (err < 0) { - /* -EMSGSIZE implies BUG in dn_fib_nlmsg_size() */ - WARN_ON(err == -EMSGSIZE); - kfree_skb(skb); - goto errout; - } - rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); - return; -errout: - if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); -} - -static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, - struct netlink_callback *cb, - struct dn_fib_table *tb, - struct dn_zone *dz, - struct dn_fib_node *f) -{ - int i, s_i; - - s_i = cb->args[4]; - for(i = 0; f; i++, f = f->fn_next) { - if (i < s_i) - continue; - if (f->fn_state & DN_S_ZOMBIE) - continue; - if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWROUTE, - tb->n, - (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type, - f->fn_scope, &f->fn_key, dz->dz_order, - f->fn_info, NLM_F_MULTI) < 0) { - cb->args[4] = i; - return -1; - } - } - cb->args[4] = i; - return skb->len; -} - -static __inline__ int dn_hash_dump_zone(struct sk_buff *skb, - struct netlink_callback *cb, - struct dn_fib_table *tb, - struct dn_zone *dz) -{ - int h, s_h; - - s_h = cb->args[3]; - for(h = 0; h < dz->dz_divisor; h++) { - if (h < s_h) - continue; - if (h > s_h) - memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0])); - if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL) - continue; - if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) { - cb->args[3] = h; - return -1; - } - } - cb->args[3] = h; - return skb->len; -} - -static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, - struct netlink_callback *cb) -{ - int m, s_m; - struct dn_zone *dz; - struct dn_hash *table = (struct dn_hash *)tb->data; - - s_m = cb->args[2]; - read_lock(&dn_fib_tables_lock); - for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) { - if (m < s_m) - continue; - if (m > s_m) - memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); - - if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) { - cb->args[2] = m; - read_unlock(&dn_fib_tables_lock); - return -1; - } - } - read_unlock(&dn_fib_tables_lock); - cb->args[2] = m; - - return skb->len; -} - -int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - unsigned int h, s_h; - unsigned int e = 0, s_e; - struct dn_fib_table *tb; - int dumped = 0; - - if (!net_eq(net, &init_net)) - return 0; - - if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && - ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED) - return dn_cache_dump(skb, cb); - - s_h = cb->args[0]; - s_e = cb->args[1]; - - for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { - e = 0; - hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) { - if (e < s_e) - goto next; - if (dumped) - memset(&cb->args[2], 0, sizeof(cb->args) - - 2 * sizeof(cb->args[0])); - if (tb->dump(tb, skb, cb) < 0) - goto out; - dumped = 1; -next: - e++; - } - } -out: - cb->args[1] = e; - cb->args[0] = h; - - return skb->len; -} - -static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], - struct nlmsghdr *n, struct netlink_skb_parms *req) -{ - struct dn_hash *table = (struct dn_hash *)tb->data; - struct dn_fib_node *new_f, *f, **fp, **del_fp; - struct dn_zone *dz; - struct dn_fib_info *fi; - int z = r->rtm_dst_len; - int type = r->rtm_type; - dn_fib_key_t key; - int err; - - if (z > 16) - return -EINVAL; - - dz = table->dh_zones[z]; - if (!dz && !(dz = dn_new_zone(table, z))) - return -ENOBUFS; - - dz_key_0(key); - if (attrs[RTA_DST]) { - __le16 dst = nla_get_le16(attrs[RTA_DST]); - if (dst & ~DZ_MASK(dz)) - return -EINVAL; - key = dz_key(dst, dz); - } - - if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL) - return err; - - if (dz->dz_nent > (dz->dz_divisor << 2) && - dz->dz_divisor > DN_MAX_DIVISOR && - (z==16 || (1<<z) > dz->dz_divisor)) - dn_rehash_zone(dz); - - fp = dn_chain_p(key, dz); - - DN_FIB_SCAN(f, fp) { - if (dn_key_leq(key, f->fn_key)) - break; - } - - del_fp = NULL; - - if (f && (f->fn_state & DN_S_ZOMBIE) && - dn_key_eq(f->fn_key, key)) { - del_fp = fp; - fp = &f->fn_next; - f = *fp; - goto create; - } - - DN_FIB_SCAN_KEY(f, fp, key) { - if (fi->fib_priority <= DN_FIB_INFO(f)->fib_priority) - break; - } - - if (f && dn_key_eq(f->fn_key, key) && - fi->fib_priority == DN_FIB_INFO(f)->fib_priority) { - struct dn_fib_node **ins_fp; - - err = -EEXIST; - if (n->nlmsg_flags & NLM_F_EXCL) - goto out; - - if (n->nlmsg_flags & NLM_F_REPLACE) { - del_fp = fp; - fp = &f->fn_next; - f = *fp; - goto replace; - } - - ins_fp = fp; - err = -EEXIST; - - DN_FIB_SCAN_KEY(f, fp, key) { - if (fi->fib_priority != DN_FIB_INFO(f)->fib_priority) - break; - if (f->fn_type == type && - f->fn_scope == r->rtm_scope && - DN_FIB_INFO(f) == fi) - goto out; - } - - if (!(n->nlmsg_flags & NLM_F_APPEND)) { - fp = ins_fp; - f = *fp; - } - } - -create: - err = -ENOENT; - if (!(n->nlmsg_flags & NLM_F_CREATE)) - goto out; - -replace: - err = -ENOBUFS; - new_f = kmem_cache_zalloc(dn_hash_kmem, GFP_KERNEL); - if (new_f == NULL) - goto out; - - new_f->fn_key = key; - new_f->fn_type = type; - new_f->fn_scope = r->rtm_scope; - DN_FIB_INFO(new_f) = fi; - - new_f->fn_next = f; - write_lock_bh(&dn_fib_tables_lock); - *fp = new_f; - write_unlock_bh(&dn_fib_tables_lock); - dz->dz_nent++; - - if (del_fp) { - f = *del_fp; - write_lock_bh(&dn_fib_tables_lock); - *del_fp = f->fn_next; - write_unlock_bh(&dn_fib_tables_lock); - - if (!(f->fn_state & DN_S_ZOMBIE)) - dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req); - if (f->fn_state & DN_S_ACCESSED) - dn_rt_cache_flush(-1); - dn_free_node(f); - dz->dz_nent--; - } else { - dn_rt_cache_flush(-1); - } - - dn_rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->n, n, req); - - return 0; -out: - dn_fib_release_info(fi); - return err; -} - - -static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], - struct nlmsghdr *n, struct netlink_skb_parms *req) -{ - struct dn_hash *table = (struct dn_hash*)tb->data; - struct dn_fib_node **fp, **del_fp, *f; - int z = r->rtm_dst_len; - struct dn_zone *dz; - dn_fib_key_t key; - int matched; - - - if (z > 16) - return -EINVAL; - - if ((dz = table->dh_zones[z]) == NULL) - return -ESRCH; - - dz_key_0(key); - if (attrs[RTA_DST]) { - __le16 dst = nla_get_le16(attrs[RTA_DST]); - if (dst & ~DZ_MASK(dz)) - return -EINVAL; - key = dz_key(dst, dz); - } - - fp = dn_chain_p(key, dz); - - DN_FIB_SCAN(f, fp) { - if (dn_key_eq(f->fn_key, key)) - break; - if (dn_key_leq(key, f->fn_key)) - return -ESRCH; - } - - matched = 0; - del_fp = NULL; - DN_FIB_SCAN_KEY(f, fp, key) { - struct dn_fib_info *fi = DN_FIB_INFO(f); - - if (f->fn_state & DN_S_ZOMBIE) - return -ESRCH; - - matched++; - - if (del_fp == NULL && - (!r->rtm_type || f->fn_type == r->rtm_type) && - (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) && - (!r->rtm_protocol || - fi->fib_protocol == r->rtm_protocol) && - dn_fib_nh_match(r, n, attrs, fi) == 0) - del_fp = fp; - } - - if (del_fp) { - f = *del_fp; - dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req); - - if (matched != 1) { - write_lock_bh(&dn_fib_tables_lock); - *del_fp = f->fn_next; - write_unlock_bh(&dn_fib_tables_lock); - - if (f->fn_state & DN_S_ACCESSED) - dn_rt_cache_flush(-1); - dn_free_node(f); - dz->dz_nent--; - } else { - f->fn_state |= DN_S_ZOMBIE; - if (f->fn_state & DN_S_ACCESSED) { - f->fn_state &= ~DN_S_ACCESSED; - dn_rt_cache_flush(-1); - } - if (++dn_fib_hash_zombies > 128) - dn_fib_flush(); - } - - return 0; - } - - return -ESRCH; -} - -static inline int dn_flush_list(struct dn_fib_node **fp, int z, struct dn_hash *table) -{ - int found = 0; - struct dn_fib_node *f; - - while((f = *fp) != NULL) { - struct dn_fib_info *fi = DN_FIB_INFO(f); - - if (fi && ((f->fn_state & DN_S_ZOMBIE) || (fi->fib_flags & RTNH_F_DEAD))) { - write_lock_bh(&dn_fib_tables_lock); - *fp = f->fn_next; - write_unlock_bh(&dn_fib_tables_lock); - - dn_free_node(f); - found++; - continue; - } - fp = &f->fn_next; - } - - return found; -} - -static int dn_fib_table_flush(struct dn_fib_table *tb) -{ - struct dn_hash *table = (struct dn_hash *)tb->data; - struct dn_zone *dz; - int found = 0; - - dn_fib_hash_zombies = 0; - for(dz = table->dh_zone_list; dz; dz = dz->dz_next) { - int i; - int tmp = 0; - for(i = dz->dz_divisor-1; i >= 0; i--) - tmp += dn_flush_list(&dz->dz_hash[i], dz->dz_order, table); - dz->dz_nent -= tmp; - found += tmp; - } - - return found; -} - -static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res) -{ - int err; - struct dn_zone *dz; - struct dn_hash *t = (struct dn_hash *)tb->data; - - read_lock(&dn_fib_tables_lock); - for(dz = t->dh_zone_list; dz; dz = dz->dz_next) { - struct dn_fib_node *f; - dn_fib_key_t k = dz_key(flp->daddr, dz); - - for(f = dz_chain(k, dz); f; f = f->fn_next) { - if (!dn_key_eq(k, f->fn_key)) { - if (dn_key_leq(k, f->fn_key)) - break; - else - continue; - } - - f->fn_state |= DN_S_ACCESSED; - - if (f->fn_state&DN_S_ZOMBIE) - continue; - - if (f->fn_scope < flp->flowidn_scope) - continue; - - err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res); - - if (err == 0) { - res->type = f->fn_type; - res->scope = f->fn_scope; - res->prefixlen = dz->dz_order; - goto out; - } - if (err < 0) - goto out; - } - } - err = 1; -out: - read_unlock(&dn_fib_tables_lock); - return err; -} - - -struct dn_fib_table *dn_fib_get_table(u32 n, int create) -{ - struct dn_fib_table *t; - unsigned int h; - - if (n < RT_TABLE_MIN) - return NULL; - - if (n > RT_TABLE_MAX) - return NULL; - - h = n & (DN_FIB_TABLE_HASHSZ - 1); - rcu_read_lock(); - hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) { - if (t->n == n) { - rcu_read_unlock(); - return t; - } - } - rcu_read_unlock(); - - if (!create) - return NULL; - - if (in_interrupt()) { - net_dbg_ratelimited("DECnet: BUG! Attempt to create routing table from interrupt\n"); - return NULL; - } - - t = kzalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash), - GFP_KERNEL); - if (t == NULL) - return NULL; - - t->n = n; - t->insert = dn_fib_table_insert; - t->delete = dn_fib_table_delete; - t->lookup = dn_fib_table_lookup; - t->flush = dn_fib_table_flush; - t->dump = dn_fib_table_dump; - hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]); - - return t; -} - -struct dn_fib_table *dn_fib_empty_table(void) -{ - u32 id; - - for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) - if (dn_fib_get_table(id, 0) == NULL) - return dn_fib_get_table(id, 1); - return NULL; -} - -void dn_fib_flush(void) -{ - int flushed = 0; - struct dn_fib_table *tb; - unsigned int h; - - for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) - flushed += tb->flush(tb); - } - - if (flushed) - dn_rt_cache_flush(-1); -} - -void __init dn_fib_table_init(void) -{ - dn_hash_kmem = kmem_cache_create("dn_fib_info_cache", - sizeof(struct dn_fib_info), - 0, SLAB_HWCACHE_ALIGN, - NULL); -} - -void __exit dn_fib_table_cleanup(void) -{ - struct dn_fib_table *t; - struct hlist_node *next; - unsigned int h; - - write_lock(&dn_fib_tables_lock); - for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h], - hlist) { - hlist_del(&t->hlist); - kfree(t); - } - } - write_unlock(&dn_fib_tables_lock); -} diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c deleted file mode 100644 index aa4155875ca8..000000000000 --- a/net/decnet/dn_timer.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Socket Timer Functions - * - * Author: Steve Whitehouse <SteveW@ACM.org> - * - * - * Changes: - * Steve Whitehouse : Made keepalive timer part of the same - * timer idea. - * Steve Whitehouse : Added checks for sk->sock_readers - * David S. Miller : New socket locking - * Steve Whitehouse : Timer grabs socket ref. - */ -#include <linux/net.h> -#include <linux/socket.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/timer.h> -#include <linux/spinlock.h> -#include <net/sock.h> -#include <linux/atomic.h> -#include <linux/jiffies.h> -#include <net/flow.h> -#include <net/dn.h> - -/* - * Slow timer is for everything else (n * 500mS) - */ - -#define SLOW_INTERVAL (HZ/2) - -static void dn_slow_timer(struct timer_list *t); - -void dn_start_slow_timer(struct sock *sk) -{ - timer_setup(&sk->sk_timer, dn_slow_timer, 0); - sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); -} - -void dn_stop_slow_timer(struct sock *sk) -{ - sk_stop_timer(sk, &sk->sk_timer); -} - -static void dn_slow_timer(struct timer_list *t) -{ - struct sock *sk = from_timer(sk, t, sk_timer); - struct dn_scp *scp = DN_SK(sk); - - bh_lock_sock(sk); - - if (sock_owned_by_user(sk)) { - sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10); - goto out; - } - - /* - * The persist timer is the standard slow timer used for retransmits - * in both connection establishment and disconnection as well as - * in the RUN state. The different states are catered for by changing - * the function pointer in the socket. Setting the timer to a value - * of zero turns it off. We allow the persist_fxn to turn the - * timer off in a permant way by returning non-zero, so that - * timer based routines may remove sockets. This is why we have a - * sock_hold()/sock_put() around the timer to prevent the socket - * going away in the middle. - */ - if (scp->persist && scp->persist_fxn) { - if (scp->persist <= SLOW_INTERVAL) { - scp->persist = 0; - - if (scp->persist_fxn(sk)) - goto out; - } else { - scp->persist -= SLOW_INTERVAL; - } - } - - /* - * Check for keepalive timeout. After the other timer 'cos if - * the previous timer caused a retransmit, we don't need to - * do this. scp->stamp is the last time that we sent a packet. - * The keepalive function sends a link service packet to the - * other end. If it remains unacknowledged, the standard - * socket timers will eventually shut the socket down. Each - * time we do this, scp->stamp will be updated, thus - * we won't try and send another until scp->keepalive has passed - * since the last successful transmission. - */ - if (scp->keepalive && scp->keepalive_fxn && (scp->state == DN_RUN)) { - if (time_after_eq(jiffies, scp->stamp + scp->keepalive)) - scp->keepalive_fxn(sk); - } - - sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); -out: - bh_unlock_sock(sk); - sock_put(sk); -} diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig deleted file mode 100644 index 8d7c109d5109..000000000000 --- a/net/decnet/netfilter/Kconfig +++ /dev/null @@ -1,16 +0,0 @@ -# -# DECnet netfilter configuration -# - -menu "DECnet: Netfilter Configuration" - depends on DECNET && NETFILTER - depends on NETFILTER_ADVANCED - -config DECNET_NF_GRABULATOR - tristate "Routing message grabulator (for userland routing daemon)" - help - Enable this module if you want to use the userland DECnet routing - daemon. You will also need to enable routing support for DECnet - unless you just want to monitor routing messages from other nodes. - -endmenu diff --git a/net/decnet/netfilter/Makefile b/net/decnet/netfilter/Makefile deleted file mode 100644 index b579e52130aa..000000000000 --- a/net/decnet/netfilter/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for DECnet netfilter modules -# - -obj-$(CONFIG_DECNET_NF_GRABULATOR) += dn_rtmsg.o diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c deleted file mode 100644 index a4faacadd8a8..000000000000 --- a/net/decnet/netfilter/dn_rtmsg.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet Routing Message Grabulator - * - * (C) 2000 ChyGwyn Limited - http://www.chygwyn.com/ - * This code may be copied under the GPL v.2 or at your option - * any later version. - * - * Author: Steven Whitehouse <steve@chygwyn.com> - * - */ -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/netdevice.h> -#include <linux/netfilter.h> -#include <linux/spinlock.h> -#include <net/netlink.h> -#include <linux/netfilter_decnet.h> - -#include <net/sock.h> -#include <net/flow.h> -#include <net/dn.h> -#include <net/dn_route.h> - -static struct sock *dnrmg = NULL; - - -static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp) -{ - struct sk_buff *skb = NULL; - size_t size; - sk_buff_data_t old_tail; - struct nlmsghdr *nlh; - unsigned char *ptr; - struct nf_dn_rtmsg *rtm; - - size = NLMSG_ALIGN(rt_skb->len) + - NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); - skb = nlmsg_new(size, GFP_ATOMIC); - if (!skb) { - *errp = -ENOMEM; - return NULL; - } - old_tail = skb->tail; - nlh = nlmsg_put(skb, 0, 0, 0, size, 0); - if (!nlh) { - kfree_skb(skb); - *errp = -ENOMEM; - return NULL; - } - rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh); - rtm->nfdn_ifindex = rt_skb->dev->ifindex; - ptr = NFDN_RTMSG(rtm); - skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len); - nlh->nlmsg_len = skb->tail - old_tail; - return skb; -} - -static void dnrmg_send_peer(struct sk_buff *skb) -{ - struct sk_buff *skb2; - int status = 0; - int group = 0; - unsigned char flags = *skb->data; - - switch (flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_L1RT: - group = DNRNG_NLGRP_L1; - break; - case DN_RT_PKT_L2RT: - group = DNRNG_NLGRP_L2; - break; - default: - return; - } - - skb2 = dnrmg_build_message(skb, &status); - if (skb2 == NULL) - return; - NETLINK_CB(skb2).dst_group = group; - netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC); -} - - -static unsigned int dnrmg_hook(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - dnrmg_send_peer(skb); - return NF_ACCEPT; -} - - -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0) - -static inline void dnrmg_receive_user_skb(struct sk_buff *skb) -{ - struct nlmsghdr *nlh = nlmsg_hdr(skb); - - if (skb->len < sizeof(*nlh) || - nlh->nlmsg_len < sizeof(*nlh) || - skb->len < nlh->nlmsg_len) - return; - - if (!netlink_capable(skb, CAP_NET_ADMIN)) - RCV_SKB_FAIL(-EPERM); - - /* Eventually we might send routing messages too */ - - RCV_SKB_FAIL(-EINVAL); -} - -static const struct nf_hook_ops dnrmg_ops = { - .hook = dnrmg_hook, - .pf = NFPROTO_DECNET, - .hooknum = NF_DN_ROUTE, - .priority = NF_DN_PRI_DNRTMSG, -}; - -static int __init dn_rtmsg_init(void) -{ - int rv = 0; - struct netlink_kernel_cfg cfg = { - .groups = DNRNG_NLGRP_MAX, - .input = dnrmg_receive_user_skb, - }; - - dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg); - if (dnrmg == NULL) { - printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); - return -ENOMEM; - } - - rv = nf_register_net_hook(&init_net, &dnrmg_ops); - if (rv) { - netlink_kernel_release(dnrmg); - } - - return rv; -} - -static void __exit dn_rtmsg_fini(void) -{ - nf_unregister_net_hook(&init_net, &dnrmg_ops); - netlink_kernel_release(dnrmg); -} - - -MODULE_DESCRIPTION("DECnet Routing Message Grabulator"); -MODULE_AUTHOR("Steven Whitehouse <steve@chygwyn.com>"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG); - -module_init(dn_rtmsg_init); -module_exit(dn_rtmsg_fini); diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c deleted file mode 100644 index 55bf64a22b59..000000000000 --- a/net/decnet/sysctl_net_decnet.c +++ /dev/null @@ -1,373 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DECnet An implementation of the DECnet protocol suite for the LINUX - * operating system. DECnet is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * DECnet sysctl support functions - * - * Author: Steve Whitehouse <SteveW@ACM.org> - * - * - * Changes: - * Steve Whitehouse - C99 changes and default device handling - * Steve Whitehouse - Memory buffer settings, like the tcp ones - * - */ -#include <linux/mm.h> -#include <linux/sysctl.h> -#include <linux/fs.h> -#include <linux/netdevice.h> -#include <linux/string.h> -#include <net/neighbour.h> -#include <net/dst.h> -#include <net/flow.h> - -#include <linux/uaccess.h> - -#include <net/dn.h> -#include <net/dn_dev.h> -#include <net/dn_route.h> - - -int decnet_debug_level; -int decnet_time_wait = 30; -int decnet_dn_count = 1; -int decnet_di_count = 3; -int decnet_dr_count = 3; -int decnet_log_martians = 1; -int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW; - -/* Reasonable defaults, I hope, based on tcp's defaults */ -long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 }; -int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; -int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; - -#ifdef CONFIG_SYSCTL -extern int decnet_dst_gc_interval; -static int min_decnet_time_wait[] = { 5 }; -static int max_decnet_time_wait[] = { 600 }; -static int min_state_count[] = { 1 }; -static int max_state_count[] = { NSP_MAXRXTSHIFT }; -static int min_decnet_dst_gc_interval[] = { 1 }; -static int max_decnet_dst_gc_interval[] = { 60 }; -static int min_decnet_no_fc_max_cwnd[] = { NSP_MIN_WINDOW }; -static int max_decnet_no_fc_max_cwnd[] = { NSP_MAX_WINDOW }; -static char node_name[7] = "???"; - -static struct ctl_table_header *dn_table_header = NULL; - -/* - * ctype.h :-) - */ -#define ISNUM(x) (((x) >= '0') && ((x) <= '9')) -#define ISLOWER(x) (((x) >= 'a') && ((x) <= 'z')) -#define ISUPPER(x) (((x) >= 'A') && ((x) <= 'Z')) -#define ISALPHA(x) (ISLOWER(x) || ISUPPER(x)) -#define INVALID_END_CHAR(x) (ISNUM(x) || ISALPHA(x)) - -static void strip_it(char *str) -{ - for(;;) { - switch (*str) { - case ' ': - case '\n': - case '\r': - case ':': - *str = 0; - /* Fallthrough */ - case 0: - return; - } - str++; - } -} - -/* - * Simple routine to parse an ascii DECnet address - * into a network order address. - */ -static int parse_addr(__le16 *addr, char *str) -{ - __u16 area, node; - - while(*str && !ISNUM(*str)) str++; - - if (*str == 0) - return -1; - - area = (*str++ - '0'); - if (ISNUM(*str)) { - area *= 10; - area += (*str++ - '0'); - } - - if (*str++ != '.') - return -1; - - if (!ISNUM(*str)) - return -1; - - node = *str++ - '0'; - if (ISNUM(*str)) { - node *= 10; - node += (*str++ - '0'); - } - if (ISNUM(*str)) { - node *= 10; - node += (*str++ - '0'); - } - if (ISNUM(*str)) { - node *= 10; - node += (*str++ - '0'); - } - - if ((node > 1023) || (area > 63)) - return -1; - - if (INVALID_END_CHAR(*str)) - return -1; - - *addr = cpu_to_le16((area << 10) | node); - - return 0; -} - -static int dn_node_address_handler(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - char addr[DN_ASCBUF_LEN]; - size_t len; - __le16 dnaddr; - - if (!*lenp || (*ppos && !write)) { - *lenp = 0; - return 0; - } - - if (write) { - len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1); - - if (copy_from_user(addr, buffer, len)) - return -EFAULT; - - addr[len] = 0; - strip_it(addr); - - if (parse_addr(&dnaddr, addr)) - return -EINVAL; - - dn_dev_devices_off(); - - decnet_address = dnaddr; - - dn_dev_devices_on(); - - *ppos += len; - - return 0; - } - - dn_addr2asc(le16_to_cpu(decnet_address), addr); - len = strlen(addr); - addr[len++] = '\n'; - - if (len > *lenp) len = *lenp; - - if (copy_to_user(buffer, addr, len)) - return -EFAULT; - - *lenp = len; - *ppos += len; - - return 0; -} - -static int dn_def_dev_handler(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - size_t len; - struct net_device *dev; - char devname[17]; - - if (!*lenp || (*ppos && !write)) { - *lenp = 0; - return 0; - } - - if (write) { - if (*lenp > 16) - return -E2BIG; - - if (copy_from_user(devname, buffer, *lenp)) - return -EFAULT; - - devname[*lenp] = 0; - strip_it(devname); - - dev = dev_get_by_name(&init_net, devname); - if (dev == NULL) - return -ENODEV; - - if (dev->dn_ptr == NULL) { - dev_put(dev); - return -ENODEV; - } - - if (dn_dev_set_default(dev, 1)) { - dev_put(dev); - return -ENODEV; - } - *ppos += *lenp; - - return 0; - } - - dev = dn_dev_get_default(); - if (dev == NULL) { - *lenp = 0; - return 0; - } - - strcpy(devname, dev->name); - dev_put(dev); - len = strlen(devname); - devname[len++] = '\n'; - - if (len > *lenp) len = *lenp; - - if (copy_to_user(buffer, devname, len)) - return -EFAULT; - - *lenp = len; - *ppos += len; - - return 0; -} - -static struct ctl_table dn_table[] = { - { - .procname = "node_address", - .maxlen = 7, - .mode = 0644, - .proc_handler = dn_node_address_handler, - }, - { - .procname = "node_name", - .data = node_name, - .maxlen = 7, - .mode = 0644, - .proc_handler = proc_dostring, - }, - { - .procname = "default_device", - .maxlen = 16, - .mode = 0644, - .proc_handler = dn_def_dev_handler, - }, - { - .procname = "time_wait", - .data = &decnet_time_wait, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_decnet_time_wait, - .extra2 = &max_decnet_time_wait - }, - { - .procname = "dn_count", - .data = &decnet_dn_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_state_count, - .extra2 = &max_state_count - }, - { - .procname = "di_count", - .data = &decnet_di_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_state_count, - .extra2 = &max_state_count - }, - { - .procname = "dr_count", - .data = &decnet_dr_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_state_count, - .extra2 = &max_state_count - }, - { - .procname = "dst_gc_interval", - .data = &decnet_dst_gc_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_decnet_dst_gc_interval, - .extra2 = &max_decnet_dst_gc_interval - }, - { - .procname = "no_fc_max_cwnd", - .data = &decnet_no_fc_max_cwnd, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_decnet_no_fc_max_cwnd, - .extra2 = &max_decnet_no_fc_max_cwnd - }, - { - .procname = "decnet_mem", - .data = &sysctl_decnet_mem, - .maxlen = sizeof(sysctl_decnet_mem), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax - }, - { - .procname = "decnet_rmem", - .data = &sysctl_decnet_rmem, - .maxlen = sizeof(sysctl_decnet_rmem), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "decnet_wmem", - .data = &sysctl_decnet_wmem, - .maxlen = sizeof(sysctl_decnet_wmem), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "debug", - .data = &decnet_debug_level, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; - -void dn_register_sysctl(void) -{ - dn_table_header = register_net_sysctl(&init_net, "net/decnet", dn_table); -} - -void dn_unregister_sysctl(void) -{ - unregister_net_sysctl_table(dn_table_header); -} - -#else /* CONFIG_SYSCTL */ -void dn_unregister_sysctl(void) -{ -} -void dn_register_sysctl(void) -{ -} - -#endif diff --git a/net/ife/ife.c b/net/ife/ife.c index 13bbf8cb6a39..be05b690b9ef 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -82,6 +82,7 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen) if (unlikely(!pskb_may_pull(skb, total_pull))) return NULL; + ifehdr = (struct ifeheadr *)(skb->data + skb->dev->hard_header_len); skb_set_mac_header(skb, total_pull); __skb_pull(skb, total_pull); *metalen = ifehdrln - IFE_METAHDRLEN; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4b26ae525d6d..7c902a1efbbf 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -578,6 +578,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; + sk->sk_wait_pending++; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -593,6 +594,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) } remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; + sk->sk_wait_pending--; return timeo; } @@ -865,7 +867,7 @@ int inet_shutdown(struct socket *sock, int how) EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ /* fall through */ default: - sk->sk_shutdown |= how; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how); if (sk->sk_prot->shutdown) sk->sk_prot->shutdown(sk, how); break; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 203569500b91..24cd5c9c7839 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -565,7 +565,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb) skb->csum = csum_block_sub(skb->csum, csumdiff, skb->len - trimlen); } - pskb_trim(skb, skb->len - trimlen); + ret = pskb_trim(skb, skb->len - trimlen); + if (unlikely(ret)) + return ret; ret = nexthdr[1]; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 58834a10c0be..93045373e44b 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -237,6 +237,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ secpath_reset(skb); + if (skb_needs_linearize(skb, skb->dev->features) && + __skb_linearize(skb)) + return -ENOMEM; return 0; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1885a2fbad86..9aa48b4c4096 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -557,6 +557,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, cfg->fc_scope = RT_SCOPE_UNIVERSE; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + if (cmd == SIOCDELRT) return 0; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index aa179e6461e1..af0ddaa55e43 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -759,6 +759,11 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, room = 576; room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); + /* Guard against tiny mtu. We need to include at least one + * IP network header for this message to make any sense. + */ + if (room <= (int)sizeof(struct iphdr)) + goto ende; icmp_param.data_len = skb_in->len - icmp_param.offset; if (icmp_param.data_len > room) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a08acb54b6b0..5edf426fa414 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -221,8 +221,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) int tv = prandom_u32() % max_delay; im->tm_running = 1; - if (!mod_timer(&im->timer, jiffies+tv+2)) - refcount_inc(&im->refcnt); + if (refcount_inc_not_zero(&im->refcnt)) { + if (mod_timer(&im->timer, jiffies + tv + 2)) + ip_ma_put(im); + } } static void igmp_gq_start_timer(struct in_device *in_dev) @@ -357,8 +359,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) struct flowi4 fl4; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - unsigned int size = mtu; + unsigned int size; + size = min(mtu, IP_MAX_MTU); while (1) { skb = alloc_skb(size + hlen + tlen, GFP_ATOMIC | __GFP_NOWARN); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0a69f92da71b..7392a744c677 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -826,6 +826,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, if (newsk) { struct inet_connection_sock *newicsk = inet_csk(newsk); + newsk->sk_wait_pending = 0; inet_sk_set_state(newsk, TCP_SYN_RECV); newicsk->icsk_bind_hash = NULL; @@ -903,11 +904,25 @@ void inet_csk_prepare_forced_close(struct sock *sk) } EXPORT_SYMBOL(inet_csk_prepare_forced_close); +static int inet_ulp_can_listen(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ulp_ops) + return -EINVAL; + + return 0; +} + int inet_csk_listen_start(struct sock *sk, int backlog) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); - int err = -EADDRINUSE; + int err; + + err = inet_ulp_can_listen(sk); + if (unlikely(err)) + return err; reqsk_queue_alloc(&icsk->icsk_accept_queue); @@ -920,6 +935,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog) * It is OK, because this socket enters to hash table only * after validation is complete. */ + err = -EADDRINUSE; inet_sk_state_store(sk, TCP_LISTEN); if (!sk->sk_prot->get_port(sk, inet->inet_num)) { inet->inet_sport = htons(inet->inet_num); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3c58019f0718..c6d670cd872f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -743,17 +743,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, u32 index; if (port) { - head = &hinfo->bhash[inet_bhashfn(net, port, - hinfo->bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - inet_ehash_nolisten(sk, NULL, NULL); - spin_unlock_bh(&head->lock); - return 0; - } - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ + local_bh_disable(); ret = check_established(death_row, sk, port, NULL); local_bh_enable(); return ret; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c72432ce9bf5..38c8db78cda1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -440,7 +440,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, /* Push GRE header. */ gre_build_header(skb, tunnel->tun_hlen, flags, proto, tunnel->parms.o_key, - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } @@ -546,7 +546,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); gre_build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -603,7 +603,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) truncate = true; } - nhoff = skb_network_header(skb) - skb_mac_header(skb); + nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; @@ -612,7 +612,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) int thoff; if (skb_transport_header_was_set(skb)) - thoff = skb_transport_header(skb) - skb_mac_header(skb); + thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) @@ -635,7 +635,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) } gre_build_header(skb, 8, TUNNEL_SEQ, - proto, 0, htonl(tunnel->o_seqno++)); + proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno))); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -683,15 +683,18 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, } if (dev->header_ops) { + int pull_len = tunnel->hlen + sizeof(struct iphdr); + if (skb_cow_head(skb, 0)) goto free_skb; tnl_params = (const struct iphdr *)skb->data; - /* Pull skb since ip_tunnel_xmit() needs skb->data pointing - * to gre header. - */ - skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + if (!pskb_network_may_pull(skb, pull_len)) + goto free_skb; + + /* ip_tunnel_xmit() needs skb->data pointing to gre header. */ + skb_pull(skb, pull_len); skb_reset_mac_header(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 06a981676356..6936f703758b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -221,7 +221,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s if (lwtunnel_xmit_redirect(dst->lwtstate)) { int res = lwtunnel_xmit(skb); - if (res < 0 || res == LWTUNNEL_XMIT_DONE) + if (res != LWTUNNEL_XMIT_CONTINUE) return res; } @@ -1443,9 +1443,19 @@ struct sk_buff *__ip_make_skb(struct sock *sk, cork->dst = NULL; skb_dst_set(skb, &rt->dst); - if (iph->protocol == IPPROTO_ICMP) - icmp_out_count(net, ((struct icmphdr *) - skb_transport_header(skb))->type); + if (iph->protocol == IPPROTO_ICMP) { + u8 icmp_type; + + /* For such sockets, transhdrlen is zero when do ip_append_data(), + * so icmphdr does not in skb linear region and can not get icmp_type + * by icmp_hdr(skb)->type. + */ + if (sk->sk_type == SOCK_RAW && !inet_sk(sk)->hdrincl) + icmp_type = fl4->fl4_icmp_type; + else + icmp_type = icmp_hdr(skb)->type; + icmp_out_count(net, icmp_type); + } ip_cork_release(cork); out: diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 82f341e84fae..fbf39077fc54 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -316,7 +316,14 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, ipc->tos = val; ipc->priority = rt_tos2priority(ipc->tos); break; - + case IP_PROTOCOL: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + return -EINVAL; + val = *(int *)CMSG_DATA(cmsg); + if (val < 1 || val > 255) + return -EINVAL; + ipc->protocol = val; + break; default: return -EINVAL; } @@ -1522,6 +1529,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MINTTL: val = inet->min_ttl; break; + case IP_PROTOCOL: + val = inet_sk(sk)->inet_num; + break; default: release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 30e93b4f831f..9c2381cf675d 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -609,10 +609,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) else if (skb->protocol == htons(ETH_P_IP)) df = inner_iph->frag_off & htons(IP_DF); headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; - if (headroom > dev->needed_headroom) - dev->needed_headroom = headroom; + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); - if (skb_cow_head(skb, dev->needed_headroom)) { + if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ip_rt_put(rt); goto tx_dropped; } @@ -777,10 +777,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; + if (max_headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, max_headroom); - if (skb_cow_head(skb, dev->needed_headroom)) { + if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ip_rt_put(rt); dev->stats.tx_dropped++; kfree_skb(skb); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 15c71b08c2df..a3536dfe9b16 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -319,12 +319,12 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IP): - xfrm_decode_session(skb, &fl, AF_INET); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + xfrm_decode_session(skb, &fl, AF_INET); break; case htons(ETH_P_IPV6): - xfrm_decode_session(skb, &fl, AF_INET6); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + xfrm_decode_session(skb, &fl, AF_INET6); break; default: goto tx_err; diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index 04311f7067e2..9a6b01d85cd0 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -1,4 +1,5 @@ #include <linux/netlink.h> +#include <linux/nospec.h> #include <linux/rtnetlink.h> #include <linux/types.h> #include <net/ip.h> @@ -24,6 +25,7 @@ int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, if (type > RTAX_MAX) return -EINVAL; + type = array_index_nospec(type, RTAX_MAX + 1); if (type == RTAX_CC_ALGO) { char tmp[TCP_CA_NAME_MAX]; diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index 4824b1e183a1..bff2b85c5fd6 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -96,11 +96,11 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, const struct net_device *indev) { - __be32 uninitialized_var(daddr), uninitialized_var(saddr); - __be16 uninitialized_var(dport), uninitialized_var(sport); + __be32 daddr, saddr; + __be16 dport, sport; const struct iphdr *iph = ip_hdr(skb); struct sk_buff *data_skb = NULL; - u8 uninitialized_var(protocol); + u8 protocol; #if IS_ENABLED(CONFIG_NF_CONNTRACK) enum ip_conntrack_info ctinfo; struct nf_conn const *ct; diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index 0af3d8df70dd..157bca240edc 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -16,8 +16,8 @@ #include <net/netfilter/ipv4/nf_dup_ipv4.h> struct nft_dup_ipv4 { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_dev:8; + u8 sreg_addr; + u8 sreg_dev; }; static void nft_dup_ipv4_eval(const struct nft_expr *expr, @@ -43,16 +43,16 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_ADDR] == NULL) return -EINVAL; - priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr)); + err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, + sizeof(struct in_addr)); if (err < 0) return err; - if (tb[NFTA_DUP_SREG_DEV] != NULL) { - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); - } - return 0; + if (tb[NFTA_DUP_SREG_DEV]) + err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], + &priv->sreg_dev, sizeof(int)); + + return err; } static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 654f586fc0d7..8ad120c07096 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -563,6 +563,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } ipcm_init_sk(&ipc, inet); + /* Keep backward compat */ + if (hdrincl) + ipc.protocol = IPPROTO_RAW; if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); @@ -630,7 +633,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - hdrincl ? IPPROTO_RAW : sk->sk_protocol, + hdrincl ? ipc.protocol : sk->sk_protocol, inet_sk_flowi_flags(sk) | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sk->sk_uid); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 57e2316529d0..f4d41ceef946 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -791,7 +791,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow goto reject_redirect; } - n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); + n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw); if (!n) n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { @@ -1215,6 +1215,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_send_dest_unreach(struct sk_buff *skb) { + struct net_device *dev; struct ip_options opt; int res; @@ -1232,7 +1233,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb) opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); rcu_read_lock(); - res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; + res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); rcu_read_unlock(); if (res) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 68f89fe7f923..00648a478c6a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -515,6 +515,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) __poll_t mask; struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); + u8 shutdown; int state; sock_poll_wait(file, sock, wait); @@ -557,9 +558,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) * NOTE. Check for TCP_CLOSE is added. The goal is to prevent * blocking on fresh not-connected or disconnected socket. --ANK */ - if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) + shutdown = READ_ONCE(sk->sk_shutdown); + if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) mask |= EPOLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; /* Connected or passive Fast Open socket? */ @@ -575,8 +577,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tcp_stream_is_readable(tp, target, sk)) mask |= EPOLLIN | EPOLLRDNORM; - if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { - if (sk_stream_is_writeable(sk)) { + if (!(shutdown & SEND_SHUTDOWN)) { + if (__sk_stream_is_writeable(sk, 1)) { mask |= EPOLLOUT | EPOLLWRNORM; } else { /* send SIGIO later */ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -588,7 +590,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) * pairs with the input side. */ smp_mb__after_atomic(); - if (sk_stream_is_writeable(sk)) + if (__sk_stream_is_writeable(sk, 1)) mask |= EPOLLOUT | EPOLLWRNORM; } } else @@ -2338,14 +2340,13 @@ bool tcp_check_oom(struct sock *sk, int shift) return too_many_orphans || out_of_socket_memory; } -void tcp_close(struct sock *sk, long timeout) +void __tcp_close(struct sock *sk, long timeout) { struct sk_buff *skb; int data_was_unread = 0; int state; - lock_sock(sk); - sk->sk_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); if (sk->sk_state == TCP_LISTEN) { tcp_set_state(sk, TCP_CLOSE); @@ -2505,6 +2506,12 @@ adjudge_to_death: out: bh_unlock_sock(sk); local_bh_enable(); +} + +void tcp_close(struct sock *sk, long timeout) +{ + lock_sock(sk); + __tcp_close(sk, timeout); release_sock(sk); sock_put(sk); } @@ -2561,6 +2568,12 @@ int tcp_disconnect(struct sock *sk, int flags) int old_state = sk->sk_state; u32 seq; + /* Deny disconnect if other threads are blocked in sk_wait_event() + * or inet_wait_for_connect(). + */ + if (sk->sk_wait_pending) + return -EBUSY; + if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); @@ -2593,7 +2606,7 @@ int tcp_disconnect(struct sock *sk, int flags) if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); - sk->sk_shutdown = 0; + WRITE_ONCE(sk->sk_shutdown, 0); sock_reset_flag(sk, SOCK_DONE); tp->srtt_us = 0; tp->rcv_rtt_last_tsecr = 0; @@ -2987,18 +3000,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_LINGER2: if (val < 0) - tp->linger2 = -1; - else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ) - tp->linger2 = 0; + WRITE_ONCE(tp->linger2, -1); + else if (val > TCP_FIN_TIMEOUT_MAX / HZ) + WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX); else - tp->linger2 = val * HZ; + WRITE_ONCE(tp->linger2, val * HZ); break; case TCP_DEFER_ACCEPT: /* Translate value in seconds to number of retransmits */ - icsk->icsk_accept_queue.rskq_defer_accept = - secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, - TCP_RTO_MAX / HZ); + WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, + secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ)); break; case TCP_WINDOW_CLAMP: @@ -3086,7 +3099,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = tcp_repair_set_window(tp, optval, optlen); break; case TCP_NOTSENT_LOWAT: - tp->notsent_lowat = val; + WRITE_ONCE(tp->notsent_lowat, val); sk->sk_write_space(sk); break; case TCP_INQ: @@ -3363,7 +3376,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, switch (optname) { case TCP_MAXSEG: val = tp->mss_cache; - if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) + if (tp->rx_opt.user_mss && + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) val = tp->rx_opt.user_mss; if (tp->repair) val = tp->rx_opt.mss_clamp; @@ -3387,13 +3401,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; break; case TCP_LINGER2: - val = tp->linger2; + val = READ_ONCE(tp->linger2); if (val >= 0) val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; break; case TCP_DEFER_ACCEPT: - val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, - TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); + val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept); + val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; @@ -3539,7 +3554,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_FASTOPEN: - val = icsk->icsk_accept_queue.fastopenq.max_qlen; + val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen); break; case TCP_FASTOPEN_CONNECT: @@ -3554,7 +3569,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tcp_time_stamp_raw() + tp->tsoffset; break; case TCP_NOTSENT_LOWAT: - val = tp->notsent_lowat; + val = READ_ONCE(tp->notsent_lowat); break; case TCP_INQ: val = tp->recvmsg_inq; @@ -3802,7 +3817,7 @@ void tcp_done(struct sock *sk) if (req) reqsk_fastopen_remove(sk, req, false); - sk->sk_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f726591de7c7..f7bb78b443fa 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -276,6 +276,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, static bool tcp_fastopen_queue_check(struct sock *sk) { struct fastopen_queue *fastopenq; + int max_qlen; /* Make sure the listener has enabled fastopen, and we don't * exceed the max # of pending TFO requests allowed before trying @@ -288,10 +289,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * temporarily vs a server not supporting Fast Open at all. */ fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; - if (fastopenq->max_qlen == 0) + max_qlen = READ_ONCE(fastopenq->max_qlen); + if (max_qlen == 0) return false; - if (fastopenq->qlen >= fastopenq->max_qlen) { + if (fastopenq->qlen >= max_qlen) { struct request_sock *req1; spin_lock(&fastopenq->lock); req1 = fastopenq->rskq_rst_head; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 11716780667c..407ad07dc598 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -172,6 +172,19 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) if (unlikely(len > icsk->icsk_ack.rcv_mss + MAX_TCP_OPTION_SPACE)) tcp_gro_dev_warn(sk, skb, len); + /* If the skb has a len of exactly 1*MSS and has the PSH bit + * set then it is likely the end of an application write. So + * more data may not be arriving soon, and yet the data sender + * may be waiting for an ACK if cwnd-bound or using TX zero + * copy. So we set ICSK_ACK_PUSHED here so that + * tcp_cleanup_rbuf() will send an ACK immediately if the app + * reads all of the data and is not ping-pong. If len > MSS + * then this logic does not matter (and does not hurt) because + * tcp_cleanup_rbuf() will always ACK immediately if the app + * reads data and there is more than an MSS of unACKed data. + */ + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH) + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. @@ -216,7 +229,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks) icsk->icsk_ack.quick = quickacks; } -void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) +static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -224,7 +237,6 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) icsk->icsk_ack.pingpong = 0; icsk->icsk_ack.ato = TCP_ATO_MIN; } -EXPORT_SYMBOL(tcp_enter_quickack_mode); /* Send ACKs quickly, if "quick" count is not exhausted * and the session is not interactive. @@ -3429,8 +3441,11 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, u32 *last_oow_ack_time) { - if (*last_oow_ack_time) { - s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); + /* Paired with the WRITE_ONCE() in this function. */ + u32 val = READ_ONCE(*last_oow_ack_time); + + if (val) { + s32 elapsed = (s32)(tcp_jiffies32 - val); if (0 <= elapsed && elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { @@ -3439,7 +3454,10 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, } } - *last_oow_ack_time = tcp_jiffies32; + /* Paired with the prior READ_ONCE() and with itself, + * as we might be lockless. + */ + WRITE_ONCE(*last_oow_ack_time, tcp_jiffies32); return false; /* not rate-limited: go ahead, send dupack now! */ } @@ -3622,8 +3640,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * then we can probably ignore it. */ if (before(ack, prior_snd_una)) { + u32 max_window; + + /* do not accept ACK for bytes we never sent. */ + max_window = min_t(u64, tp->max_window, tp->bytes_acked); /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ - if (before(ack, prior_snd_una - tp->max_window)) { + if (before(ack, prior_snd_una - max_window)) { if (!(flag & FLAG_NO_CHALLENGE_ACK)) tcp_send_challenge_ack(sk, skb); return -1; @@ -4130,7 +4152,7 @@ void tcp_fin(struct sock *sk) inet_csk_schedule_ack(sk); - sk->sk_shutdown |= RCV_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); sock_set_flag(sk, SOCK_DONE); switch (sk->sk_state) { @@ -6209,7 +6231,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) break; tcp_set_state(sk, TCP_FIN_WAIT2); - sk->sk_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | SEND_SHUTDOWN); sk_dst_confirm(sk); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 4960e2b6bd7f..60619b1f4acd 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -40,7 +40,7 @@ struct tcp_fastopen_metrics { struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; - possible_net_t tcpm_net; + struct net *tcpm_net; struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; @@ -51,34 +51,38 @@ struct tcp_metrics_block { struct rcu_head rcu_head; }; -static inline struct net *tm_net(struct tcp_metrics_block *tm) +static inline struct net *tm_net(const struct tcp_metrics_block *tm) { - return read_pnet(&tm->tcpm_net); + /* Paired with the WRITE_ONCE() in tcpm_new() */ + return READ_ONCE(tm->tcpm_net); } static bool tcp_metric_locked(struct tcp_metrics_block *tm, enum tcp_metric_index idx) { - return tm->tcpm_lock & (1 << idx); + /* Paired with WRITE_ONCE() in tcpm_suck_dst() */ + return READ_ONCE(tm->tcpm_lock) & (1 << idx); } -static u32 tcp_metric_get(struct tcp_metrics_block *tm, +static u32 tcp_metric_get(const struct tcp_metrics_block *tm, enum tcp_metric_index idx) { - return tm->tcpm_vals[idx]; + /* Paired with WRITE_ONCE() in tcp_metric_set() */ + return READ_ONCE(tm->tcpm_vals[idx]); } static void tcp_metric_set(struct tcp_metrics_block *tm, enum tcp_metric_index idx, u32 val) { - tm->tcpm_vals[idx] = val; + /* Paired with READ_ONCE() in tcp_metric_get() */ + WRITE_ONCE(tm->tcpm_vals[idx], val); } static bool addr_same(const struct inetpeer_addr *a, const struct inetpeer_addr *b) { - return inetpeer_addr_cmp(a, b) == 0; + return (a->family == b->family) && !inetpeer_addr_cmp(a, b); } struct tcpm_hash_bucket { @@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly; static unsigned int tcp_metrics_hash_log __read_mostly; static DEFINE_SPINLOCK(tcp_metrics_lock); +static DEFINE_SEQLOCK(fastopen_seqlock); static void tcpm_suck_dst(struct tcp_metrics_block *tm, const struct dst_entry *dst, @@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, u32 msval; u32 val; - tm->tcpm_stamp = jiffies; + WRITE_ONCE(tm->tcpm_stamp, jiffies); val = 0; if (dst_metric_locked(dst, RTAX_RTT)) @@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, val |= 1 << TCP_METRIC_CWND; if (dst_metric_locked(dst, RTAX_REORDERING)) val |= 1 << TCP_METRIC_REORDERING; - tm->tcpm_lock = val; + /* Paired with READ_ONCE() in tcp_metric_locked() */ + WRITE_ONCE(tm->tcpm_lock, val); msval = dst_metric_raw(dst, RTAX_RTT); - tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; + tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC); msval = dst_metric_raw(dst, RTAX_RTTVAR); - tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC; - tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); - tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); - tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); + tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC); + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + dst_metric_raw(dst, RTAX_SSTHRESH)); + tcp_metric_set(tm, TCP_METRIC_CWND, + dst_metric_raw(dst, RTAX_CWND)); + tcp_metric_set(tm, TCP_METRIC_REORDERING, + dst_metric_raw(dst, RTAX_REORDERING)); if (fastopen_clear) { + write_seqlock(&fastopen_seqlock); tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.syn_loss = 0; tm->tcpm_fastopen.try_exp = 0; tm->tcpm_fastopen.cookie.exp = false; tm->tcpm_fastopen.cookie.len = 0; + write_sequnlock(&fastopen_seqlock); } } #define TCP_METRICS_TIMEOUT (60 * 60 * HZ) -static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) +static void tcpm_check_stamp(struct tcp_metrics_block *tm, + const struct dst_entry *dst) { - if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) + unsigned long limit; + + if (!tm) + return; + limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT; + if (unlikely(time_after(jiffies, limit))) tcpm_suck_dst(tm, dst, false); } @@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, oldest = deref_locked(tcp_metrics_hash[hash].chain); for (tm = deref_locked(oldest->tcpm_next); tm; tm = deref_locked(tm->tcpm_next)) { - if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) + if (time_before(READ_ONCE(tm->tcpm_stamp), + READ_ONCE(oldest->tcpm_stamp))) oldest = tm; } tm = oldest; } else { - tm = kmalloc(sizeof(*tm), GFP_ATOMIC); + tm = kzalloc(sizeof(*tm), GFP_ATOMIC); if (!tm) goto out_unlock; } - write_pnet(&tm->tcpm_net, net); + /* Paired with the READ_ONCE() in tm_net() */ + WRITE_ONCE(tm->tcpm_net, net); + tm->tcpm_saddr = *saddr; tm->tcpm_daddr = *daddr; - tcpm_suck_dst(tm, dst, true); + tcpm_suck_dst(tm, dst, reclaim); if (likely(!reclaim)) { tm->tcpm_next = tcp_metrics_hash[hash].chain; @@ -431,7 +451,7 @@ void tcp_update_metrics(struct sock *sk) tp->reordering); } } - tm->tcpm_stamp = jiffies; + WRITE_ONCE(tm->tcpm_stamp, jiffies); out_unlock: rcu_read_unlock(); } @@ -446,11 +466,15 @@ void tcp_init_metrics(struct sock *sk) u32 val, crtt = 0; /* cached RTT scaled by 8 */ sk_dst_confirm(sk); + /* ssthresh may have been reduced unnecessarily during. + * 3WHS. Restore it back to its initial default. + */ + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; if (!dst) goto reset; rcu_read_lock(); - tm = tcp_get_metrics(sk, dst, true); + tm = tcp_get_metrics(sk, dst, false); if (!tm) { rcu_read_unlock(); goto reset; @@ -464,11 +488,6 @@ void tcp_init_metrics(struct sock *sk) tp->snd_ssthresh = val; if (tp->snd_ssthresh > tp->snd_cwnd_clamp) tp->snd_ssthresh = tp->snd_cwnd_clamp; - } else { - /* ssthresh may have been reduced unnecessarily during. - * 3WHS. Restore it back to its initial default. - */ - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; } val = tcp_metric_get(tm, TCP_METRIC_REORDERING); if (val && tp->reordering != val) @@ -544,8 +563,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) return ret; } -static DEFINE_SEQLOCK(fastopen_seqlock); - void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie) { @@ -652,7 +669,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, } if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, - jiffies - tm->tcpm_stamp, + jiffies - READ_ONCE(tm->tcpm_stamp), TCP_METRICS_ATTR_PAD) < 0) goto nla_put_failure; @@ -663,7 +680,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, if (!nest) goto nla_put_failure; for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { - u32 val = tm->tcpm_vals[i]; + u32 val = tcp_metric_get(tm, i); if (!val) continue; @@ -895,7 +912,7 @@ static void tcp_metrics_flush_all(struct net *net) match = net ? net_eq(tm_net(tm), net) : !refcount_read(&tm_net(tm)->count); if (match) { - *pp = tm->tcpm_next; + rcu_assign_pointer(*pp, tm->tcpm_next); kfree_rcu(tm, rcu_head); } else { pp = &tm->tcpm_next; @@ -936,7 +953,7 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) if (addr_same(&tm->tcpm_daddr, &daddr) && (!src || addr_same(&tm->tcpm_saddr, &saddr)) && net_eq(tm_net(tm), net)) { - *pp = tm->tcpm_next; + rcu_assign_pointer(*pp, tm->tcpm_next); kfree_rcu(tm, rcu_head); found = true; } else { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0fc238d79b03..bae0199a943b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -582,6 +582,9 @@ EXPORT_SYMBOL(tcp_create_openreq_child); * validation and inside tcp_v4_reqsk_send_ack(). Can we do better? * * We don't need to initialize tmp_opt.sack_ok as we don't use the results + * + * Note: If @fastopen is true, this can be called from process context. + * Otherwise, this is from BH context. */ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, @@ -734,7 +737,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, &tcp_rsk(req)->last_oow_ack_time)) req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) - __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); return NULL; } @@ -753,7 +756,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * "fourth, check the SYN bit" */ if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { - __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8962864223b4..670804d4c169 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -164,8 +164,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, } /* Account for an ACK we sent. */ -static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, - u32 rcv_nxt) +static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt) { struct tcp_sock *tp = tcp_sk(sk); @@ -179,7 +178,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, if (unlikely(rcv_nxt != tp->rcv_nxt)) return; /* Special ACK sent by DCTCP to reflect ECN */ - tcp_dec_quickack_mode(sk, pkts); + tcp_dec_quickack_mode(sk); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } @@ -1091,7 +1090,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, skb_set_hash_from_sk(skb, sk); refcount_add(skb->truesize, &sk->sk_wmem_alloc); - skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); + skb_set_dst_pending_confirm(skb, READ_ONCE(sk->sk_dst_pending_confirm)); /* Build TCP header and checksum it. */ th = (struct tcphdr *)skb->data; @@ -1139,7 +1138,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, icsk->icsk_af_ops->send_check(sk, skb); if (likely(tcb->tcp_flags & TCPHDR_ACK)) - tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt); + tcp_event_ack_sent(sk, rcv_nxt); if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); @@ -2221,6 +2220,18 @@ static int tcp_mtu_probe(struct sock *sk) return -1; } +static bool tcp_rtx_queue_empty_or_single_skb(const struct sock *sk) +{ + const struct rb_node *node = sk->tcp_rtx_queue.rb_node; + + /* No skb in the rtx queue. */ + if (!node) + return true; + + /* Only one skb in rtx queue. */ + return !node->rb_left && !node->rb_right; +} + /* TCP Small Queues : * Control number of packets in qdisc/devices to two packets / or ~1 ms. * (These limits are doubled for retransmits) @@ -2243,12 +2254,12 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit <<= factor; if (refcount_read(&sk->sk_wmem_alloc) > limit) { - /* Always send skb if rtx queue is empty. + /* Always send skb if rtx queue is empty or has one skb. * No need to wait for TX completion to call us back, * after softirq/tasklet schedule. * This helps when TX completions are delayed too much. */ - if (tcp_rtx_queue_empty(sk)) + if (tcp_rtx_queue_empty_or_single_skb(sk)) return false; set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); @@ -2450,7 +2461,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - u32 timeout, rto_delta_us; + u32 timeout, timeout_us, rto_delta_us; int early_retrans; /* Don't do any loss probe on a Fast Open connection before 3WHS @@ -2474,11 +2485,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) * sample is available then probe after TCP_TIMEOUT_INIT. */ if (tp->srtt_us) { - timeout = usecs_to_jiffies(tp->srtt_us >> 2); + timeout_us = tp->srtt_us >> 2; if (tp->packets_out == 1) - timeout += TCP_RTO_MIN; + timeout_us += tcp_rto_min_us(sk); else - timeout += TCP_TIMEOUT_MIN; + timeout_us += TCP_TIMEOUT_MIN_US; + timeout = usecs_to_jiffies(timeout_us); } else { timeout = TCP_TIMEOUT_INIT; } @@ -2878,7 +2890,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) if (skb_still_in_host_queue(sk, skb)) return -EBUSY; +start: if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { + if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; + TCP_SKB_CB(skb)->seq++; + goto start; + } if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { WARN_ON_ONCE(1); return -EINVAL; @@ -3307,7 +3325,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, th->window = htons(min(req->rsk_rcv_wnd, 65535U)); tcp_options_write((__be32 *)(th + 1), NULL, &opts); th->doff = (tcp_header_size >> 2); - __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 61969bb9395c..844ff390f726 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -122,7 +122,7 @@ bool tcp_rack_mark_lost(struct sock *sk) tp->rack.advanced = 0; tcp_rack_detect_loss(sk, &timeout); if (timeout) { - timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN; + timeout = usecs_to_jiffies(timeout + TCP_TIMEOUT_MIN_US); inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, timeout, inet_csk(sk)->icsk_rto); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 84069db0423a..d8d28ba169b4 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -549,7 +549,9 @@ out_reset_timer: tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; - icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); + icsk->icsk_rto = clamp(__tcp_set_rto(tp), + tcp_rto_min(sk), + TCP_RTO_MAX); } else { /* Use normal (exponential) backoff */ icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dd3fa0ff84e7..a6048cc7fc35 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1458,7 +1458,7 @@ drop: } EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); -void udp_destruct_sock(struct sock *sk) +void udp_destruct_common(struct sock *sk) { /* reclaim completely the forward allocated memory */ struct udp_sock *up = udp_sk(sk); @@ -1471,10 +1471,14 @@ void udp_destruct_sock(struct sock *sk) kfree_skb(skb); } udp_rmem_release(sk, total, 0, true); +} +EXPORT_SYMBOL_GPL(udp_destruct_common); +static void udp_destruct_sock(struct sock *sk) +{ + udp_destruct_common(sk); inet_sock_destruct(sk); } -EXPORT_SYMBOL_GPL(udp_destruct_sock); int udp_init_sock(struct sock *sk) { @@ -1482,7 +1486,6 @@ int udp_init_sock(struct sock *sk) sk->sk_destruct = udp_destruct_sock; return 0; } -EXPORT_SYMBOL_GPL(udp_init_sock); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) { diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 6539ff15e9a3..d03d74388870 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -186,6 +186,7 @@ EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); void udp_tunnel_sock_release(struct socket *sock) { rcu_assign_sk_user_data(sock->sk, NULL); + synchronize_rcu(); kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); } diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 8545457752fb..27173549b000 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -20,6 +20,14 @@ struct udp_table udplite_table __read_mostly; EXPORT_SYMBOL(udplite_table); +/* Designate sk as UDP-Lite socket */ +static int udplite_sk_init(struct sock *sk) +{ + udp_init_sock(sk); + udp_sk(sk)->pcflag = UDPLITE_BIT; + return 0; +} + static int udplite_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); @@ -56,6 +64,8 @@ struct proto udplite_prot = { .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp_sock), .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f261c6d7f1f2..5ffa8777ab09 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -316,9 +316,8 @@ static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) static void addrconf_mod_rs_timer(struct inet6_dev *idev, unsigned long when) { - if (!timer_pending(&idev->rs_timer)) + if (!mod_timer(&idev->rs_timer, jiffies + when)) in6_dev_hold(idev); - mod_timer(&idev->rs_timer, jiffies + when); } static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, @@ -1322,7 +1321,7 @@ retry: * idev->desync_factor if it's larger */ cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft); - max_desync_factor = min_t(__u32, + max_desync_factor = min_t(long, idev->cnf.max_desync_factor, cnf_temp_preferred_lft - regen_advance); @@ -2489,12 +2488,18 @@ static void manage_tempaddrs(struct inet6_dev *idev, ipv6_ifa_notify(0, ift); } - if ((create || list_empty(&idev->tempaddr_list)) && - idev->cnf.use_tempaddr > 0) { + /* Also create a temporary address if it's enabled but no temporary + * address currently exists. + * However, we get called with valid_lft == 0, prefered_lft == 0, create == false + * as part of cleanup (ie. deleting the mngtmpaddr). + * We don't want that to result in creating a new temporary ip address. + */ + if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft)) + create = true; + + if (create && idev->cnf.use_tempaddr > 0) { /* When a new public address is created as described * in [ADDRCONF], also create a new temporary address. - * Also create a temporary address if it's enabled but - * no temporary address currently exists. */ read_unlock_bh(&idev->lock); ipv6_create_tempaddr(ifp, NULL, false); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 5c2351deedc8..c8f39d61b51e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -107,6 +107,13 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } +void inet6_sock_destruct(struct sock *sk) +{ + inet6_cleanup_sock(sk); + inet_sock_destruct(sk); +} +EXPORT_SYMBOL_GPL(inet6_sock_destruct); + static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) { @@ -199,7 +206,7 @@ lookup_protocol: inet->hdrincl = 1; } - sk->sk_destruct = inet_sock_destruct; + sk->sk_destruct = inet6_sock_destruct; sk->sk_family = PF_INET6; sk->sk_protocol = protocol; @@ -502,6 +509,12 @@ void inet6_destroy_sock(struct sock *sk) } EXPORT_SYMBOL_GPL(inet6_destroy_sock); +void inet6_cleanup_sock(struct sock *sk) +{ + inet6_destroy_sock(sk); +} +EXPORT_SYMBOL_GPL(inet6_cleanup_sock); + /* * This does both peername and sockname. */ diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 727f958dd869..45ece6a898bf 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -54,7 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->flowi6_mark = sk->sk_mark; fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; - fl6->flowlabel = np->flow_label; + fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6->flowi6_uid = sk->sk_uid; if (!fl6->flowi6_oif) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index d847ffbe9745..6529e46ad091 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -517,7 +517,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb) skb->csum = csum_block_sub(skb->csum, csumdiff, skb->len - trimlen); } - pskb_trim(skb, skb->len - trimlen); + ret = pskb_trim(skb, skb->len - trimlen); + if (unlikely(ret)) + return ret; ret = nexthdr[1]; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index eeee64a8a72c..69313ec24264 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -272,6 +272,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features secpath_reset(skb); + if (skb_needs_linearize(skb, skb->dev->features) && + __skb_linearize(skb)) + return -ENOMEM; return 0; } diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index ae365df8abf7..f356d3049143 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -142,6 +142,8 @@ int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type) optlen = 1; break; default: + if (len < 2) + goto bad; optlen = nh[offset + 1] + 2; if (optlen > len) goto bad; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fbc8746371b6..bfafd7649ccb 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -395,23 +395,31 @@ relookup_failed: return ERR_PTR(err); } -static int icmp6_iif(const struct sk_buff *skb) +static struct net_device *icmp6_dev(const struct sk_buff *skb) { - int iif = skb->dev->ifindex; + struct net_device *dev = skb->dev; /* for local traffic to local address, skb dev is the loopback * device. Check if there is a dst attached to the skb and if so * get the real device index. Same is needed for replies to a link * local address on a device enslaved to an L3 master device */ - if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { + if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { const struct rt6_info *rt6 = skb_rt6_info(skb); - if (rt6) - iif = rt6->rt6i_idev->dev->ifindex; + /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.), + * and ip6_null_entry could be set to skb if no route is found. + */ + if (rt6 && rt6->rt6i_idev) + dev = rt6->rt6i_idev->dev; } - return iif; + return dev; +} + +static int icmp6_iif(const struct sk_buff *skb) +{ + return icmp6_dev(skb)->ifindex; } /* @@ -800,7 +808,7 @@ out: static int icmpv6_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); - struct net_device *dev = skb->dev; + struct net_device *dev = icmp6_dev(skb); struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 7858fa9ea103..87744eb8d0c4 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -480,6 +480,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); + ret = -ESRCH; ila = ila_lookup_by_params(&xp, ilan); if (ila) { ret = ila_dump_info(ila, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5ff67cb8b6ac..92bc56028b8b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1351,13 +1351,9 @@ out: if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, pn); -#if RT6_DEBUG >= 2 - if (!pn_leaf) { - WARN_ON(!pn_leaf); + if (!pn_leaf) pn_leaf = info->nl_net->ipv6.fib6_null_entry; - } -#endif fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f994f50e1516..1858cf783a4f 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -518,7 +518,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { - int uninitialized_var(err); + int err; struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e617a98f4df6..aa8ada354a39 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -731,6 +731,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, { struct ip6_tnl *tunnel = netdev_priv(dev); __be16 protocol; + __be16 flags; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -740,16 +741,12 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, else fl6->daddr = tunnel->parms.raddr; - if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) - return -ENOMEM; - /* Push GRE header. */ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; if (tunnel->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; - __be16 flags; int tun_hlen; tun_info = skb_tunnel_info(skb); @@ -770,19 +767,25 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); tun_hlen = gre_calc_hlen(flags); + if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen)) + return -ENOMEM; + gre_build_header(skb, tun_hlen, flags, protocol, tunnel_id_to_key32(tun_info->key.tun_id), - (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0); } else { - if (tunnel->parms.o_flags & TUNNEL_SEQ) - tunnel->o_seqno++; + if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) + return -ENOMEM; + + flags = tunnel->parms.o_flags; - gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + gre_build_header(skb, tunnel->tun_hlen, flags, protocol, tunnel->parms.o_key, - htonl(tunnel->o_seqno)); + (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) + : 0); } return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, @@ -957,11 +960,12 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, goto tx_err; if (skb->len > dev->mtu + dev->hard_header_len) { - pskb_trim(skb, dev->mtu + dev->hard_header_len); + if (pskb_trim(skb, dev->mtu + dev->hard_header_len)) + goto tx_err; truncate = true; } - nhoff = skb_network_header(skb) - skb_mac_header(skb); + nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; @@ -970,7 +974,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, int thoff; if (skb_transport_header_was_set(skb)) - thoff = skb_transport_header(skb) - skb_mac_header(skb); + thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) @@ -1018,12 +1022,14 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, ntohl(tun_id), ntohl(md->u.index), truncate, false); + proto = htons(ETH_P_ERSPAN); } else if (md->version == 2) { erspan_build_header_v2(skb, ntohl(tun_id), md->u.md2.dir, get_hwid(&md->u.md2), truncate, false); + proto = htons(ETH_P_ERSPAN2); } else { goto tx_err; } @@ -1046,25 +1052,26 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, break; } - if (t->parms.erspan_ver == 1) + if (t->parms.erspan_ver == 1) { erspan_build_header(skb, ntohl(t->parms.o_key), t->parms.index, truncate, false); - else if (t->parms.erspan_ver == 2) + proto = htons(ETH_P_ERSPAN); + } else if (t->parms.erspan_ver == 2) { erspan_build_header_v2(skb, ntohl(t->parms.o_key), t->parms.dir, t->parms.hwid, truncate, false); - else + proto = htons(ETH_P_ERSPAN2); + } else { goto tx_err; + } fl6.daddr = t->parms.raddr; } /* Push GRE header. */ - proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN) - : htons(ETH_P_ERSPAN2); - gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++)); + gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno))); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) @@ -1154,14 +1161,16 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, dev->needed_headroom = dst_len; if (set_mtu) { - dev->mtu = rt->dst.dev->mtu - t_hlen; + int mtu = rt->dst.dev->mtu - t_hlen; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; + mtu -= 8; if (dev->type == ARPHRD_ETHER) - dev->mtu -= ETH_HLEN; + mtu -= ETH_HLEN; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } } ip6_rt_put(rt); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6fd1a4b61747..0872df066a4e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -106,7 +106,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (lwtunnel_xmit_redirect(dst->lwtstate)) { int res = lwtunnel_xmit(skb); - if (res < 0 || res == LWTUNNEL_XMIT_DONE) + if (res != LWTUNNEL_XMIT_CONTINUE) return res; } @@ -153,7 +153,13 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, int err; skb_mark_not_on_list(segs); - err = ip6_fragment(net, sk, segs, ip6_finish_output2); + /* Last GSO segment can be smaller than gso_size (and MTU). + * Adding a fragment header would produce an "atomic fragment", + * which is considered harmful (RFC-8021). Avoid that. + */ + err = segs->len > mtu ? + ip6_fragment(net, sk, segs, ip6_finish_output2) : + ip6_finish_output2(net, sk, segs); if (err && ret == 0) ret = err; } @@ -734,6 +740,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); + /* We prevent @rt from being freed. */ + rcu_read_lock(); + for (;;) { /* Prepare header of the next frame, * before previous one went down. */ @@ -776,6 +785,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); + rcu_read_unlock(); return 0; } @@ -783,6 +793,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); + rcu_read_unlock(); return err; slow_path_clean: @@ -1725,8 +1736,13 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + u8 icmp6_type; - ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); + if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl) + icmp6_type = fl6->fl6_icmp_type; + else + icmp6_type = icmp6_hdr(skb)->icmp6_type; + ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b647a4037679..56309c851928 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -404,7 +404,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw; unsigned int nhoff = raw - skb->data; unsigned int off = nhoff + sizeof(*ipv6h); - u8 next, nexthdr = ipv6h->nexthdr; + u8 nexthdr = ipv6h->nexthdr; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { struct ipv6_opt_hdr *hdr; @@ -415,25 +415,25 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) hdr = (struct ipv6_opt_hdr *)(skb->data + off); if (nexthdr == NEXTHDR_FRAGMENT) { - struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr; - if (frag_hdr->frag_off) - break; optlen = 8; } else if (nexthdr == NEXTHDR_AUTH) { optlen = (hdr->hdrlen + 2) << 2; } else { optlen = ipv6_optlen(hdr); } - /* cache hdr->nexthdr, since pskb_may_pull() might - * invalidate hdr - */ - next = hdr->nexthdr; - if (nexthdr == NEXTHDR_DEST) { - u16 i = 2; - /* Remember : hdr is no longer valid at this point. */ - if (!pskb_may_pull(skb, off + optlen)) + if (!pskb_may_pull(skb, off + optlen)) + break; + + hdr = (struct ipv6_opt_hdr *)(skb->data + off); + if (nexthdr == NEXTHDR_FRAGMENT) { + struct frag_hdr *frag_hdr = (struct frag_hdr *)hdr; + + if (frag_hdr->frag_off) break; + } + if (nexthdr == NEXTHDR_DEST) { + u16 i = 2; while (1) { struct ipv6_tlv_tnl_enc_lim *tel; @@ -454,7 +454,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) i++; } } - nexthdr = next; + nexthdr = hdr->nexthdr; off += optlen; } return 0; @@ -1206,8 +1206,8 @@ route_lookup: */ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + dst->header_len + t->hlen; - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; + if (max_headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, max_headroom); err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) @@ -1435,6 +1435,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; int t_hlen; + int mtu; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); @@ -1477,12 +1478,13 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) dev->hard_header_len = rt->dst.dev->hard_header_len + t_hlen; - dev->mtu = rt->dst.dev->mtu - t_hlen; + mtu = rt->dst.dev->mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu -= 8; + mtu -= 8; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); } ip6_rt_put(rt); } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 866ce815625e..a64050e77588 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -562,12 +562,12 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) vti6_addr_conflict(t, ipv6_hdr(skb))) goto tx_err; - xfrm_decode_session(skb, &fl, AF_INET6); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + xfrm_decode_session(skb, &fl, AF_INET6); break; case htons(ETH_P_IP): - xfrm_decode_session(skb, &fl, AF_INET); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + xfrm_decode_session(skb, &fl, AF_INET); break; default: goto tx_err; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e9eb917342b8..329bad6cbb76 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1064,7 +1064,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, And all this only to mangle msg->im6_msgtype and to set msg->im6_mbz to "mbz" :-) */ - skb_push(skb, -skb_network_offset(pkt)); + __skb_pull(skb, skb_network_offset(pkt)); skb_push(skb, sizeof(*msg)); skb_reset_transport_header(skb); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4f958d24f9e4..1c155e610c06 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -178,9 +178,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optlen < sizeof(int)) goto e_inval; if (val == PF_INET) { - struct ipv6_txoptions *opt; - struct sk_buff *pktopt; - if (sk->sk_type == SOCK_RAW) break; @@ -211,7 +208,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; } - fl6_free_socklist(sk); __ipv6_sock_mc_close(sk); __ipv6_sock_ac_close(sk); @@ -246,14 +242,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg((__force struct ipv6_txoptions **)&np->opt, - NULL); - if (opt) { - atomic_sub(opt->tot_len, &sk->sk_omem_alloc); - txopt_put(opt); - } - pktopt = xchg(&np->pktoptions, NULL); - kfree_skb(pktopt); + + /* Disable all options not to allocate memory anymore, + * but there is still a race. See the lockless path + * in udpv6_sendmsg() and ipv6_local_rxpmtu(). + */ + np->rxopt.all = 0; + + inet6_cleanup_sock(sk); /* * ... and add it to the refcnt debug socks count diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 673a4a932f2a..a640deb9ab14 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -195,7 +195,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, static inline int ndisc_is_useropt(const struct net_device *dev, struct nd_opt_hdr *opt) { - return opt->nd_opt_type == ND_OPT_RDNSS || + return opt->nd_opt_type == ND_OPT_PREFIX_INFO || + opt->nd_opt_type == ND_OPT_RDNSS || opt->nd_opt_type == ND_OPT_DNSSL || ndisc_ops_is_useropt(dev, opt->nd_opt_type); } diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index 58e839e2ce1d..5e5463459563 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -102,7 +102,7 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, const struct net_device *indev) { - __be16 uninitialized_var(dport), uninitialized_var(sport); + __be16 dport, sport; const struct in6_addr *daddr = NULL, *saddr = NULL; struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var; struct sk_buff *data_skb = NULL; diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index d8b5b60b7d53..d8bb7c85287c 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -16,8 +16,8 @@ #include <net/netfilter/ipv6/nf_dup_ipv6.h> struct nft_dup_ipv6 { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_dev:8; + u8 sreg_addr; + u8 sreg_dev; }; static void nft_dup_ipv6_eval(const struct nft_expr *expr, @@ -41,16 +41,16 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_ADDR] == NULL) return -EINVAL; - priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr)); + err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, + sizeof(struct in6_addr)); if (err < 0) return err; - if (tb[NFTA_DUP_SREG_DEV] != NULL) { - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); - } - return 0; + if (tb[NFTA_DUP_SREG_DEV]) + err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], + &priv->sreg_dev, sizeof(int)); + + return err; } static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 23ae01715e7b..e065f49a4ae3 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -27,11 +27,6 @@ #include <linux/proc_fs.h> #include <net/ping.h> -static void ping_v6_destroy(struct sock *sk) -{ - inet6_destroy_sock(sk); -} - /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -106,7 +101,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) addr_type = ipv6_addr_type(daddr); if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) || (addr_type & IPV6_ADDR_MAPPED) || - (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if)) + (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if && + l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if)) return -EINVAL; /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ @@ -175,7 +171,6 @@ struct proto pingv6_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, - .destroy = ping_v6_destroy, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ad7bd40b6d53..31aad22c59fc 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -544,6 +544,7 @@ csum_copy_err: static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { + struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; @@ -561,6 +562,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; + opt = inet6_sk(sk)->cork.opt; + total_len -= opt ? opt->opt_flen : 0; + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); @@ -828,7 +832,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!proto) proto = inet->inet_num; - else if (proto != inet->inet_num) + else if (proto != inet->inet_num && + inet->inet_num != IPPROTO_RAW) return -EINVAL; if (proto > 255) @@ -1255,8 +1260,6 @@ static void raw6_destroy(struct sock *sk) lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - - inet6_destroy_sock(sk); } static int rawv6_init_sk(struct sock *sk) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 60dfd0d11851..b596727f0497 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -302,7 +302,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, skb_network_header_len(skb)); rcu_read_lock(); - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; @@ -317,7 +317,7 @@ out_oom: net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); out_fail: rcu_read_lock(); - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); inet_frag_kill(&fq->q); return -1; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7b41d5d3575f..9dbc9c0cbc5a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -92,7 +92,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); -static int ip6_dst_gc(struct dst_ops *ops); +static void ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -2767,29 +2767,30 @@ out: return dst; } -static int ip6_dst_gc(struct dst_ops *ops) +static void ip6_dst_gc(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; - int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; + unsigned int val; int entries; entries = dst_entries_get_fast(ops); - if (time_after(rt_last_gc + rt_min_interval, jiffies) && - entries <= rt_max_size) + if (entries > ops->gc_thresh) + entries = dst_entries_get_slow(ops); + + if (time_after(rt_last_gc + rt_min_interval, jiffies)) goto out; - net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true); + fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true); entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) - net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; + atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1); out: - net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; - return entries > rt_max_size; + val = atomic_read(&net->ipv6.ip6_rt_gc_expire); + atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity)); } static int ip6_convert_metrics(struct net *net, struct fib6_info *rt, @@ -5332,7 +5333,7 @@ static int __net_init ip6_route_net_init(struct net *net) #endif net->ipv6.sysctl.flush_delay = 0; - net->ipv6.sysctl.ip6_rt_max_size = 4096; + net->ipv6.sysctl.ip6_rt_max_size = INT_MAX; net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; @@ -5340,7 +5341,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; - net->ipv6.ip6_rt_gc_expire = 30*HZ; + atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ); ret = 0; out: diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 420b9fcc10d7..ec1de1e6b8e3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1053,12 +1053,13 @@ tx_err: static void ipip6_tunnel_bind_dev(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; + int hlen = LL_MAX_HEADER; const struct iphdr *iph; struct flowi4 fl4; - tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; if (iph->daddr) { @@ -1081,12 +1082,15 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev && !netif_is_l3_master(tdev)) { - int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int mtu; - dev->mtu = tdev->mtu - t_hlen; - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; + mtu = tdev->mtu - t_hlen; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + WRITE_ONCE(dev->mtu, mtu); + hlen = tdev->hard_header_len + tdev->needed_headroom; } + dev->needed_headroom = t_hlen + hlen; } static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ca291e342900..ab073ac3d7ac 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -184,14 +184,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->af_specific = &tcp_request_sock_ipv6_ops; treq->tfo_listener = false; - if (security_inet_conn_request(sk, skb, req)) - goto out_free; - req->mss = mss; ireq->ir_rmt_port = th->source; ireq->ir_num = ntohs(th->dest); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; + + if (security_inet_conn_request(sk, skb, req)) + goto out_free; + if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index babf69b2403b..033cf81f3483 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -257,6 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; + fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; @@ -1268,14 +1269,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_mask(sk, GFP_ATOMIC)); + newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) { + if (newnp->pktoptions) tcp_v6_restore_cb(newnp->pktoptions); - skb_set_owner_r(newnp->pktoptions, newsk); - } } } else { if (!req_unhash && found_dup_sk) { @@ -1343,7 +1341,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); + opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst; @@ -1425,7 +1423,6 @@ ipv6_pktoptions: if (np->repflow) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { - skb_set_owner_r(opt_skb, sk); tcp_v6_restore_cb(opt_skb); opt_skb = xchg(&np->pktoptions, opt_skb); } else { @@ -1795,12 +1792,6 @@ static int tcp_v6_init_sock(struct sock *sk) return 0; } -static void tcp_v6_destroy_sock(struct sock *sk) -{ - tcp_v4_destroy_sock(sk); - inet6_destroy_sock(sk); -} - #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, @@ -1993,7 +1984,7 @@ struct proto tcpv6_prot = { .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v6_init_sock, - .destroy = tcp_v6_destroy_sock, + .destroy = tcp_v4_destroy_sock, .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9b0cae403027..cf0bbe2e3a79 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -66,6 +66,19 @@ static bool udp6_lib_exact_dif_match(struct net *net, struct sk_buff *skb) return false; } +static void udpv6_destruct_sock(struct sock *sk) +{ + udp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +int udpv6_init_sock(struct sock *sk) +{ + skb_queue_head_init(&udp_sk(sk)->reader_queue); + sk->sk_destruct = udpv6_destruct_sock; + return 0; +} + static u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -86,7 +99,7 @@ static u32 udp6_ehashfn(const struct net *net, fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); return __inet6_ehashfn(lhash, lport, fhash, fport, - udp_ipv6_hash_secret + net_hash_mix(net)); + udp6_ehash_secret + net_hash_mix(net)); } int udp_v6_get_port(struct sock *sk, unsigned short snum) @@ -1219,9 +1232,11 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) msg->msg_name = &sin; msg->msg_namelen = sizeof(sin); do_udp_sendmsg: - if (__ipv6_only_sock(sk)) - return -ENETUNREACH; - return udp_sendmsg(sk, msg, len); + err = __ipv6_only_sock(sk) ? + -ENETUNREACH : udp_sendmsg(sk, msg, len); + msg->msg_name = sin6; + msg->msg_namelen = addr_len; + return err; } } @@ -1488,8 +1503,6 @@ void udpv6_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - - inet6_destroy_sock(sk); } /* @@ -1593,7 +1606,7 @@ struct proto udpv6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, - .init = udp_init_sock, + .init = udpv6_init_sock, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 7903e21c178b..e5d067b09ccf 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -12,6 +12,7 @@ int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, __be32, struct udp_table *); +int udpv6_init_sock(struct sock *sk); int udp_v6_get_port(struct sock *sk, unsigned short snum); int udpv6_getsockopt(struct sock *sk, int level, int optname, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 5000ad6878e6..a26a4b5da09c 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -15,6 +15,13 @@ #include <linux/proc_fs.h> #include "udp_impl.h" +static int udplitev6_sk_init(struct sock *sk) +{ + udpv6_init_sock(sk); + udp_sk(sk)->pcflag = UDPLITE_BIT; + return 0; +} + static int udplitev6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); @@ -40,7 +47,7 @@ struct proto udplitev6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, - .init = udplite_sk_init, + .init = udplitev6_sk_init, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, @@ -51,6 +58,8 @@ struct proto udplitev6_prot = { .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp6_sock), .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 1925fb91e514..a1dfe4f5ed3a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -243,11 +243,11 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (likely(xdst->u.rt6.rt6i_idev)) - in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); if (xdst->u.rt6.rt6i_uncached_list) rt6_uncached_list_del(&xdst->u.rt6); + if (likely(xdst->u.rt6.rt6i_idev)) + in6_dev_put(xdst->u.rt6.rt6i_idev); xfrm_dst_destroy(xdst); } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index eb502c6290c2..aacaa5119b45 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -119,7 +119,7 @@ struct iucv_irq_data { u16 ippathid; u8 ipflags1; u8 iptype; - u32 res2[8]; + u32 res2[9]; }; struct iucv_irq_list { diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index c0034546a9ed..a82892c28860 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1065,15 +1065,18 @@ partial_message: out_error: kcm_push(kcm); - if (copied && sock->type == SOCK_SEQPACKET) { + if (sock->type == SOCK_SEQPACKET) { /* Wrote some bytes before encountering an * error, return partial success. */ - goto partial_message; - } - - if (head != kcm->seq_skb) + if (copied) + goto partial_message; + if (head != kcm->seq_skb) + kfree_skb(head); + } else { kfree_skb(head); + kcm->seq_skb = NULL; + } err = sk_stream_error(sk, msg->msg_flags, err); @@ -1983,6 +1986,8 @@ static __net_exit void kcm_exit_net(struct net *net) * that all multiplexors and psocks have been destroyed. */ WARN_ON(!list_empty(&knet->mux_list)); + + mutex_destroy(&knet->mutex); } static struct pernet_operations kcm_net_ops = { diff --git a/net/key/af_key.c b/net/key/af_key.c index 976b67089ac1..47ffa69ca6f6 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1858,9 +1858,9 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; - if ((xfilter->sadb_x_filter_splen >= + if ((xfilter->sadb_x_filter_splen > (sizeof(xfrm_address_t) << 3)) || - (xfilter->sadb_x_filter_dplen >= + (xfilter->sadb_x_filter_dplen > (sizeof(xfrm_address_t) << 3))) { mutex_unlock(&pfk->dump_lock); return -EINVAL; @@ -1950,7 +1950,8 @@ static u32 gen_reqid(struct net *net) } static int -parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) +parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_policy *pol, + struct sadb_x_ipsecrequest *rq) { struct net *net = xp_net(xp); struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr; @@ -1968,9 +1969,12 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; t->mode = mode; - if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE) + if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE) { + if ((mode == XFRM_MODE_TUNNEL || mode == XFRM_MODE_BEET) && + pol->sadb_x_policy_dir == IPSEC_DIR_OUTBOUND) + return -EINVAL; t->optional = 1; - else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) { + } else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) { t->reqid = rq->sadb_x_ipsecrequest_reqid; if (t->reqid > IPSEC_MANUAL_REQID_MAX) t->reqid = 0; @@ -2012,7 +2016,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) rq->sadb_x_ipsecrequest_len < sizeof(*rq)) return -EINVAL; - if ((err = parse_ipsecrequest(xp, rq)) < 0) + if ((err = parse_ipsecrequest(xp, pol, rq)) < 0) return err; len -= rq->sadb_x_ipsecrequest_len; rq = (void*)((u8*)rq + rq->sadb_x_ipsecrequest_len); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 9dae10d8880c..7342344d99a9 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -272,8 +272,6 @@ static void l2tp_ip6_destroy_sock(struct sock *sk) if (tunnel) l2tp_tunnel_delete(tunnel); - - inet6_destroy_sock(sk); } static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) @@ -527,7 +525,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) */ if (len > INT_MAX - transhdrlen) return -EMSGSIZE; - ulen = len + transhdrlen; /* Mirror BSD error message compatibility */ if (msg->msg_flags & MSG_OOB) @@ -651,6 +648,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); + ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 82cb93f66b9b..f4fb309185ce 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -127,8 +127,14 @@ static inline int llc_fixup_skb(struct sk_buff *skb) skb->transport_header += llc_len; skb_pull(skb, llc_len); if (skb->protocol == htons(ETH_P_802_2)) { - __be16 pdulen = eth_hdr(skb)->h_proto; - s32 data_size = ntohs(pdulen) - llc_len; + __be16 pdulen; + s32 data_size; + + if (skb->mac_len < ETH_HLEN) + return 0; + + pdulen = eth_hdr(skb)->h_proto; + data_size = ntohs(pdulen) - llc_len; if (data_size < 0 || !pskb_may_pull(skb, data_size)) @@ -162,9 +168,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, void (*sta_handler)(struct sk_buff *skb); void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb); - if (!net_eq(dev_net(dev), &init_net)) - goto drop; - /* * When the interface is in promisc. mode, drop all the crap that it * receives, do not try to analyse it. diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 9fa3342c7a82..df26557a0244 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -153,6 +153,9 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb) int rc = 1; u32 data_size; + if (skb->mac_len < ETH_HLEN) + return 1; + llc_pdu_decode_sa(skb, mac_da); llc_pdu_decode_da(skb, mac_sa); llc_pdu_decode_ssap(skb, &dsap); diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index c29170e767a8..64e2c67e16ba 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -77,6 +77,9 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) u32 data_size; struct sk_buff *nskb; + if (skb->mac_len < ETH_HLEN) + goto out; + /* The test request command is type U (llc_len = 3) */ data_size = ntohs(eth_hdr(skb)->h_proto) - 3; nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5659af1bec17..77d8ed184c1c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2452,6 +2452,10 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, else *dbm = sdata->vif.bss_conf.txpower; + /* INT_MIN indicates no power level was set yet */ + if (*dbm == INT_MIN) + return -EINVAL; + return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2c2532196ef6..c5e5e978d3ed 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -627,6 +627,26 @@ struct mesh_csa_settings { struct cfg80211_csa_settings settings; }; +/** + * struct mesh_table + * + * @known_gates: list of known mesh gates and their mpaths by the station. The + * gate's mpath may or may not be resolved and active. + * @gates_lock: protects updates to known_gates + * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr + * @walk_head: linked list containing all mesh_path objects + * @walk_lock: lock protecting walk_head + * @entries: number of entries in the table + */ +struct mesh_table { + struct hlist_head known_gates; + spinlock_t gates_lock; + struct rhashtable rhead; + struct hlist_head walk_head; + spinlock_t walk_lock; + atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ +}; + struct ieee80211_if_mesh { struct timer_list housekeeping_timer; struct timer_list mesh_path_timer; @@ -701,8 +721,8 @@ struct ieee80211_if_mesh { /* offset from skb->data while building IE */ int meshconf_offset; - struct mesh_table *mesh_paths; - struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */ + struct mesh_table mesh_paths; + struct mesh_table mpp_paths; /* Store paths for MPP&MAP */ int mesh_paths_generation; int mpp_paths_generation; }; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index e84103b40534..e60444039e76 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -128,26 +128,6 @@ struct mesh_path { bool is_gate; }; -/** - * struct mesh_table - * - * @known_gates: list of known mesh gates and their mpaths by the station. The - * gate's mpath may or may not be resolved and active. - * @gates_lock: protects updates to known_gates - * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr - * @walk_head: linked list containging all mesh_path objects - * @walk_lock: lock protecting walk_head - * @entries: number of entries in the table - */ -struct mesh_table { - struct hlist_head known_gates; - spinlock_t gates_lock; - struct rhashtable rhead; - struct hlist_head walk_head; - spinlock_t walk_lock; - atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ -}; - /* Recent multicast cache */ /* RMC_BUCKETS must be a power of 2, maximum 256 */ #define RMC_BUCKETS 256 @@ -300,7 +280,7 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); -int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata); +void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata); void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata); int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr); void mesh_path_timer(struct timer_list *t); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 71ebdc85755c..8efb2bf08bf4 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -50,32 +50,24 @@ static void mesh_path_rht_free(void *ptr, void *tblptr) mesh_path_free_rcu(tbl, mpath); } -static struct mesh_table *mesh_table_alloc(void) +static void mesh_table_init(struct mesh_table *tbl) { - struct mesh_table *newtbl; + INIT_HLIST_HEAD(&tbl->known_gates); + INIT_HLIST_HEAD(&tbl->walk_head); + atomic_set(&tbl->entries, 0); + spin_lock_init(&tbl->gates_lock); + spin_lock_init(&tbl->walk_lock); - newtbl = kmalloc(sizeof(struct mesh_table), GFP_ATOMIC); - if (!newtbl) - return NULL; - - INIT_HLIST_HEAD(&newtbl->known_gates); - INIT_HLIST_HEAD(&newtbl->walk_head); - atomic_set(&newtbl->entries, 0); - spin_lock_init(&newtbl->gates_lock); - spin_lock_init(&newtbl->walk_lock); - if (rhashtable_init(&newtbl->rhead, &mesh_rht_params)) { - kfree(newtbl); - return NULL; - } - - return newtbl; + /* rhashtable_init() may fail only in case of wrong + * mesh_rht_params + */ + WARN_ON(rhashtable_init(&tbl->rhead, &mesh_rht_params)); } static void mesh_table_free(struct mesh_table *tbl) { rhashtable_free_and_destroy(&tbl->rhead, mesh_path_rht_free, tbl); - kfree(tbl); } /** @@ -243,13 +235,13 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, struct mesh_path * mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { - return mpath_lookup(sdata->u.mesh.mesh_paths, dst, sdata); + return mpath_lookup(&sdata->u.mesh.mesh_paths, dst, sdata); } struct mesh_path * mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { - return mpath_lookup(sdata->u.mesh.mpp_paths, dst, sdata); + return mpath_lookup(&sdata->u.mesh.mpp_paths, dst, sdata); } static struct mesh_path * @@ -286,7 +278,7 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) struct mesh_path * mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { - return __mesh_path_lookup_by_idx(sdata->u.mesh.mesh_paths, idx); + return __mesh_path_lookup_by_idx(&sdata->u.mesh.mesh_paths, idx); } /** @@ -301,7 +293,7 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) struct mesh_path * mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { - return __mesh_path_lookup_by_idx(sdata->u.mesh.mpp_paths, idx); + return __mesh_path_lookup_by_idx(&sdata->u.mesh.mpp_paths, idx); } /** @@ -314,7 +306,7 @@ int mesh_path_add_gate(struct mesh_path *mpath) int err; rcu_read_lock(); - tbl = mpath->sdata->u.mesh.mesh_paths; + tbl = &mpath->sdata->u.mesh.mesh_paths; spin_lock_bh(&mpath->state_lock); if (mpath->is_gate) { @@ -424,7 +416,7 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, if (!new_mpath) return ERR_PTR(-ENOMEM); - tbl = sdata->u.mesh.mesh_paths; + tbl = &sdata->u.mesh.mesh_paths; spin_lock_bh(&tbl->walk_lock); do { ret = rhashtable_lookup_insert_fast(&tbl->rhead, @@ -473,7 +465,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, return -ENOMEM; memcpy(new_mpath->mpp, mpp, ETH_ALEN); - tbl = sdata->u.mesh.mpp_paths; + tbl = &sdata->u.mesh.mpp_paths; spin_lock_bh(&tbl->walk_lock); ret = rhashtable_lookup_insert_fast(&tbl->rhead, @@ -502,7 +494,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, void mesh_plink_broken(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - struct mesh_table *tbl = sdata->u.mesh.mesh_paths; + struct mesh_table *tbl = &sdata->u.mesh.mesh_paths; static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; @@ -561,7 +553,7 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath) void mesh_path_flush_by_nexthop(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - struct mesh_table *tbl = sdata->u.mesh.mesh_paths; + struct mesh_table *tbl = &sdata->u.mesh.mesh_paths; struct mesh_path *mpath; struct hlist_node *n; @@ -576,7 +568,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, const u8 *proxy) { - struct mesh_table *tbl = sdata->u.mesh.mpp_paths; + struct mesh_table *tbl = &sdata->u.mesh.mpp_paths; struct mesh_path *mpath; struct hlist_node *n; @@ -610,8 +602,8 @@ static void table_flush_by_iface(struct mesh_table *tbl) */ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) { - table_flush_by_iface(sdata->u.mesh.mesh_paths); - table_flush_by_iface(sdata->u.mesh.mpp_paths); + table_flush_by_iface(&sdata->u.mesh.mesh_paths); + table_flush_by_iface(&sdata->u.mesh.mpp_paths); } /** @@ -657,7 +649,7 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) /* flush relevant mpp entries first */ mpp_flush_by_proxy(sdata, addr); - err = table_path_del(sdata->u.mesh.mesh_paths, sdata, addr); + err = table_path_del(&sdata->u.mesh.mesh_paths, sdata, addr); sdata->u.mesh.mesh_paths_generation++; return err; } @@ -695,7 +687,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) struct mesh_path *gate; bool copy = false; - tbl = sdata->u.mesh.mesh_paths; + tbl = &sdata->u.mesh.mesh_paths; rcu_read_lock(); hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { @@ -775,29 +767,10 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mesh_path_tx_pending(mpath); } -int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) +void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) { - struct mesh_table *tbl_path, *tbl_mpp; - int ret; - - tbl_path = mesh_table_alloc(); - if (!tbl_path) - return -ENOMEM; - - tbl_mpp = mesh_table_alloc(); - if (!tbl_mpp) { - ret = -ENOMEM; - goto free_path; - } - - sdata->u.mesh.mesh_paths = tbl_path; - sdata->u.mesh.mpp_paths = tbl_mpp; - - return 0; - -free_path: - mesh_table_free(tbl_path); - return ret; + mesh_table_init(&sdata->u.mesh.mesh_paths); + mesh_table_init(&sdata->u.mesh.mpp_paths); } static @@ -819,12 +792,12 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, void mesh_path_expire(struct ieee80211_sub_if_data *sdata) { - mesh_path_tbl_expire(sdata, sdata->u.mesh.mesh_paths); - mesh_path_tbl_expire(sdata, sdata->u.mesh.mpp_paths); + mesh_path_tbl_expire(sdata, &sdata->u.mesh.mesh_paths); + mesh_path_tbl_expire(sdata, &sdata->u.mesh.mpp_paths); } void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata) { - mesh_table_free(sdata->u.mesh.mesh_paths); - mesh_table_free(sdata->u.mesh.mpp_paths); + mesh_table_free(&sdata->u.mesh.mesh_paths); + mesh_table_free(&sdata->u.mesh.mpp_paths); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 5b5b0f95ffd1..c7f47dba884e 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1022,8 +1022,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_OPEN: if (!matches_local) event = OPN_RJCT; - if (!mesh_plink_free_count(sdata) || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + (sta->mesh->plid && sta->mesh->plid != plid)) event = OPN_IGNR; else event = OPN_ACPT; @@ -1031,9 +1031,9 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_CONFIRM: if (!matches_local) event = CNF_RJCT; - if (!mesh_plink_free_count(sdata) || - sta->mesh->llid != llid || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + sta->mesh->llid != llid || + (sta->mesh->plid && sta->mesh->plid != plid)) event = CNF_IGNR; else event = CNF_ACPT; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 3a907ba7f763..5c209f72de70 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -969,7 +969,8 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) list_del_rcu(&sta->list); sta->removed = true; - drv_sta_pre_rcu_remove(local, sta->sdata, sta); + if (sta->uploaded) + drv_sta_pre_rcu_remove(local, sta->sdata, sta); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && rcu_access_pointer(sdata->u.vlan.sta) == sta) @@ -2047,7 +2048,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate, static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) { - u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate); + u32 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate); if (rate == STA_STATS_RATE_INVALID) return -EINVAL; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 74045e927e04..3a0aadf881fc 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -654,7 +654,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED && - !ieee80211_is_deauth(hdr->frame_control))) + !ieee80211_is_deauth(hdr->frame_control)) && + tx->skb->protocol != tx->sdata->control_port_protocol) return TX_DROP; if (!skip_hw && tx->key && diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index bd88a9b80773..8c2aedf3fa74 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -669,6 +669,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, sdata->dev = ndev; sdata->wpan_dev.wpan_phy = local->hw.phy; sdata->local = local; + INIT_LIST_HEAD(&sdata->wpan_dev.list); /* setup type-dependent data */ ret = ieee802154_setup_sdata(sdata, type); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index ea1745cb93ed..40b3e6a52f92 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1375,6 +1375,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, free: kfree(table); out: + mdev->sysctl = NULL; return -ENOBUFS; } @@ -1384,6 +1385,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev, struct net *net = dev_net(dev); struct ctl_table *table; + if (!mdev->sysctl) + return; + table = mdev->sysctl->ctl_table_arg; unregister_net_sysctl_table(mdev->sysctl); kfree(table); diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 8055e3965cef..2477caf9c967 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -69,9 +69,12 @@ enum { }; struct ncsi_channel_version { - u32 version; /* Supported BCD encoded NCSI version */ - u32 alpha2; /* Supported BCD encoded NCSI version */ - u8 fw_name[12]; /* Firware name string */ + u8 major; /* NCSI version major */ + u8 minor; /* NCSI version minor */ + u8 update; /* NCSI version update */ + char alpha1; /* NCSI version alpha1 */ + char alpha2; /* NCSI version alpha2 */ + u8 fw_name[12]; /* Firmware name string */ u32 fw_version; /* Firmware version */ u16 pci_ids[4]; /* PCI identification */ u32 mf_id; /* Manufacture ID */ diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index a2f4280e2889..d0169bf0fcce 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -71,8 +71,8 @@ static int ncsi_write_channel_info(struct sk_buff *skb, if (ndp->force_channel == nc) nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED); - nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version); - nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2); + nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.major); + nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.minor); nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name); vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index 91b4b66438df..0bf62b4883d4 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -164,9 +164,12 @@ struct ncsi_rsp_gls_pkt { /* Get Version ID */ struct ncsi_rsp_gvi_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ - __be32 ncsi_version; /* NCSI version */ + unsigned char major; /* NCSI version major */ + unsigned char minor; /* NCSI version minor */ + unsigned char update; /* NCSI version update */ + unsigned char alpha1; /* NCSI version alpha1 */ unsigned char reserved[3]; /* Reserved */ - unsigned char alpha2; /* NCSI version */ + unsigned char alpha2; /* NCSI version alpha2 */ unsigned char fw_name[12]; /* f/w name string */ __be32 fw_version; /* f/w version */ __be16 pci_ids[4]; /* PCI IDs */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index a43c9a44f870..05dea43bbc66 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -20,6 +20,19 @@ #include "internal.h" #include "ncsi-pkt.h" +/* Nibbles within [0xA, 0xF] add zero "0" to the returned value. + * Optional fields (encoded as 0xFF) will default to zero. + */ +static u8 decode_bcd_u8(u8 x) +{ + int lo = x & 0xF; + int hi = x >> 4; + + lo = lo < 0xA ? lo : 0; + hi = hi < 0xA ? hi : 0; + return lo + hi * 10; +} + static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, unsigned short payload) { @@ -611,9 +624,18 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr) if (!nc) return -ENODEV; - /* Update to channel's version info */ + /* Update channel's version info + * + * Major, minor, and update fields are supposed to be + * unsigned integers encoded as packed BCD. + * + * Alpha1 and alpha2 are ISO/IEC 8859-1 characters. + */ ncv = &nc->version; - ncv->version = ntohl(rsp->ncsi_version); + ncv->major = decode_bcd_u8(rsp->major); + ncv->minor = decode_bcd_u8(rsp->minor); + ncv->update = decode_bcd_u8(rsp->update); + ncv->alpha1 = rsp->alpha1; ncv->alpha2 = rsp->alpha2; memcpy(ncv->fw_name, rsp->fw_name, 12); ncv->fw_version = ntohl(rsp->fw_version); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 722d1b057f61..0c6540780cb4 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -289,12 +289,6 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum)) return NULL; return net->nf.hooks_ipv6 + hooknum; -#if IS_ENABLED(CONFIG_DECNET) - case NFPROTO_DECNET: - if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum)) - return NULL; - return net->nf.hooks_decnet + hooknum; -#endif default: WARN_ON_ONCE(1); return NULL; @@ -646,10 +640,6 @@ static int __net_init netfilter_net_init(struct net *net) #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge)); #endif -#if IS_ENABLED(CONFIG_DECNET) - __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet)); -#endif - #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", net->proc_net); diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index e3257077158f..1b9df64c6236 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -299,8 +299,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_BITMAP_RANGE; pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask); - hosts = 2 << (32 - netmask - 1); - elements = 2 << (netmask - mask_bits - 1); + hosts = 2U << (32 - netmask - 1); + elements = 2UL << (netmask - mask_bits - 1); } if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 0427e66bc478..031bb83aed70 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -64,6 +64,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); ip_set_dereference((inst)->ip_set_list)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] +#define ip_set_dereference_nfnl(p) \ + rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -552,15 +554,10 @@ __ip_set_put_netlink(struct ip_set *set) static inline struct ip_set * ip_set_rcu_get(struct net *net, ip_set_id_t index) { - struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - rcu_read_lock(); - /* ip_set_list itself needs to be protected */ - set = rcu_dereference(inst->ip_set_list)[index]; - rcu_read_unlock(); - - return set; + /* ip_set_list and the set pointer need to be protected */ + return ip_set_dereference_nfnl(inst->ip_set_list)[index]; } int @@ -791,20 +788,9 @@ static struct nlmsghdr * start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, enum ipset_cmd cmd) { - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - - nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), - sizeof(*nfmsg), flags); - if (!nlh) - return NULL; - - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = NFPROTO_IPV4; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - - return nlh; + return nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags, + NFPROTO_IPV4, NFNETLINK_V0, 0); } /* Create a set */ @@ -1238,6 +1224,9 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); + /* Make sure all readers of the old set pointers are completed. */ + synchronize_rcu(); + return 0; } diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 613e18e720a4..9290a4d7b862 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -39,6 +39,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net"); #define IP_SET_HASH_WITH_PROTO #define IP_SET_HASH_WITH_NETS #define IPSET_NET_COUNT 2 +#define IP_SET_HASH_WITH_NET0 /* IPv4 variant */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 3bf8d7f3cdc3..0909f32eabfd 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1656,6 +1656,7 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) #ifdef CONFIG_SYSCTL static int zero; +static int one = 1; static int three = 3; static int @@ -1667,12 +1668,18 @@ proc_do_defense_mode(struct ctl_table *table, int write, int val = *valp; int rc; - rc = proc_dointvec(table, write, buffer, lenp, ppos); + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); if (write && (*valp != val)) { - if ((*valp < 0) || (*valp > 3)) { - /* Restore the correct value */ - *valp = val; + if (val < 0 || val > 3) { + rc = -EINVAL; } else { + *valp = val; update_defense_level(ipvs); } } @@ -1683,37 +1690,27 @@ static int proc_do_sync_threshold(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct netns_ipvs *ipvs = table->extra2; int *valp = table->data; int val[2]; int rc; + struct ctl_table tmp = { + .data = &val, + .maxlen = table->maxlen, + .mode = table->mode, + }; - /* backup the value first */ + mutex_lock(&ipvs->sync_mutex); memcpy(val, valp, sizeof(val)); - - rc = proc_dointvec(table, write, buffer, lenp, ppos); - if (write && (valp[0] < 0 || valp[1] < 0 || - (valp[0] >= valp[1] && valp[1]))) { - /* Restore the correct value */ - memcpy(valp, val, sizeof(val)); - } - return rc; -} - -static int -proc_do_sync_mode(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int *valp = table->data; - int val = *valp; - int rc; - - rc = proc_dointvec(table, write, buffer, lenp, ppos); - if (write && (*valp != val)) { - if ((*valp < 0) || (*valp > 1)) { - /* Restore the correct value */ - *valp = val; - } + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); + if (write) { + if (val[0] < 0 || val[1] < 0 || + (val[0] >= val[1] && val[1])) + rc = -EINVAL; + else + memcpy(valp, val, sizeof(val)); } + mutex_unlock(&ipvs->sync_mutex); return rc; } @@ -1725,12 +1722,18 @@ proc_do_sync_ports(struct ctl_table *table, int write, int val = *valp; int rc; - rc = proc_dointvec(table, write, buffer, lenp, ppos); + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); if (write && (*valp != val)) { - if (*valp < 1 || !is_power_of_2(*valp)) { - /* Restore the correct value */ + if (val < 1 || !is_power_of_2(val)) + rc = -EINVAL; + else *valp = val; - } } return rc; } @@ -1790,7 +1793,9 @@ static struct ctl_table vs_vars[] = { .procname = "sync_version", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_do_sync_mode, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, { .procname = "sync_ports", @@ -3942,6 +3947,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; tbl[idx].data = &ipvs->sysctl_sync_threshold; + tbl[idx].extra2 = ipvs; tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; tbl[idx++].data = &ipvs->sysctl_sync_refresh_period; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f6af13c16cf5..c133ce825c2d 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1444,7 +1444,7 @@ static int bind_mcastif_addr(struct socket *sock, struct net_device *dev) sin.sin_addr.s_addr = addr; sin.sin_port = 0; - return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); + return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin)); } static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, @@ -1510,8 +1510,8 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id, } get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id); - result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, - salen, 0); + result = kernel_connect(sock, (struct sockaddr *)&mcast_addr, + salen, 0); if (result < 0) { pr_err("Error connecting to the multicast addr\n"); goto error; @@ -1551,7 +1551,7 @@ static int make_receive_sock(struct netns_ipvs *ipvs, int id, get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); sock->sk->sk_bound_dev_if = dev->ifindex; - result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen); + result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); goto error; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 11f7c546e57b..e47d1a29c140 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -272,7 +272,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); return false; } @@ -287,7 +287,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs, { if (ip_hdr(skb)->ttl <= 1) { /* Tell the sender its packet died... */ - __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); return false; } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index efc14c7b4f8e..c2fece0593ea 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -383,7 +383,7 @@ static int help(struct sk_buff *skb, int ret; u32 seq; int dir = CTINFO2DIR(ctinfo); - unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff); + unsigned int matchlen, matchoff; struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct); struct nf_conntrack_expect *exp; union nf_inet_addr *daddr; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index e24b762ffa1d..06c70d4584cf 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -400,6 +400,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); + if (!nf_ct_helper_hash) + return -ENOENT; + if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; @@ -570,4 +573,5 @@ void nf_conntrack_helper_fini(void) { nf_ct_extend_unregister(&helper_extend); kvfree(nf_ct_helper_hash); + nf_ct_helper_hash = NULL; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2850a638401d..83e8566ec3f0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -517,20 +517,15 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, { const struct nf_conntrack_zone *zone; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct nlattr *nest_parms; unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, nf_ct_l3num(ct), + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = nf_ct_l3num(ct); - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); @@ -687,7 +682,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) const struct nf_conntrack_zone *zone; struct net *net; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct nlattr *nest_parms; struct nf_conn *ct = item->ct; struct sk_buff *skb; @@ -717,15 +711,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto errout; type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type); - nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, nf_ct_l3num(ct), + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = nf_ct_l3num(ct); - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); @@ -1216,9 +1206,6 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) { - if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) - return 0; - return ctnetlink_filter_match(ct, data); } @@ -1280,11 +1267,6 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, ct = nf_ct_tuplehash_to_ctrack(h); - if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) { - nf_ct_put(ct); - return -EBUSY; - } - if (cda[CTA_ID]) { __be32 id = nla_get_be32(cda[CTA_ID]); @@ -2056,12 +2038,15 @@ ctnetlink_create_conntrack(struct net *net, err = nf_conntrack_hash_check_insert(ct); if (err < 0) - goto err2; + goto err3; rcu_read_unlock(); return ct; +err3: + if (ct->master) + nf_ct_put(ct->master); err2: rcu_read_unlock(); err1: @@ -2175,20 +2160,15 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, __u16 cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS_CPU); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, htons(cpu)); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) || @@ -2259,20 +2239,15 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct net *net) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; unsigned int nr_conntracks = atomic_read(&net->ct.count); event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks))) goto nla_put_failure; @@ -2686,7 +2661,9 @@ nla_put_failure: return -1; } +#if IS_ENABLED(CONFIG_NF_NAT) static const union nf_inet_addr any_addr; +#endif static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) { @@ -2783,19 +2760,14 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, + exp->tuple.src.l3num, NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = exp->tuple.src.l3num; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; @@ -2815,7 +2787,6 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) struct nf_conntrack_expect *exp = item->exp; struct net *net = nf_ct_exp_net(exp); struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct sk_buff *skb; unsigned int type, group; int flags = 0; @@ -2838,15 +2809,11 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) goto errout; type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type); - nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, + exp->tuple.src.l3num, NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = exp->tuple.src.l3num; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; @@ -3186,10 +3153,12 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, return 0; } +#if IS_ENABLED(CONFIG_NF_NAT) static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = { [CTA_EXPECT_NAT_DIR] = { .type = NLA_U32 }, [CTA_EXPECT_NAT_TUPLE] = { .type = NLA_NESTED }, }; +#endif static int ctnetlink_parse_expect_nat(const struct nlattr *attr, @@ -3414,20 +3383,15 @@ ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_EXP_GET_STATS_CPU); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, htons(cpu)); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) || nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) || nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete))) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 8cb62805fd68..8453e92936ac 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -58,8 +58,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS, - [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, - [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, + [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS, + [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS, [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, @@ -119,7 +119,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { { /* ORIGINAL */ /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ -/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, +/* init */ {sCW, sCW, sCW, sCE, sES, sCL, sCL, sSA, sCW, sHA}, /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, @@ -317,22 +317,29 @@ static int sctp_packet(struct nf_conn *ct, for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Special cases of Verification tag check (Sec 8.5.1) */ if (sch->type == SCTP_CID_INIT) { - /* Sec 8.5.1 (A) */ + /* (A) vtag MUST be zero */ if (sh->vtag != 0) goto out_unlock; } else if (sch->type == SCTP_CID_ABORT) { - /* Sec 8.5.1 (B) */ - if (sh->vtag != ct->proto.sctp.vtag[dir] && - sh->vtag != ct->proto.sctp.vtag[!dir]) + /* (B) vtag MUST match own vtag if T flag is unset OR + * MUST match peer's vtag if T flag is set + */ + if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[dir]) || + ((sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { - /* Sec 8.5.1 (C) */ - if (sh->vtag != ct->proto.sctp.vtag[dir] && - sh->vtag != ct->proto.sctp.vtag[!dir] && - sch->flags & SCTP_CHUNK_FLAG_T) + /* (C) vtag MUST match own vtag if T flag is unset OR + * MUST match peer's vtag if T flag is set + */ + if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[dir]) || + ((sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_COOKIE_ECHO) { - /* Sec 8.5.1 (D) */ + /* (D) vtag must be same as init_vtag as found in INIT_ACK */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; } else if (sch->type == SCTP_CID_HEARTBEAT) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 66cda5e2d6b9..955b73a9a05e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1094,6 +1094,16 @@ static int tcp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } + + if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) { + /* do not renew timeout on SYN retransmit. + * + * Else port reuse by client or NAT middlebox can keep + * entry alive indefinitely (including nat info). + */ + return NF_ACCEPT; + } + /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection * pickup with loose=1. Avoid large ESTABLISHED timeout. */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 046f118dea06..d16aa43ebd4d 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -605,7 +605,7 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, start += strlen(name); *val = simple_strtoul(start, &end, 0); if (start == end) - return 0; + return -1; if (matchoff && matchlen) { *matchoff = start - dptr; *matchlen = end - start; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 84c59de27882..b3a0385290a1 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -93,8 +93,6 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) dev_hold(state->in); if (state->out) dev_hold(state->out); - if (state->sk) - sock_hold(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { struct net_device *physdev; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 079f76849693..e0c224dea316 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -22,10 +22,13 @@ #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/sock.h> #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) +unsigned int nf_tables_net_id __read_mostly; + static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); @@ -53,7 +56,9 @@ static const struct rhashtable_params nft_chain_ht_params = { static void nft_validate_state_update(struct net *net, u8 new_validate_state) { - switch (net->nft.validate_state) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + switch (nft_net->validate_state) { case NFT_VALIDATE_SKIP: WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO); break; @@ -64,7 +69,7 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state) return; } - net->nft.validate_state = new_validate_state; + nft_net->validate_state = new_validate_state; } static void nft_ctx_init(struct nft_ctx *ctx, @@ -97,6 +102,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, return NULL; INIT_LIST_HEAD(&trans->list); + INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -109,34 +115,68 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } -static void nft_trans_destroy(struct nft_trans *trans) +static void nft_trans_list_del(struct nft_trans *trans) { list_del(&trans->list); + list_del(&trans->binding_list); +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + nft_trans_list_del(trans); kfree(trans); } -static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set, + bool bind) { + struct nftables_pernet *nft_net; struct net *net = ctx->net; struct nft_trans *trans; if (!nft_set_is_anonymous(set)) return; - list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWSET: if (nft_trans_set(trans) == set) - nft_trans_set_bound(trans) = true; + nft_trans_set_bound(trans) = bind; break; case NFT_MSG_NEWSETELEM: if (nft_trans_elem_set(trans) == set) - nft_trans_elem_set_bound(trans) = true; + nft_trans_elem_set_bound(trans) = bind; break; } } } +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, true); +} + +static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, false); +} + +static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) +{ + struct nftables_pernet *nft_net; + + nft_net = net_generic(net, nf_tables_net_id); + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_set_is_anonymous(nft_trans_set(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + } + + list_add_tail(&trans->list, &nft_net->commit_list); +} + static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@ -187,7 +227,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) if (msg_type == NFT_MSG_NEWTABLE) nft_activate_next(ctx->net, ctx->table); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -214,7 +254,7 @@ static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) if (msg_type == NFT_MSG_NEWCHAIN) nft_activate_next(ctx->net, ctx->chain); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -226,7 +266,7 @@ static int nft_delchain(struct nft_ctx *ctx) if (err < 0) return err; - ctx->table->use--; + nft_use_dec(&ctx->table->use); nft_deactivate_next(ctx->net, ctx->chain); return err; @@ -267,7 +307,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) /* You cannot delete the same rule twice */ if (nft_is_active_next(ctx->net, rule)) { nft_deactivate_next(ctx->net, rule); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); return 0; } return -ENOENT; @@ -287,7 +327,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } @@ -342,7 +382,7 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -356,7 +396,7 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) return err; nft_deactivate_next(ctx->net, set); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -374,7 +414,7 @@ static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, obj); nft_trans_obj(trans) = obj; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -388,7 +428,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; nft_deactivate_next(ctx->net, obj); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -407,7 +447,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, flowtable); nft_trans_flowtable(trans) = flowtable; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -422,7 +462,7 @@ static int nft_delflowtable(struct nft_ctx *ctx, return err; nft_deactivate_next(ctx->net, flowtable); - ctx->table->use--; + nft_use_dec(&ctx->table->use); return err; } @@ -435,12 +475,14 @@ static struct nft_table *nft_table_lookup(const struct net *net, const struct nlattr *nla, u8 family, u8 genmask) { + struct nftables_pernet *nft_net; struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); - list_for_each_entry_rcu(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (!nla_strcmp(nla, table->name) && table->family == family && nft_active_genmask(table, genmask)) @@ -454,9 +496,11 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net, const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net; struct nft_table *table; - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && nft_active_genmask(table, genmask)) return table; @@ -509,11 +553,13 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) static void nft_request_module(struct net *net, const char *fmt, ...) { char module_name[MODULE_NAME_LEN]; + struct nftables_pernet *nft_net; LIST_HEAD(commit_list); va_list args; int ret; - list_splice_init(&net->nft.commit_list, &commit_list); + nft_net = net_generic(net, nf_tables_net_id); + list_splice_init(&nft_net->commit_list, &commit_list); va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); @@ -521,12 +567,12 @@ static void nft_request_module(struct net *net, const char *fmt, ...) if (ret >= MODULE_NAME_LEN) return; - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); request_module("%s", module_name); - mutex_lock(&net->nft.commit_mutex); + mutex_lock(&nft_net->commit_mutex); - WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); - list_splice(&commit_list, &net->nft.commit_list); + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + list_splice(&commit_list, &nft_net->commit_list); } #endif @@ -561,6 +607,13 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla, return ERR_PTR(-ENOENT); } +static __be16 nft_base_seq(const struct net *net) +{ + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return htons(nft_net->base_seq & 0xffff); +} + static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, @@ -573,18 +626,13 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, int family, const struct nft_table *table) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || @@ -631,15 +679,17 @@ static int nf_tables_dump_tables(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nftables_pernet *nft_net; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -743,7 +793,7 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt) if (cnt && i++ == cnt) break; - nf_unregister_net_hook(net, &nft_base_chain(chain)->ops); + nf_tables_unregister_hook(net, table, chain); } } @@ -758,7 +808,7 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table) if (!nft_is_base_chain(chain)) continue; - err = nf_register_net_hook(net, &nft_base_chain(chain)->ops); + err = nf_tables_register_hook(net, table, chain); if (err < 0) goto err; @@ -802,17 +852,18 @@ static int nf_tables_updtable(struct nft_ctx *ctx) nft_trans_table_enable(trans) = false; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; ret = nf_tables_table_enable(ctx->net, ctx->table); - if (ret >= 0) { - ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + if (ret >= 0) nft_trans_table_enable(trans) = true; - } + else + ctx->table->flags |= NFT_TABLE_F_DORMANT; } if (ret < 0) goto err; nft_trans_table_update(trans) = true; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err: nft_trans_destroy(trans); @@ -842,11 +893,36 @@ static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg, return strcmp(chain->name, name); } +static bool nft_supported_family(u8 family) +{ + return false +#ifdef CONFIG_NF_TABLES_INET + || family == NFPROTO_INET +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + || family == NFPROTO_IPV4 +#endif +#ifdef CONFIG_NF_TABLES_ARP + || family == NFPROTO_ARP +#endif +#ifdef CONFIG_NF_TABLES_NETDEV + || family == NFPROTO_NETDEV +#endif +#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) + || family == NFPROTO_BRIDGE +#endif +#ifdef CONFIG_NF_TABLES_IPV6 + || family == NFPROTO_IPV6 +#endif + ; +} + static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; @@ -856,7 +932,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct nft_ctx ctx; int err; - lockdep_assert_held(&net->nft.commit_mutex); + if (!nft_supported_family(family)) + return -EOPNOTSUPP; + + lockdep_assert_held(&nft_net->commit_mutex); attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask); if (IS_ERR(table)) { @@ -906,7 +985,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (err < 0) goto err_trans; - list_add_tail_rcu(&table->list, &net->nft.tables); + list_add_tail_rcu(&table->list, &nft_net->tables); return 0; err_trans: rhltable_destroy(&table->chains_ht); @@ -941,8 +1020,7 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, set)) continue; - if (nft_set_is_anonymous(set) && - !list_empty(&set->bindings)) + if (nft_set_is_anonymous(set)) continue; err = nft_delset(ctx, set); @@ -986,11 +1064,12 @@ out: static int nft_flush(struct nft_ctx *ctx, int family) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_table *table, *nt; const struct nlattr * const *nla = ctx->nla; int err = 0; - list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) { + list_for_each_entry_safe(table, nt, &nft_net->tables, list) { if (family != AF_UNSPEC && table->family != family) continue; @@ -1104,7 +1183,9 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask) static bool lockdep_commit_lock_is_held(struct net *net) { #ifdef CONFIG_PROVE_LOCKING - return lockdep_is_held(&net->nft.commit_mutex); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return lockdep_is_held(&nft_net->commit_mutex); #else return true; #endif @@ -1207,18 +1288,13 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, const struct nft_chain *chain) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name)) goto nla_put_failure; if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle), @@ -1306,11 +1382,13 @@ static int nf_tables_dump_chains(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -1503,12 +1581,13 @@ static int nft_chain_parse_hook(struct net *net, struct nft_chain_hook *hook, u8 family, bool autoload) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nlattr *ha[NFTA_HOOK_MAX + 1]; const struct nft_chain_type *type; struct net_device *dev; int err; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], @@ -1607,9 +1686,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_rule **rules; int err; - if (table->use == UINT_MAX) - return -EOVERFLOW; - if (nla[NFTA_CHAIN_HOOK]) { struct nft_chain_hook hook; struct nf_hook_ops *ops; @@ -1681,6 +1757,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) goto err1; + if (!nft_use_inc(&table->use)) { + err = -EMFILE; + goto err_use; + } + err = rhltable_insert_key(&table->chains_ht, chain->name, &chain->rhlhead, nft_chain_ht_params); if (err) @@ -1693,11 +1774,12 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, goto err2; } - table->use++; list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: + nft_use_dec_restore(&table->use); +err_use: nf_tables_unregister_hook(net, table, chain); err1: nf_tables_chain_destroy(ctx); @@ -1777,6 +1859,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy) if (nla[NFTA_CHAIN_HANDLE] && nla[NFTA_CHAIN_NAME]) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_trans *tmp; char *name; @@ -1786,7 +1869,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy) goto err; err = -EEXIST; - list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) { + list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && tmp->ctx.table == table && nft_trans_chain_update(tmp) && @@ -1799,7 +1882,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy) nft_trans_chain_name(trans) = name; } - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err: @@ -1813,6 +1896,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; @@ -1823,7 +1907,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_ctx ctx; u64 handle = 0; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask); if (IS_ERR(table)) { @@ -2251,21 +2335,16 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, const struct nft_rule *rule) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; const struct nft_expr *expr, *next; struct nlattr *list; const struct nft_rule *prule; u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0, + nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_RULE_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name)) @@ -2351,11 +2430,13 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -2508,7 +2589,6 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, { struct nft_expr *expr, *next; - lockdep_assert_held(&ctx->net->nft.commit_mutex); /* * Careful: some expressions might not be initialized in case this * is called on error from nf_tables_newrule(). @@ -2574,6 +2654,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) err = nft_chain_validate(&ctx, chain); if (err < 0) return err; + + cond_resched(); } return 0; @@ -2586,6 +2668,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); struct nft_expr_info *info = NULL; @@ -2602,7 +2685,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, int err, rem; u64 handle, pos_handle; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask); if (IS_ERR(table)) { @@ -2638,9 +2721,6 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return -EINVAL; handle = nf_tables_alloc_handle(table); - if (chain->use == UINT_MAX) - return -EOVERFLOW; - if (nla[NFTA_RULE_POSITION]) { pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION])); old_rule = __nft_rule_lookup(chain, pos_handle); @@ -2716,23 +2796,28 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, expr = nft_expr_next(expr); } + if (!nft_use_inc(&chain->use)) { + err = -EMFILE; + goto err2; + } + if (nlh->nlmsg_flags & NLM_F_REPLACE) { trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule); if (trans == NULL) { err = -ENOMEM; - goto err2; + goto err_destroy_flow_rule; } err = nft_delrule(&ctx, old_rule); if (err < 0) { nft_trans_destroy(trans); - goto err2; + goto err_destroy_flow_rule; } list_add_tail_rcu(&rule->list, &old_rule->list); } else { if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { err = -ENOMEM; - goto err2; + goto err_destroy_flow_rule; } if (nlh->nlmsg_flags & NLM_F_APPEND) { @@ -2748,14 +2833,17 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } } kvfree(info); - chain->use++; - if (net->nft.validate_state == NFT_VALIDATE_DO) + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, table); return 0; + +err_destroy_flow_rule: + nft_use_dec_restore(&chain->use); err2: - nf_tables_rule_release(&ctx, rule); + nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR); + nf_tables_rule_destroy(&ctx, rule); err1: for (i = 0; i < n; i++) { if (info[i].ops) { @@ -2769,15 +2857,18 @@ err1: } static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nft_chain *chain, const struct nlattr *nla) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans *trans; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { struct nft_rule *rule = nft_trans_rule(trans); if (trans->msg_type == NFT_MSG_NEWRULE && + trans->ctx.chain == chain && id == nft_trans_rule_id(trans)) return rule; } @@ -2824,7 +2915,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, err = nft_delrule(&ctx, rule); } else if (nla[NFTA_RULE_ID]) { - rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]); + rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_ID]); if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]); return PTR_ERR(rule); @@ -2892,12 +2983,13 @@ nft_select_set_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, enum nft_set_policies policy) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; const struct nft_set_type *type; u32 flags = 0; - lockdep_assert_held(&ctx->net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); #ifdef CONFIG_MODULES if (list_empty(&nf_tables_set_types)) { @@ -3043,10 +3135,11 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWSET) { struct nft_set *set = nft_trans_set(trans); @@ -3156,23 +3249,17 @@ static __be64 nf_jiffies64_to_msecs(u64 input) static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, const struct nft_set *set, u16 event, u16 flags) { - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *desc; u32 portid = ctx->portid; u32 seq = ctx->seq; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), - flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, + NFNETLINK_V0, nft_base_seq(ctx->net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_NAME, set->name)) @@ -3268,14 +3355,16 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); struct nft_ctx *ctx = cb->data, ctx_set; + struct nftables_pernet *nft_net; if (cb->args[1]) return skb->len; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (ctx->family != NFPROTO_UNSPEC && ctx->family != table->family) continue; @@ -3569,10 +3658,15 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (ops->privsize != NULL) size = ops->privsize(nla, &desc); + if (!nft_use_inc(&table->use)) { + err = -EMFILE; + goto err1; + } + set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); if (!set) { err = -ENOMEM; - goto err1; + goto err_alloc; } name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL); @@ -3619,7 +3713,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, goto err4; list_add_tail_rcu(&set->list, &table->sets); - table->use++; + return 0; err4: @@ -3628,6 +3722,8 @@ err3: kfree(set->name); err2: kvfree(set); +err_alloc: + nft_use_dec_restore(&table->use); err1: module_put(to_set_type(ops)->owner); return err; @@ -3687,6 +3783,12 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, return nft_delset(&ctx, set); } +static int nft_validate_register_store(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type, + unsigned int len); + static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, @@ -3708,9 +3810,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; - if (set->use == UINT_MAX) - return -EOVERFLOW; - if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -3735,10 +3834,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, return iter.err; } bind: + if (!nft_use_inc(&set->use)) + return -EMFILE; + binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); - set->use++; return 0; } @@ -3758,17 +3859,38 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) +{ + if (nft_set_is_anonymous(set)) + nft_clear(ctx->net, set); + + nft_use_inc_restore(&set->use); +} +EXPORT_SYMBOL_GPL(nf_tables_activate_set); + void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + nft_set_trans_unbind(ctx, set); + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + else + list_del_rcu(&binding->list); + + nft_use_dec(&set->use); + break; case NFT_TRANS_PREPARE: - set->use--; + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + + nft_use_dec(&set->use); return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: - set->use--; + nft_use_dec(&set->use); /* fall through */ default: nf_tables_unbind_set(ctx, set, binding, @@ -3964,18 +4086,19 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); + struct nftables_pernet *nft_net; struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; bool set_found = false; - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *nest; u32 portid, seq; int event; rcu_read_lock(); - list_for_each_entry_rcu(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && dump_ctx->ctx.family != table->family) continue; @@ -4001,16 +4124,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), - NLM_F_MULTI); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI, + table->family, NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = table->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) @@ -4067,22 +4185,16 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_elem *elem) { - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *nest; int err; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), - flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, + NFNETLINK_V0, nft_base_seq(ctx->net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_NAME, set->name)) @@ -4122,11 +4234,54 @@ static int nft_setelem_parse_flags(const struct nft_set *set, return 0; } +static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set, + struct nft_data *key, struct nlattr *attr) +{ + struct nft_data_desc desc; + int err; + + err = nft_data_init(ctx, key, NFT_DATA_VALUE_MAXLEN, &desc, attr); + if (err < 0) + return err; + + if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) { + nft_data_release(key, desc.type); + return -EINVAL; + } + + return 0; +} + +static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, + struct nft_data_desc *desc, + struct nft_data *data, + struct nlattr *attr) +{ + u32 dtype; + int err; + + err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr); + if (err < 0) + return err; + + if (set->dtype == NFT_DATA_VERDICT) + dtype = NFT_DATA_VERDICT; + else + dtype = NFT_DATA_VALUE; + + if (dtype != desc->type || + set->dlen != desc->len) { + nft_data_release(data, desc->type); + return -EINVAL; + } + + return 0; +} + static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; - struct nft_data_desc desc; struct nft_set_elem elem; struct sk_buff *skb; uint32_t flags = 0; @@ -4145,17 +4300,11 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) return err; - err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, - nla[NFTA_SET_ELEM_KEY]); + err = nft_setelem_parse_key(ctx, set, &elem.key.val, + nla[NFTA_SET_ELEM_KEY]); if (err < 0) return err; - err = -EINVAL; - if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) { - nft_data_release(&elem.key.val, desc.type); - return err; - } - priv = set->ops->get(ctx->net, set, &elem, flags); if (IS_ERR(priv)) return PTR_ERR(priv); @@ -4323,7 +4472,7 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } } if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); kfree(elem); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); @@ -4346,14 +4495,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; u8 genmask = nft_genmask_next(ctx->net); - struct nft_data_desc d1, d2; struct nft_set_ext_tmpl tmpl; struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; struct nft_object *obj = NULL; struct nft_userdata *udata; - struct nft_data data; + struct nft_data_desc desc; enum nft_registers dreg; struct nft_trans *trans; u32 flags = 0; @@ -4386,6 +4534,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return -EINVAL; } + if (set->flags & NFT_SET_OBJECT) { + if (!nla[NFTA_SET_ELEM_OBJREF] && + !(flags & NFT_SET_ELEM_INTERVAL_END)) + return -EINVAL; + } else { + if (nla[NFTA_SET_ELEM_OBJREF]) + return -EINVAL; + } + if ((flags & NFT_SET_ELEM_INTERVAL_END) && (nla[NFTA_SET_ELEM_DATA] || nla[NFTA_SET_ELEM_OBJREF] || @@ -4407,15 +4564,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, timeout = set->timeout; } - err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1, - nla[NFTA_SET_ELEM_KEY]); + err = nft_setelem_parse_key(ctx, set, &elem.key.val, + nla[NFTA_SET_ELEM_KEY]); if (err < 0) goto err1; - err = -EINVAL; - if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) - goto err2; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); if (timeout > 0) { nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); if (timeout != set->timeout) @@ -4423,29 +4577,29 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { - if (!(set->flags & NFT_SET_OBJECT)) { - err = -EINVAL; - goto err2; - } obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); + obj = NULL; + goto err2; + } + + if (!nft_use_inc(&obj->use)) { + err = -EMFILE; + obj = NULL; goto err2; } + nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); } if (nla[NFTA_SET_ELEM_DATA] != NULL) { - err = nft_data_init(ctx, &data, sizeof(data), &d2, - nla[NFTA_SET_ELEM_DATA]); + err = nft_setelem_parse_data(ctx, set, &desc, &elem.data.val, + nla[NFTA_SET_ELEM_DATA]); if (err < 0) goto err2; - err = -EINVAL; - if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen) - goto err3; - dreg = nft_type_to_reg(set->dtype); list_for_each_entry(binding, &set->bindings, list) { struct nft_ctx bind_ctx = { @@ -4459,19 +4613,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, continue; err = nft_validate_register_store(&bind_ctx, dreg, - &data, - d2.type, d2.len); + &elem.data.val, + desc.type, desc.len); if (err < 0) goto err3; - if (d2.type == NFT_DATA_VERDICT && - (data.verdict.code == NFT_GOTO || - data.verdict.code == NFT_JUMP)) + if (desc.type == NFT_DATA_VERDICT && + (elem.data.val.verdict.code == NFT_GOTO || + elem.data.val.verdict.code == NFT_JUMP)) nft_validate_state_update(ctx->net, NFT_VALIDATE_NEED); } - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); } /* The full maximum length of userdata can exceed the maximum @@ -4487,7 +4641,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } err = -ENOMEM; - elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data, + elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, + elem.data.val.data, timeout, GFP_KERNEL); if (elem.priv == NULL) goto err3; @@ -4500,10 +4655,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } - if (obj) { + if (obj) *nft_set_ext_obj(ext) = obj; - obj->use++; - } trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) @@ -4541,7 +4694,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } nft_trans_elem(trans) = elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err6: @@ -4549,14 +4702,15 @@ err6: err5: kfree(trans); err4: - if (obj) - obj->use--; kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_release(&data, d2.type); + nft_data_release(&elem.data.val, desc.type); err2: - nft_data_release(&elem.key.val, d1.type); + if (obj) + nft_use_dec_restore(&obj->use); + + nft_data_release(&elem.key.val, NFT_DATA_VALUE); err1: return err; } @@ -4566,6 +4720,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; @@ -4585,7 +4740,8 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { @@ -4594,7 +4750,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, return err; } - if (net->nft.validate_state == NFT_VALIDATE_DO) + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, ctx.table); return 0; @@ -4613,11 +4769,14 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, */ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { + struct nft_chain *chain; + if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->verdict.chain->use++; + chain = data->verdict.chain; + nft_use_inc_restore(&chain->use); break; } } @@ -4632,7 +4791,7 @@ static void nft_set_elem_activate(const struct net *net, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_hold(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use++; + nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use); } static void nft_set_elem_deactivate(const struct net *net, @@ -4644,7 +4803,7 @@ static void nft_set_elem_deactivate(const struct net *net, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) - (*nft_set_ext_obj(ext))->use--; + nft_use_dec(&(*nft_set_ext_obj(ext))->use); } static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, @@ -4652,7 +4811,6 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_set_ext_tmpl tmpl; - struct nft_data_desc desc; struct nft_set_elem elem; struct nft_set_ext *ext; struct nft_trans *trans; @@ -4663,11 +4821,10 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, nft_set_elem_policy, NULL); if (err < 0) - goto err1; + return err; - err = -EINVAL; if (nla[NFTA_SET_ELEM_KEY] == NULL) - goto err1; + return -EINVAL; nft_set_ext_prepare(&tmpl); @@ -4677,37 +4834,31 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (flags != 0) nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); - err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, - nla[NFTA_SET_ELEM_KEY]); + err = nft_setelem_parse_key(ctx, set, &elem.key.val, + nla[NFTA_SET_ELEM_KEY]); if (err < 0) - goto err1; - - err = -EINVAL; - if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) - goto err2; + return err; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, desc.len); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); err = -ENOMEM; elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0, GFP_KERNEL); if (elem.priv == NULL) - goto err2; + goto fail_elem; ext = nft_set_elem_ext(set, elem.priv); if (flags) *nft_set_ext_flags(ext) = flags; trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set); - if (trans == NULL) { - err = -ENOMEM; - goto err3; - } + if (trans == NULL) + goto fail_trans; priv = set->ops->deactivate(ctx->net, set, &elem); if (priv == NULL) { err = -ENOENT; - goto err4; + goto fail_ops; } kfree(elem.priv); elem.priv = priv; @@ -4715,16 +4866,15 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, nft_set_elem_deactivate(ctx->net, set, &elem); nft_trans_elem(trans) = elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; -err4: +fail_ops: kfree(trans); -err3: +fail_trans: kfree(elem.priv); -err2: - nft_data_release(&elem.key.val, desc.type); -err1: +fail_elem: + nft_data_release(&elem.key.val, NFT_DATA_VALUE); return err; } @@ -4750,7 +4900,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, nft_set_elem_deactivate(ctx->net, set, elem); nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err1: @@ -4777,7 +4927,11 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + + if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT)) return -EBUSY; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { @@ -5048,9 +5202,14 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + type = nft_obj_type_get(net, objtype); - if (IS_ERR(type)) - return PTR_ERR(type); + if (IS_ERR(type)) { + err = PTR_ERR(type); + goto err_type; + } obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { @@ -5071,7 +5230,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, goto err3; list_add_tail_rcu(&obj->list, &table->objects); - table->use++; + return 0; err3: kfree(obj->name); @@ -5081,6 +5240,9 @@ err2: kfree(obj); err1: module_put(type->owner); +err_type: + nft_use_dec_restore(&table->use); + return err; } @@ -5089,19 +5251,14 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, int family, const struct nft_table *table, struct nft_object *obj, bool reset) { - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || @@ -5132,6 +5289,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) struct nft_obj_filter *filter = cb->data; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; struct nft_object *obj; bool reset = false; @@ -5139,9 +5297,10 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) reset = true; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -5419,6 +5578,23 @@ struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table, } EXPORT_SYMBOL_GPL(nft_flowtable_lookup); +void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, + struct nft_flowtable *flowtable, + enum nft_trans_phase phase) +{ + switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + case NFT_TRANS_PREPARE: + case NFT_TRANS_ABORT: + case NFT_TRANS_RELEASE: + nft_use_dec(&flowtable->use); + /* fall through */ + default: + return; + } +} +EXPORT_SYMBOL_GPL(nf_tables_deactivate_flowtable); + static struct nft_flowtable * nft_flowtable_lookup_byhandle(const struct nft_table *table, const struct nlattr *nla, u8 genmask) @@ -5618,9 +5794,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + if (!nft_use_inc(&table->use)) + return -EMFILE; + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); - if (!flowtable) - return -ENOMEM; + if (!flowtable) { + err = -ENOMEM; + goto flowtable_alloc; + } flowtable->table = table; flowtable->handle = nf_tables_alloc_handle(table); @@ -5674,7 +5855,6 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, goto err6; list_add_tail_rcu(&flowtable->list, &table->flowtables); - table->use++; return 0; err6: @@ -5692,6 +5872,9 @@ err2: kfree(flowtable->name); err1: kfree(flowtable); +flowtable_alloc: + nft_use_dec_restore(&table->use); + return err; } @@ -5749,20 +5932,15 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, struct nft_flowtable *flowtable) { struct nlattr *nest, *nest_devs; - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; int i; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) || nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || @@ -5812,12 +5990,14 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nft_flowtable *flowtable; + struct nftables_pernet *nft_net; const struct nft_table *table; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -5987,21 +6167,17 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; char buf[TASK_COMM_LEN]; int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC, + NFNETLINK_V0, nft_base_seq(net)); + if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - - if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) || + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) || nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) || nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current))) goto nla_put_failure; @@ -6034,6 +6210,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nft_flowtable *flowtable; + struct nftables_pernet *nft_net; struct nft_table *table; struct net *net; @@ -6041,13 +6218,14 @@ static int nf_tables_flowtable_event(struct notifier_block *this, return 0; net = dev_net(dev); - mutex_lock(&net->nft.commit_mutex); - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(flowtable, &table->flowtables, list) { nft_flowtable_event(event, dev, flowtable); } } - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } @@ -6228,19 +6406,22 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { static int nf_tables_validate(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_table *table; - switch (net->nft.validate_state) { + switch (nft_net->validate_state) { case NFT_VALIDATE_SKIP: break; case NFT_VALIDATE_NEED: nft_validate_state_update(net, NFT_VALIDATE_DO); /* fall through */ case NFT_VALIDATE_DO: - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_validate(net, table) < 0) return -EAGAIN; } + + nft_validate_state_update(net, NFT_VALIDATE_SKIP); break; } @@ -6312,15 +6493,16 @@ static void nft_commit_release(struct nft_trans *trans) static void nf_tables_commit_release(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; - if (list_empty(&net->nft.commit_list)) + if (list_empty(&nft_net->commit_list)) return; synchronize_rcu(); - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { - list_del(&trans->list); + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { + nft_trans_list_del(trans); nft_commit_release(trans); } } @@ -6358,9 +6540,10 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha static void nf_tables_commit_chain_prepare_cancel(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { struct nft_chain *chain = trans->ctx.chain; if (trans->msg_type == NFT_MSG_NEWRULE || @@ -6452,17 +6635,30 @@ static void nft_chain_del(struct nft_chain *chain) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_set_is_anonymous(nft_trans_set(trans)) && + !nft_trans_set_bound(trans)) { + pr_warn_once("nftables ruleset with unbound set\n"); + return -EINVAL; + } + break; + } + } + /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) return -EAGAIN; /* 1. Allocate space for next generation rules_gen_X[] */ - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { int ret; if (trans->msg_type == NFT_MSG_NEWRULE || @@ -6478,7 +6674,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } /* step 2. Make rules_gen_X visible to packet path */ - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(chain, &table->chains, list) nf_tables_commit_chain(net, chain); } @@ -6487,12 +6683,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) * Bump generation counter, invalidate any dump in progress. * Cannot fail after this point. */ - while (++net->nft.base_seq == 0); + while (++nft_net->base_seq == 0) + ; /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { @@ -6552,7 +6749,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) - trans->ctx.table->use--; + nft_use_dec(&trans->ctx.table->use); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); @@ -6613,7 +6810,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_commit_release(net); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return 0; } @@ -6649,10 +6846,11 @@ static void nf_tables_abort_release(struct nft_trans *trans) static int __nf_tables_abort(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; struct nft_trans_elem *te; - list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, + list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWTABLE: @@ -6677,7 +6875,7 @@ static int __nf_tables_abort(struct net *net) kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); nft_chain_del(trans->ctx.chain); nf_tables_unregister_hook(trans->ctx.net, trans->ctx.table, @@ -6685,25 +6883,25 @@ static int __nf_tables_abort(struct net *net) } break; case NFT_MSG_DELCHAIN: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: - trans->ctx.chain->use--; + nft_use_dec_restore(&trans->ctx.chain->use); list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); break; case NFT_MSG_DELRULE: - trans->ctx.chain->use++; + nft_use_inc_restore(&trans->ctx.chain->use); nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; @@ -6711,7 +6909,7 @@ static int __nf_tables_abort(struct net *net) list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_set(trans)); nft_trans_destroy(trans); break; @@ -6734,22 +6932,22 @@ static int __nf_tables_abort(struct net *net) nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); list_del_rcu(&nft_trans_obj(trans)->list); break; case NFT_MSG_DELOBJ: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWFLOWTABLE: - trans->ctx.table->use--; + nft_use_dec_restore(&trans->ctx.table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, nft_trans_flowtable(trans)); break; case NFT_MSG_DELFLOWTABLE: - trans->ctx.table->use++; + nft_use_inc_restore(&trans->ctx.table->use); nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); nft_trans_destroy(trans); break; @@ -6759,37 +6957,34 @@ static int __nf_tables_abort(struct net *net) synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, - &net->nft.commit_list, list) { - list_del(&trans->list); + &nft_net->commit_list, list) { + nft_trans_list_del(trans); nf_tables_abort_release(trans); } return 0; } -static void nf_tables_cleanup(struct net *net) -{ - nft_validate_state_update(net, NFT_VALIDATE_SKIP); -} - static int nf_tables_abort(struct net *net, struct sk_buff *skb) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); int ret = __nf_tables_abort(net); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return ret; } static bool nf_tables_valid_genid(struct net *net, u32 genid) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); bool genid_ok; - mutex_lock(&net->nft.commit_mutex); + mutex_lock(&nft_net->commit_mutex); - genid_ok = genid == 0 || net->nft.base_seq == genid; + genid_ok = genid == 0 || nft_net->base_seq == genid; if (!genid_ok) - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); /* else, commit mutex has to be released by commit or abort function */ return genid_ok; @@ -6802,7 +6997,6 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .cb = nf_tables_cb, .commit = nf_tables_commit, .abort = nf_tables_abort, - .cleanup = nf_tables_cleanup, .valid_genid = nf_tables_valid_genid, .owner = THIS_MODULE, }; @@ -6964,28 +7158,24 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) } EXPORT_SYMBOL_GPL(nft_parse_u32_check); -/** - * nft_parse_register - parse a register value from a netlink attribute - * - * @attr: netlink attribute - * - * Parse and translate a register value from a netlink attribute. - * Registers used to be 128 bit wide, these register numbers will be - * mapped to the corresponding 32 bit register numbers. - */ -unsigned int nft_parse_register(const struct nlattr *attr) +static int nft_parse_register(const struct nlattr *attr, u32 *preg) { unsigned int reg; reg = ntohl(nla_get_be32(attr)); switch (reg) { case NFT_REG_VERDICT...NFT_REG_4: - return reg * NFT_REG_SIZE / NFT_REG32_SIZE; + *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE; + break; + case NFT_REG32_00...NFT_REG32_15: + *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + break; default: - return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + return -ERANGE; } + + return 0; } -EXPORT_SYMBOL_GPL(nft_parse_register); /** * nft_dump_register - dump a register value to a netlink attribute @@ -7018,7 +7208,7 @@ EXPORT_SYMBOL_GPL(nft_dump_register); * Validate that the input register is one of the general purpose * registers and that the length of the load is within the bounds. */ -int nft_validate_register_load(enum nft_registers reg, unsigned int len) +static int nft_validate_register_load(enum nft_registers reg, unsigned int len) { if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; @@ -7029,7 +7219,24 @@ int nft_validate_register_load(enum nft_registers reg, unsigned int len) return 0; } -EXPORT_SYMBOL_GPL(nft_validate_register_load); + +int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) +{ + u32 reg; + int err; + + err = nft_parse_register(attr, ®); + if (err < 0) + return err; + + err = nft_validate_register_load(reg, len); + if (err < 0) + return err; + + *sreg = reg; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_register_load); /** * nft_validate_register_store - validate an expressions' register store @@ -7045,10 +7252,11 @@ EXPORT_SYMBOL_GPL(nft_validate_register_load); * A value of NULL for the data means that its runtime gathered * data. */ -int nft_validate_register_store(const struct nft_ctx *ctx, - enum nft_registers reg, - const struct nft_data *data, - enum nft_data_types type, unsigned int len) +static int nft_validate_register_store(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type, + unsigned int len) { int err; @@ -7080,7 +7288,27 @@ int nft_validate_register_store(const struct nft_ctx *ctx, return 0; } } -EXPORT_SYMBOL_GPL(nft_validate_register_store); + +int nft_parse_register_store(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *dreg, + const struct nft_data *data, + enum nft_data_types type, unsigned int len) +{ + int err; + u32 reg; + + err = nft_parse_register(attr, ®); + if (err < 0) + return err; + + err = nft_validate_register_store(ctx, reg, data, type, len); + if (err < 0) + return err; + + *dreg = reg; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_register_store); static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, @@ -7103,6 +7331,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CODE]) return -EINVAL; + + /* zero padding hole for memcmp */ + memset(data, 0, sizeof(*data)); data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { @@ -7130,8 +7361,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, return PTR_ERR(chain); if (nft_is_base_chain(chain)) return -EOPNOTSUPP; + if (!nft_use_inc(&chain->use)) + return -EMFILE; - chain->use++; data->verdict.chain = chain; break; } @@ -7143,10 +7375,13 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) { + struct nft_chain *chain; + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->verdict.chain->use--; + chain = data->verdict.chain; + nft_use_dec(&chain->use); break; } } @@ -7299,21 +7534,20 @@ int __nft_release_basechain(struct nft_ctx *ctx) nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); - ctx->chain->use--; + nft_use_dec(&ctx->chain->use); nf_tables_rule_release(ctx, rule); } nft_chain_del(ctx->chain); - ctx->table->use--; + nft_use_dec(&ctx->table->use); nf_tables_chain_destroy(ctx); return 0; } EXPORT_SYMBOL_GPL(__nft_release_basechain); -static void __nft_release_tables(struct net *net) +static void __nft_release_table(struct net *net, struct nft_table *table) { struct nft_flowtable *flowtable, *nf; - struct nft_table *table, *nt; struct nft_chain *chain, *nc; struct nft_object *obj, *ne; struct nft_rule *rule, *nr; @@ -7323,71 +7557,85 @@ static void __nft_release_tables(struct net *net) .family = NFPROTO_NETDEV, }; - list_for_each_entry_safe(table, nt, &net->nft.tables, list) { - ctx.family = table->family; + ctx.family = table->family; - list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hook(net, table, chain); - /* No packets are walking on these chains anymore. */ - ctx.table = table; - list_for_each_entry(chain, &table->chains, list) { - ctx.chain = chain; - list_for_each_entry_safe(rule, nr, &chain->rules, list) { - list_del(&rule->list); - chain->use--; - nf_tables_rule_release(&ctx, rule); - } - } - list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { - list_del(&flowtable->list); - table->use--; - nf_tables_flowtable_destroy(flowtable); - } - list_for_each_entry_safe(set, ns, &table->sets, list) { - list_del(&set->list); - table->use--; - nft_set_destroy(set); - } - list_for_each_entry_safe(obj, ne, &table->objects, list) { - list_del(&obj->list); - table->use--; - nft_obj_destroy(&ctx, obj); - } - list_for_each_entry_safe(chain, nc, &table->chains, list) { - ctx.chain = chain; - nft_chain_del(chain); - table->use--; - nf_tables_chain_destroy(&ctx); + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hook(net, table, chain); + /* No packets are walking on these chains anymore. */ + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); + nft_use_dec(&chain->use); + nf_tables_rule_release(&ctx, rule); } - list_del(&table->list); - nf_tables_table_destroy(&ctx); } + list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { + list_del(&flowtable->list); + nft_use_dec(&table->use); + nf_tables_flowtable_destroy(flowtable); + } + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); + nft_use_dec(&table->use); + nft_set_destroy(set); + } + list_for_each_entry_safe(obj, ne, &table->objects, list) { + list_del(&obj->list); + nft_use_dec(&table->use); + nft_obj_destroy(&ctx, obj); + } + list_for_each_entry_safe(chain, nc, &table->chains, list) { + ctx.chain = chain; + nft_chain_del(chain); + nft_use_dec(&table->use); + nf_tables_chain_destroy(&ctx); + } + list_del(&table->list); + nf_tables_table_destroy(&ctx); +} + +static void __nft_release_tables(struct net *net) +{ + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + struct nft_table *table, *nt; + + list_for_each_entry_safe(table, nt, &nft_net->tables, list) + __nft_release_table(net, table); } static int __net_init nf_tables_init_net(struct net *net) { - INIT_LIST_HEAD(&net->nft.tables); - INIT_LIST_HEAD(&net->nft.commit_list); - mutex_init(&net->nft.commit_mutex); - net->nft.base_seq = 1; - net->nft.validate_state = NFT_VALIDATE_SKIP; + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + INIT_LIST_HEAD(&nft_net->tables); + INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->binding_list); + mutex_init(&nft_net->commit_mutex); + nft_net->base_seq = 1; + nft_net->validate_state = NFT_VALIDATE_SKIP; return 0; } static void __net_exit nf_tables_exit_net(struct net *net) { - mutex_lock(&net->nft.commit_mutex); - if (!list_empty(&net->nft.commit_list)) + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + mutex_lock(&nft_net->commit_mutex); + if (!list_empty(&nft_net->commit_list)) __nf_tables_abort(net); __nft_release_tables(net); - mutex_unlock(&net->nft.commit_mutex); - WARN_ON_ONCE(!list_empty(&net->nft.tables)); + mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); } static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .exit = nf_tables_exit_net, + .id = &nf_tables_net_id, + .size = sizeof(struct nftables_pernet), }; static int __init nf_tables_module_init(void) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index e1dc527a493b..7a19c517b191 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -186,7 +186,6 @@ static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info) void nft_trace_notify(struct nft_traceinfo *info) { const struct nft_pktinfo *pkt = info->pkt; - struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; @@ -222,15 +221,11 @@ void nft_trace_notify(struct nft_traceinfo *info) return; event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE); - nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); + nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family, + NFNETLINK_V0, 0); if (!nlh) goto nla_put_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = info->basechain->type->family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt)))) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9bacddc761ba..0267be2e9cfe 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -452,7 +452,8 @@ ack: * processed, this avoids that the same error is * reported several times when replaying the batch. */ - if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) { + if (err == -ENOMEM || + nfnl_err_add(&err_list, nlh, err, &extack) < 0) { /* We failed to enqueue an error, reset the * list of errors and send OOM to userspace * pointing to the batch header. @@ -495,8 +496,6 @@ done: } else { ss->abort(net, oskb); } - if (ss->cleanup) - ss->cleanup(net); nfnl_err_deliver(&err_list, oskb); kfree_skb(skb); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 8fa8bf7c48e6..7c5f428dc5c9 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -135,21 +135,16 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_acct *acct) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; u64 pkts, bytes; u32 old_flags; event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, NFACCT_NAME, acct->name)) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index c8b0f1122c44..720177721e3c 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -532,20 +532,15 @@ nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_conntrack_helper *helper) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; int status; event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, NFCTH_NAME, helper->name)) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 70a7382b9787..ae01e9ad5546 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -164,20 +164,15 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct ctnl_timeout *timeout) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) || nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->timeout.l3num)) || @@ -396,19 +391,14 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, const unsigned int *timeouts) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = AF_UNSPEC; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) || nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 17ca9a681d47..1735bcb07381 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -404,20 +404,15 @@ __build_packet_message(struct nfnl_log_net *log, { struct nfulnl_msg_packet_hdr pmsg; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; sk_buff_data_t old_tail = inst->skb->tail; struct sock *sk; const unsigned char *hwhdrp; - nlh = nlmsg_put(inst->skb, 0, 0, - nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), - sizeof(struct nfgenmsg), 0); + nlh = nfnl_msg_put(inst->skb, 0, 0, + nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), + 0, pf, NFNETLINK_V0, htons(inst->group_num)); if (!nlh) return -1; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = pf; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(inst->group_num); memset(&pmsg, 0, sizeof(pmsg)); pmsg.hw_protocol = skb->protocol; @@ -636,8 +631,8 @@ nfulnl_log_packet(struct net *net, unsigned int plen = 0; struct nfnl_log_net *log = nfnl_log_pernet(net); const struct nfnl_ct_hook *nfnl_ct = NULL; + enum ip_conntrack_info ctinfo = 0; struct nf_conn *ct = NULL; - enum ip_conntrack_info uninitialized_var(ctinfo); if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 917f06110c82..f3676238e64f 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -318,6 +318,14 @@ static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl, f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + if (f->opt_num > ARRAY_SIZE(f->opt)) + return -EINVAL; + + if (!memchr(f->genre, 0, MAXGENRELEN) || + !memchr(f->subtype, 0, MAXGENRELEN) || + !memchr(f->version, 0, MAXGENRELEN)) + return -EINVAL; + kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL); if (!kf) return -ENOMEM; @@ -442,3 +450,4 @@ module_init(nfnl_osf_init); module_exit(nfnl_osf_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index cd496b074a71..1aacc31a6bf9 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -387,12 +387,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; struct sk_buff *entskb = entry->skb; struct net_device *indev; struct net_device *outdev; struct nf_conn *ct = NULL; - enum ip_conntrack_info uninitialized_var(ctinfo); + enum ip_conntrack_info ctinfo; struct nfnl_ct_hook *nfnl_ct; bool csum_verify; char *secdata = NULL; @@ -473,18 +472,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nlmsg_failure; } - nlh = nlmsg_put(skb, 0, 0, - nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET), - sizeof(struct nfgenmsg), 0); + nlh = nfnl_msg_put(skb, 0, 0, + nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET), + 0, entry->state.pf, NFNETLINK_V0, + htons(queue->queue_num)); if (!nlh) { skb_tx_error(entskb); kfree_skb(skb); goto nlmsg_failure; } - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = entry->state.pf; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(queue->queue_num); nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); pmsg = nla_data(nla); @@ -1191,7 +1187,7 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct nfqnl_instance *queue; unsigned int verdict; struct nf_queue_entry *entry; - enum ip_conntrack_info uninitialized_var(ctinfo); + enum ip_conntrack_info ctinfo; struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; struct nfnl_queue_net *q = nfnl_queue_pernet(net); diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 058ee84ea531..c1055251ebde 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -18,8 +18,8 @@ #include <net/netfilter/nf_tables.h> struct nft_bitwise { - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; u8 len; struct nft_data mask; struct nft_data xor; @@ -68,14 +68,14 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, priv->len = len; - priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); - err = nft_validate_register_load(priv->sreg, priv->len); + err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg, + priv->len); if (err < 0) return err; - priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); if (err < 0) return err; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 13d4e421a6b3..dba16126c7ee 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -19,8 +19,8 @@ #include <net/netfilter/nf_tables.h> struct nft_byteorder { - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; enum nft_byteorder_ops op:8; u8 len; u8 size; @@ -33,11 +33,11 @@ static void nft_byteorder_eval(const struct nft_expr *expr, const struct nft_byteorder *priv = nft_expr_priv(expr); u32 *src = ®s->data[priv->sreg]; u32 *dst = ®s->data[priv->dreg]; - union { u32 u32; u16 u16; } *s, *d; + u16 *s16, *d16; unsigned int i; - s = (void *)src; - d = (void *)dst; + s16 = (void *)src; + d16 = (void *)dst; switch (priv->size) { case 8: { @@ -63,11 +63,11 @@ static void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 4; i++) - d[i].u32 = ntohl((__force __be32)s[i].u32); + dst[i] = ntohl((__force __be32)src[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 4; i++) - d[i].u32 = (__force __u32)htonl(s[i].u32); + dst[i] = (__force __u32)htonl(src[i]); break; } break; @@ -75,11 +75,11 @@ static void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 2; i++) - d[i].u16 = ntohs((__force __be16)s[i].u16); + d16[i] = ntohs((__force __be16)s16[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 2; i++) - d[i].u16 = (__force __u16)htons(s[i].u16); + d16[i] = (__force __u16)htons(s16[i]); break; } break; @@ -133,20 +133,20 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]); err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); if (err < 0) return err; priv->len = len; - err = nft_validate_register_load(priv->sreg, priv->len); + err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg, + priv->len); if (err < 0) return err; - priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 3fd540b2c6ba..a308d45ee95e 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -2,6 +2,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/netfilter/nf_tables.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> @@ -10,6 +11,8 @@ #include <net/netfilter/nf_tables_ipv4.h> #include <net/netfilter/nf_tables_ipv6.h> +extern unsigned int nf_tables_net_id; + #ifdef CONFIG_NF_TABLES_IPV4 static unsigned int nft_do_chain_ipv4(void *priv, struct sk_buff *skb, @@ -315,6 +318,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nftables_pernet *nft_net; struct nft_table *table; struct nft_chain *chain, *nr; struct nft_ctx ctx = { @@ -325,8 +329,9 @@ static int nf_tables_netdev_event(struct notifier_block *this, event != NETDEV_CHANGENAME) return NOTIFY_DONE; - mutex_lock(&ctx.net->nft.commit_mutex); - list_for_each_entry(table, &ctx.net->nft.tables, list) { + nft_net = net_generic(ctx.net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { if (table->family != NFPROTO_NETDEV) continue; @@ -340,7 +345,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, nft_netdev_event(event, dev, &ctx); } } - mutex_unlock(&ctx.net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 7007045c0849..36bf64ebc892 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -19,7 +19,7 @@ struct nft_cmp_expr { struct nft_data data; - enum nft_registers sreg:8; + u8 sreg; u8 len; enum nft_cmp_ops op:8; }; @@ -88,8 +88,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return err; } - priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); - err = nft_validate_register_load(priv->sreg, desc.len); + err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; @@ -139,8 +138,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, if (err < 0) return err; - priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); - err = nft_validate_register_load(priv->sreg, desc.len); + err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 469f9da5073b..2846d64659f2 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -575,19 +575,14 @@ nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int rev, int target) { struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; event = nfnl_msg_type(NFNL_SUBSYS_NFT_COMPAT, event); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); - if (nlh == NULL) + nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, + NFNETLINK_V0, 0); + if (!nlh) goto nlmsg_failure; - nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = family; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - if (nla_put_string(skb, NFTA_COMPAT_NAME, name) || nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) || nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target))) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 5dd87748afa8..f29f02805bcc 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -29,8 +29,8 @@ struct nft_ct { enum nft_ct_keys key:8; enum ip_conntrack_dir dir:8; union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 dreg; + u8 sreg; }; }; @@ -486,9 +486,8 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, } } - priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL, + NFT_DATA_VALUE, len); if (err < 0) return err; @@ -581,8 +580,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, } } - priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]); - err = nft_validate_register_load(priv->sreg, len); + err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len); if (err < 0) goto err1; diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 2cc1e0ef56e8..e862f916efa0 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -16,7 +16,7 @@ #include <net/netfilter/nf_dup_netdev.h> struct nft_dup_netdev { - enum nft_registers sreg_dev:8; + u8 sreg_dev; }; static void nft_dup_netdev_eval(const struct nft_expr *expr, @@ -42,8 +42,8 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_DEV] == NULL) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, + sizeof(int)); } static const struct nft_expr_ops nft_dup_netdev_ingress_ops; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index cc076d535e14..a4c6aba7da7e 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -15,13 +15,16 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> +#include <net/netns/generic.h> + +extern unsigned int nf_tables_net_id; struct nft_dynset { struct nft_set *set; struct nft_set_ext_tmpl tmpl; enum nft_dynset_ops op:8; - enum nft_registers sreg_key:8; - enum nft_registers sreg_data:8; + u8 sreg_key; + u8 sreg_data; bool invert; u64 timeout; struct nft_expr *expr; @@ -112,13 +115,14 @@ static int nft_dynset_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_dynset *priv = nft_expr_priv(expr); u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; u64 timeout; int err; - lockdep_assert_held(&ctx->net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); if (tb[NFTA_DYNSET_SET_NAME] == NULL || tb[NFTA_DYNSET_OP] == NULL || @@ -140,6 +144,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); + if (set->flags & NFT_SET_OBJECT) + return -EOPNOTSUPP; + if (set->ops->update == NULL) return -EOPNOTSUPP; @@ -166,8 +173,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, tb[NFTA_DYNSET_TIMEOUT]))); } - priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); - err = nft_validate_register_load(priv->sreg_key, set->klen); + err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key, + set->klen); if (err < 0) return err; @@ -177,8 +184,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (set->dtype == NFT_DATA_VERDICT) return -EOPNOTSUPP; - priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]); - err = nft_validate_register_load(priv->sreg_data, set->dlen); + err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA], + &priv->sreg_data, set->dlen); if (err < 0) return err; } else if (set->flags & NFT_SET_MAP) @@ -248,7 +255,7 @@ static void nft_dynset_activate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_dynset_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 93fee4106019..8d0f14cd7cc3 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -22,8 +22,8 @@ struct nft_exthdr { u8 offset; u8 len; u8 op; - enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 dreg; + u8 sreg; u8 flags; }; @@ -258,12 +258,12 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; - priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]); priv->flags = flags; priv->op = op; - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, @@ -308,11 +308,11 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; - priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]); priv->flags = flags; priv->op = op; - return nft_validate_register_load(priv->sreg, priv->len); + return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg, + priv->len); } static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv) diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 21df8cccea65..ce6891337304 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -88,7 +88,6 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); - priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]); switch (priv->result) { case NFT_FIB_RESULT_OIF: @@ -108,8 +107,8 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + err = nft_parse_register_store(ctx, tb[NFTA_FIB_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); if (err < 0) return err; diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 166edea0e452..ec35a41c7262 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -169,18 +169,34 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx, if (IS_ERR(flowtable)) return PTR_ERR(flowtable); + if (!nft_use_inc(&flowtable->use)) + return -EMFILE; + priv->flowtable = flowtable; - flowtable->use++; return nf_ct_netns_get(ctx->net, ctx->family); } -static void nft_flow_offload_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_flow_offload_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_flow_offload *priv = nft_expr_priv(expr); - priv->flowtable->use--; + nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase); +} + +static void nft_flow_offload_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_flow_offload *priv = nft_expr_priv(expr); + + nft_use_inc_restore(&priv->flowtable->use); +} + +static void nft_flow_offload_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ nf_ct_netns_put(ctx->net, ctx->family); } @@ -203,6 +219,8 @@ static const struct nft_expr_ops nft_flow_offload_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)), .eval = nft_flow_offload_eval, .init = nft_flow_offload_init, + .activate = nft_flow_offload_activate, + .deactivate = nft_flow_offload_deactivate, .destroy = nft_flow_offload_destroy, .validate = nft_flow_offload_validate, .dump = nft_flow_offload_dump, diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 10a12e094929..2efbe78de3b2 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -20,7 +20,7 @@ #include <net/ip.h> struct nft_fwd_netdev { - enum nft_registers sreg_dev:8; + u8 sreg_dev; }; static void nft_fwd_netdev_eval(const struct nft_expr *expr, @@ -49,8 +49,8 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_FWD_SREG_DEV] == NULL) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + sizeof(int)); } static const struct nft_expr_ops nft_fwd_netdev_ingress_ops; @@ -69,8 +69,8 @@ nla_put_failure: } struct nft_fwd_neigh { - enum nft_registers sreg_dev:8; - enum nft_registers sreg_addr:8; + u8 sreg_dev; + u8 sreg_addr; u8 nfproto; }; @@ -148,8 +148,6 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, !tb[NFTA_FWD_NFPROTO]) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); - priv->sreg_addr = nft_parse_register(tb[NFTA_FWD_SREG_ADDR]); priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO])); switch (priv->nfproto) { @@ -163,11 +161,13 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - err = nft_validate_register_load(priv->sreg_dev, sizeof(int)); + err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + sizeof(int)); if (err < 0) return err; - return nft_validate_register_load(priv->sreg_addr, addr_len); + return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr, + addr_len); } static const struct nft_expr_ops nft_fwd_netdev_ingress_ops; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index b8f23f75aea6..513419aca9c6 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -18,8 +18,8 @@ #include <linux/jhash.h> struct nft_jhash { - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; u8 len; bool autogen_seed:1; u32 modulus; @@ -65,7 +65,7 @@ static void nft_jhash_map_eval(const struct nft_expr *expr, } struct nft_symhash { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; u32 offset; struct nft_set *map; @@ -136,9 +136,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx, if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); - priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); - priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); - err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len); if (err < 0) return err; @@ -147,6 +144,10 @@ static int nft_jhash_init(const struct nft_ctx *ctx, priv->len = len; + err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len); + if (err < 0) + return err; + priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); if (priv->modulus < 1) return -ERANGE; @@ -161,9 +162,8 @@ static int nft_jhash_init(const struct nft_ctx *ctx, get_random_bytes(&priv->seed, sizeof(priv->seed)); } - return nft_validate_register_load(priv->sreg, len) && - nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_jhash_map_init(const struct nft_ctx *ctx, @@ -193,8 +193,6 @@ static int nft_symhash_init(const struct nft_ctx *ctx, if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); - priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); - priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); if (priv->modulus < 1) return -ERANGE; @@ -202,8 +200,9 @@ static int nft_symhash_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + sizeof(u32)); } static int nft_symhash_map_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 3f6d1d2a6281..af4e2a4bce93 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -50,9 +50,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx, priv->dlen = desc.len; - priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, &priv->data, - desc.type, desc.len); + err = nft_parse_register_store(ctx, tb[NFTA_IMMEDIATE_DREG], + &priv->dreg, &priv->data, desc.type, + desc.len); if (err < 0) goto err1; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 55754d9939b5..3c380fb32651 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -20,8 +20,8 @@ struct nft_lookup { struct nft_set *set; - enum nft_registers sreg:8; - enum nft_registers dreg:8; + u8 sreg; + u8 dreg; bool invert; struct nft_set_binding binding; }; @@ -76,8 +76,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); - priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]); - err = nft_validate_register_load(priv->sreg, set->klen); + err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg, + set->klen); if (err < 0) return err; @@ -100,9 +100,9 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_MAP)) return -EINVAL; - priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - set->dtype, set->dlen); + err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG], + &priv->dreg, NULL, set->dtype, + set->dlen); if (err < 0) return err; } else if (set->flags & NFT_SET_MAP) @@ -132,7 +132,7 @@ static void nft_lookup_activate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_lookup_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 9d8655bc1bea..4ecfebc2fdc4 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -53,19 +53,15 @@ int nft_masq_init(const struct nft_ctx *ctx, } if (tb[NFTA_MASQ_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_MASQ_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 297fe7d97c18..061a29bd3066 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -30,8 +30,8 @@ struct nft_meta { enum nft_meta_keys key:8; union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 dreg; + u8 sreg; }; }; @@ -358,9 +358,8 @@ static int nft_meta_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_meta_get_validate(const struct nft_ctx *ctx, @@ -448,8 +447,7 @@ static int nft_meta_set_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->sreg = nft_parse_register(tb[NFTA_META_SREG]); - err = nft_validate_register_load(priv->sreg, len); + err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); if (err < 0) return err; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 2c3d7ff6f58a..aa6149cc8c87 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -27,10 +27,10 @@ #include <net/ip.h> struct nft_nat { - enum nft_registers sreg_addr_min:8; - enum nft_registers sreg_addr_max:8; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; + u8 sreg_addr_min; + u8 sreg_addr_max; + u8 sreg_proto_min; + u8 sreg_proto_max; enum nf_nat_manip_type type:8; u8 family; u16 flags; @@ -160,18 +160,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { - priv->sreg_addr_min = - nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]); - err = nft_validate_register_load(priv->sreg_addr_min, alen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN], + &priv->sreg_addr_min, alen); if (err < 0) return err; if (tb[NFTA_NAT_REG_ADDR_MAX]) { - priv->sreg_addr_max = - nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]); - - err = nft_validate_register_load(priv->sreg_addr_max, - alen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX], + &priv->sreg_addr_max, + alen); if (err < 0) return err; } else { @@ -181,19 +178,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_NAT_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 3cc1b3dc3c3c..8ff82f17ecba 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -20,7 +20,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); struct nft_ng_inc { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; atomic_t counter; u32 offset; @@ -70,11 +70,10 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); atomic_set(&priv->counter, priv->modulus - 1); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, @@ -104,7 +103,7 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) } struct nft_ng_random { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; u32 offset; }; @@ -144,10 +143,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, prandom_init_once(&nft_numgen_prandom_state); - priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); - - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index bf92a40dd1b2..2401e9fa17c4 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -43,8 +43,10 @@ static int nft_objref_init(const struct nft_ctx *ctx, if (IS_ERR(obj)) return -ENOENT; + if (!nft_use_inc(&obj->use)) + return -EMFILE; + nft_objref_priv(expr) = obj; - obj->use++; return 0; } @@ -73,7 +75,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx, if (phase == NFT_TRANS_COMMIT) return; - obj->use--; + nft_use_dec(&obj->use); } static void nft_objref_activate(const struct nft_ctx *ctx, @@ -81,7 +83,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx, { struct nft_object *obj = nft_objref_priv(expr); - obj->use++; + nft_use_inc_restore(&obj->use); } static struct nft_expr_type nft_objref_type; @@ -97,7 +99,7 @@ static const struct nft_expr_ops nft_objref_ops = { struct nft_objref_map { struct nft_set *set; - enum nft_registers sreg:8; + u8 sreg; struct nft_set_binding binding; }; @@ -139,8 +141,8 @@ static int nft_objref_map_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_OBJECT)) return -EINVAL; - priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]); - err = nft_validate_register_load(priv->sreg, set->klen); + err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg, + set->klen); if (err < 0) return err; @@ -182,7 +184,7 @@ static void nft_objref_map_activate(const struct nft_ctx *ctx, { struct nft_objref_map *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 4fac2d9a4b88..af2ce7a8c587 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -5,7 +5,7 @@ #include <linux/netfilter/nfnetlink_osf.h> struct nft_osf { - enum nft_registers dreg:8; + u8 dreg; }; static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = { @@ -55,9 +55,9 @@ static int nft_osf_init(const struct nft_ctx *ctx, if (!tb[NFTA_OSF_DREG]) return -EINVAL; - priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); + err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, + NFT_OSF_MAXGENRELEN); if (err < 0) return err; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 5732b32ab932..0ef51c81ec94 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -84,7 +84,7 @@ static void nft_payload_eval(const struct nft_expr *expr, switch (priv->base) { case NFT_PAYLOAD_LL_HEADER: - if (!skb_mac_header_was_set(skb)) + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0) goto err; if (skb_vlan_tag_present(skb)) { @@ -135,10 +135,10 @@ static int nft_payload_init(const struct nft_ctx *ctx, priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -338,7 +338,6 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]); if (tb[NFTA_PAYLOAD_CSUM_TYPE]) csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); @@ -369,7 +368,8 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, } priv->csum_type = csum_type; - return nft_validate_register_load(priv->sreg, priv->len); + return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg, + priv->len); } static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 98613658d4ac..de5f1bda9d6f 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -22,10 +22,10 @@ static u32 jhash_initval __read_mostly; struct nft_queue { - enum nft_registers sreg_qnum:8; - u16 queuenum; - u16 queues_total; - u16 flags; + u8 sreg_qnum; + u16 queuenum; + u16 queues_total; + u16 flags; }; static void nft_queue_eval(const struct nft_expr *expr, @@ -114,8 +114,8 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx, struct nft_queue *priv = nft_expr_priv(expr); int err; - priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]); - err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32)); + err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM], + &priv->sreg_qnum, sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 2e1d2ec2f52a..a5f74e5b8184 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -18,7 +18,7 @@ struct nft_range_expr { struct nft_data data_from; struct nft_data data_to; - enum nft_registers sreg:8; + u8 sreg; u8 len; enum nft_range_ops op:8; }; @@ -90,8 +90,8 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr goto err2; } - priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]); - err = nft_validate_register_load(priv->sreg, desc_from.len); + err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg, + desc_from.len); if (err < 0) goto err2; diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index c64cbe78dee7..08a05bd1e817 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -49,19 +49,15 @@ int nft_redir_init(const struct nft_ctx *ctx, plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 76dba9f6b6f6..edce109ef4b0 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -18,7 +18,7 @@ struct nft_rt { enum nft_rt_keys key:8; - enum nft_registers dreg:8; + u8 dreg; }; static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst) @@ -134,9 +134,8 @@ static int nft_rt_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_RT_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_rt_get_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 84d317418d18..9c7ec2ec1fcf 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -326,6 +326,8 @@ static void *nft_rbtree_deactivate(const struct net *net, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; + } else if (nft_set_elem_expired(&rbe->ext)) { + break; } else if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = parent->rb_left; continue; @@ -375,23 +377,37 @@ static void nft_rbtree_gc(struct work_struct *work) struct nft_rbtree *priv; struct rb_node *node; struct nft_set *set; + struct net *net; + u8 genmask; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + genmask = nft_genmask_cur(net); write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (!nft_set_elem_active(&rbe->ext, genmask)) + continue; + + /* elements are reversed in the rbtree for historical reasons, + * from highest to lowest value, that is why end element is + * always visited before the start element. + */ if (nft_rbtree_interval_end(rbe)) { rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) continue; - if (nft_set_elem_mark_busy(&rbe->ext)) + + if (nft_set_elem_mark_busy(&rbe->ext)) { + rbe_end = NULL; continue; + } if (rbe_prev) { rb_erase(&rbe_prev->node, &priv->root); diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 4026ec38526f..7e4f7063f481 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -10,7 +10,7 @@ struct nft_socket { enum nft_socket_keys key:8; union { - enum nft_registers dreg:8; + u8 dreg; }; }; @@ -119,9 +119,8 @@ static int nft_socket_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_SOCKET_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_socket_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index b97ab1198b03..db780b5985ab 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -13,9 +13,9 @@ #endif struct nft_tproxy { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_port:8; - u8 family; + u8 sreg_addr; + u8 sreg_port; + u8 family; }; static void nft_tproxy_eval_v4(const struct nft_expr *expr, @@ -254,15 +254,15 @@ static int nft_tproxy_init(const struct nft_ctx *ctx, } if (tb[NFTA_TPROXY_REG_ADDR]) { - priv->sreg_addr = nft_parse_register(tb[NFTA_TPROXY_REG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, alen); + err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR], + &priv->sreg_addr, alen); if (err < 0) return err; } if (tb[NFTA_TPROXY_REG_PORT]) { - priv->sreg_port = nft_parse_register(tb[NFTA_TPROXY_REG_PORT]); - err = nft_validate_register_load(priv->sreg_port, sizeof(u16)); + err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT], + &priv->sreg_port, sizeof(u16)); if (err < 0) return err; } @@ -289,6 +289,13 @@ static int nft_tproxy_dump(struct sk_buff *skb, return 0; } +static int nft_tproxy_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING); +} + static struct nft_expr_type nft_tproxy_type; static const struct nft_expr_ops nft_tproxy_ops = { .type = &nft_tproxy_type, @@ -296,6 +303,7 @@ static const struct nft_expr_ops nft_tproxy_ops = { .eval = nft_tproxy_eval, .init = nft_tproxy_init, .dump = nft_tproxy_dump, + .validate = nft_tproxy_validate, }; static struct nft_expr_type nft_tproxy_type __read_mostly = { diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 3fc55c81f16a..ab69a34210a8 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -14,7 +14,7 @@ struct nft_tunnel { enum nft_tunnel_keys key:8; - enum nft_registers dreg:8; + u8 dreg; }; static void nft_tunnel_get_eval(const struct nft_expr *expr, @@ -72,10 +72,8 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]); - - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_TUNNEL_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_tunnel_get_dump(struct sk_buff *skb, diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index bf7bba80e24c..226a317d52a0 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -90,4 +90,3 @@ MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); MODULE_DESCRIPTION("Passive OS fingerprint matching."); MODULE_ALIAS("ipt_osf"); MODULE_ALIAS("ip6t_osf"); -MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 46686fb73784..0c101b25cacf 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -76,29 +76,54 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - filp = sk->sk_socket->file; - if (filp == NULL) + read_lock_bh(&sk->sk_callback_lock); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + if (filp == NULL) { + read_unlock_bh(&sk->sk_callback_lock); return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; + } if (info->match & XT_OWNER_UID) { kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ - !(info->invert & XT_OWNER_UID)) + !(info->invert & XT_OWNER_UID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } if (info->match & XT_OWNER_GID) { + unsigned int i, match = false; kgid_t gid_min = make_kgid(net->user_ns, info->gid_min); kgid_t gid_max = make_kgid(net->user_ns, info->gid_max); - if ((gid_gte(filp->f_cred->fsgid, gid_min) && - gid_lte(filp->f_cred->fsgid, gid_max)) ^ - !(info->invert & XT_OWNER_GID)) + struct group_info *gi = filp->f_cred->group_info; + + if (gid_gte(filp->f_cred->fsgid, gid_min) && + gid_lte(filp->f_cred->fsgid, gid_max)) + match = true; + + if (!match && (info->match & XT_OWNER_SUPPL_GROUPS) && gi) { + for (i = 0; i < gi->ngroups; ++i) { + kgid_t group = gi->gid[i]; + + if (gid_gte(group, gid_min) && + gid_lte(group, gid_max)) { + match = true; + break; + } + } + } + + if (match ^ !(info->invert & XT_OWNER_GID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } + read_unlock_bh(&sk->sk_callback_lock); return true; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index cb58bc7ae30d..2dbf92346a7e 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -566,7 +566,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, { struct recent_table *t = PDE_DATA(file_inode(file)); struct recent_entry *e; - char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; + char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")]; const char *c = buf; union nf_inet_addr addr = {}; u_int16_t family; diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 2d2fa1d53ea6..05495d3f47b8 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -149,6 +149,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par) { const struct xt_sctp_info *info = par->matchinfo; + if (info->flag_count > ARRAY_SIZE(info->flag_info)) + return -EINVAL; if (info->flags & ~XT_SCTP_VALID_FLAGS) return -EINVAL; if (info->invflags & ~XT_SCTP_VALID_FLAGS) diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index a95b50342dbb..58ba402bc0b0 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -95,11 +95,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par) return ret ^ data->invert; } +static int u32_mt_checkentry(const struct xt_mtchk_param *par) +{ + const struct xt_u32 *data = par->matchinfo; + const struct xt_u32_test *ct; + unsigned int i; + + if (data->ntests > ARRAY_SIZE(data->tests)) + return -EINVAL; + + for (i = 0; i < data->ntests; ++i) { + ct = &data->tests[i]; + + if (ct->nnums > ARRAY_SIZE(ct->location) || + ct->nvalues > ARRAY_SIZE(ct->value)) + return -EINVAL; + } + + return 0; +} + static struct xt_match xt_u32_mt_reg __read_mostly = { .name = "u32", .revision = 0, .family = NFPROTO_UNSPEC, .match = u32_mt, + .checkentry = u32_mt_checkentry, .matchsize = sizeof(struct xt_u32), .me = THIS_MODULE, }; diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 4d748975117d..a0b7269cf190 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -68,6 +68,28 @@ static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = { [NLBL_CALIPSO_A_MTYPE] = { .type = NLA_U32 }, }; +static const struct netlbl_calipso_ops *calipso_ops; + +/** + * netlbl_calipso_ops_register - Register the CALIPSO operations + * @ops: ops to register + * + * Description: + * Register the CALIPSO packet engine operations. + * + */ +const struct netlbl_calipso_ops * +netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops) +{ + return xchg(&calipso_ops, ops); +} +EXPORT_SYMBOL(netlbl_calipso_ops_register); + +static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void) +{ + return READ_ONCE(calipso_ops); +} + /* NetLabel Command Handlers */ /** @@ -110,16 +132,19 @@ static int netlbl_calipso_add_pass(struct genl_info *info, * */ static int netlbl_calipso_add(struct sk_buff *skb, struct genl_info *info) - { int ret_val = -EINVAL; struct netlbl_audit audit_info; + const struct netlbl_calipso_ops *ops = netlbl_calipso_ops_get(); if (!info->attrs[NLBL_CALIPSO_A_DOI] || !info->attrs[NLBL_CALIPSO_A_MTYPE]) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + if (!ops) + return -EOPNOTSUPP; + + netlbl_netlink_auditinfo(&audit_info); switch (nla_get_u32(info->attrs[NLBL_CALIPSO_A_MTYPE])) { case CALIPSO_MAP_PASS: ret_val = netlbl_calipso_add_pass(info, &audit_info); @@ -301,7 +326,7 @@ static int netlbl_calipso_remove(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NLBL_CALIPSO_A_DOI]) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); cb_arg.doi = nla_get_u32(info->attrs[NLBL_CALIPSO_A_DOI]); cb_arg.audit_info = &audit_info; ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain, @@ -375,27 +400,6 @@ int __init netlbl_calipso_genl_init(void) return genl_register_family(&netlbl_calipso_gnl_family); } -static const struct netlbl_calipso_ops *calipso_ops; - -/** - * netlbl_calipso_ops_register - Register the CALIPSO operations - * - * Description: - * Register the CALIPSO packet engine operations. - * - */ -const struct netlbl_calipso_ops * -netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops) -{ - return xchg(&calipso_ops, ops); -} -EXPORT_SYMBOL(netlbl_calipso_ops_register); - -static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void) -{ - return READ_ONCE(calipso_ops); -} - /** * calipso_doi_add - Add a new DOI to the CALIPSO protocol engine * @doi_def: the DOI structure diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index e252f62bb8c2..a0a145db3fc7 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -420,7 +420,7 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) !info->attrs[NLBL_CIPSOV4_A_MTYPE]) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); switch (nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE])) { case CIPSO_V4_MAP_TRANS: ret_val = netlbl_cipsov4_add_std(info, &audit_info); @@ -715,7 +715,7 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NLBL_CIPSOV4_A_DOI]) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); cb_arg.doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); cb_arg.audit_info = &audit_info; ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain, diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 15fe2120b310..14c3d640f94b 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -871,7 +871,8 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap, offset -= iter->startbit; idx = offset / NETLBL_CATMAP_MAPSIZE; - iter->bitmap[idx] |= bitmap << (offset % NETLBL_CATMAP_MAPSIZE); + iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap + << (offset % NETLBL_CATMAP_MAPSIZE); return 0; } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 71ba69cb50c9..43c51242dcd2 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -447,7 +447,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); return netlbl_mgmt_add_common(info, &audit_info); } @@ -470,7 +470,7 @@ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NLBL_MGMT_A_DOMAIN]) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]); return netlbl_domhsh_remove(domain, AF_UNSPEC, &audit_info); @@ -570,7 +570,7 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); return netlbl_mgmt_add_common(info, &audit_info); } @@ -589,7 +589,7 @@ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) { struct netlbl_audit audit_info; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); return netlbl_domhsh_remove_default(AF_UNSPEC, &audit_info); } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 0067f472367b..ff52ff2278ed 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -827,7 +827,7 @@ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); netlbl_unlabel_acceptflg_set(value, &audit_info); return 0; } @@ -910,7 +910,7 @@ static int netlbl_unlabel_staticadd(struct sk_buff *skb, !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) @@ -960,7 +960,7 @@ static int netlbl_unlabel_staticadddef(struct sk_buff *skb, !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) @@ -1007,7 +1007,7 @@ static int netlbl_unlabel_staticremove(struct sk_buff *skb, !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) @@ -1047,7 +1047,7 @@ static int netlbl_unlabel_staticremovedef(struct sk_buff *skb, !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) return -EINVAL; - netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_netlink_auditinfo(&audit_info); ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); if (ret_val != 0) diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 4a397cde1a48..2c608677b43b 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -42,11 +42,9 @@ /** * netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg - * @skb: the packet * @audit_info: NetLabel audit information */ -static inline void netlbl_netlink_auditinfo(struct sk_buff *skb, - struct netlbl_audit *audit_info) +static inline void netlbl_netlink_auditinfo(struct netlbl_audit *audit_info) { security_task_getsecid(current, &audit_info->secid); audit_info->loginuid = audit_get_loginuid(current); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6ffa83319d08..e91489b3274c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -578,12 +578,9 @@ static int netlink_insert(struct sock *sk, u32 portid) if (nlk_sk(sk)->bound) goto err; - err = -ENOMEM; - if (BITS_PER_LONG > 32 && - unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) - goto err; + /* portid can be read locklessly from netlink_getname(). */ + WRITE_ONCE(nlk_sk(sk)->portid, portid); - nlk_sk(sk)->portid = portid; sock_hold(sk); err = __netlink_insert(table, sk); @@ -1033,7 +1030,6 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; } - netlink_lock_table(); if (nlk->netlink_bind && groups) { int group; @@ -1045,13 +1041,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (!err) continue; netlink_undo_bind(group, groups, sk); - goto unlock; + return err; } } /* No need for barriers here as we return to user-space without * using any of the bound attributes. */ + netlink_lock_table(); if (!bound) { err = nladdr->nl_pid ? netlink_insert(sk, nladdr->nl_pid) : @@ -1093,9 +1090,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; if (addr->sa_family == AF_UNSPEC) { - sk->sk_state = NETLINK_UNCONNECTED; - nlk->dst_portid = 0; - nlk->dst_group = 0; + /* paired with READ_ONCE() in netlink_getsockbyportid() */ + WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED); + /* dst_portid and dst_group can be read locklessly */ + WRITE_ONCE(nlk->dst_portid, 0); + WRITE_ONCE(nlk->dst_group, 0); return 0; } if (addr->sa_family != AF_NETLINK) @@ -1116,9 +1115,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, err = netlink_autobind(sock); if (err == 0) { - sk->sk_state = NETLINK_CONNECTED; - nlk->dst_portid = nladdr->nl_pid; - nlk->dst_group = ffs(nladdr->nl_groups); + /* paired with READ_ONCE() in netlink_getsockbyportid() */ + WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED); + /* dst_portid and dst_group can be read locklessly */ + WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid); + WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups)); } return err; @@ -1135,10 +1136,12 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_pad = 0; if (peer) { - nladdr->nl_pid = nlk->dst_portid; - nladdr->nl_groups = netlink_group_mask(nlk->dst_group); + /* Paired with WRITE_ONCE() in netlink_connect() */ + nladdr->nl_pid = READ_ONCE(nlk->dst_portid); + nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group)); } else { - nladdr->nl_pid = nlk->portid; + /* Paired with WRITE_ONCE() in netlink_insert() */ + nladdr->nl_pid = READ_ONCE(nlk->portid); netlink_lock_table(); nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; netlink_unlock_table(); @@ -1165,8 +1168,9 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); - if (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_portid != nlk_sk(ssk)->portid) { + /* dst_portid and sk_state can be changed in netlink_connect() */ + if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED && + READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -1599,6 +1603,7 @@ out: int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; + unsigned long flags; struct sock *sk; int ret = 0; @@ -1608,12 +1613,12 @@ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) /* sk->sk_err wants a positive error value */ info.code = -code; - read_lock(&nl_table_lock); + read_lock_irqsave(&nl_table_lock, flags); sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) ret += do_one_set_err(sk, &info); - read_unlock(&nl_table_lock); + read_unlock_irqrestore(&nl_table_lock, flags); return ret; } EXPORT_SYMBOL(netlink_set_err); @@ -1734,7 +1739,8 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - int len, val, err; + unsigned int flag; + int len, val; if (level != SOL_NETLINK) return -ENOPROTOOPT; @@ -1746,39 +1752,17 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, switch (optname) { case NETLINK_PKTINFO: - if (len < sizeof(int)) - return -EINVAL; - len = sizeof(int); - val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0; - if (put_user(len, optlen) || - put_user(val, optval)) - return -EFAULT; - err = 0; + flag = NETLINK_F_RECV_PKTINFO; break; case NETLINK_BROADCAST_ERROR: - if (len < sizeof(int)) - return -EINVAL; - len = sizeof(int); - val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0; - if (put_user(len, optlen) || - put_user(val, optval)) - return -EFAULT; - err = 0; + flag = NETLINK_F_BROADCAST_SEND_ERROR; break; case NETLINK_NO_ENOBUFS: - if (len < sizeof(int)) - return -EINVAL; - len = sizeof(int); - val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0; - if (put_user(len, optlen) || - put_user(val, optval)) - return -EFAULT; - err = 0; + flag = NETLINK_F_RECV_NO_ENOBUFS; break; case NETLINK_LIST_MEMBERSHIPS: { - int pos, idx, shift; + int pos, idx, shift, err = 0; - err = 0; netlink_lock_table(); for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { if (len - pos < sizeof(u32)) @@ -1792,34 +1776,32 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, break; } } - if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) + if (put_user(ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)), optlen)) err = -EFAULT; netlink_unlock_table(); - break; + return err; } case NETLINK_CAP_ACK: - if (len < sizeof(int)) - return -EINVAL; - len = sizeof(int); - val = nlk->flags & NETLINK_F_CAP_ACK ? 1 : 0; - if (put_user(len, optlen) || - put_user(val, optval)) - return -EFAULT; - err = 0; + flag = NETLINK_F_CAP_ACK; break; case NETLINK_EXT_ACK: - if (len < sizeof(int)) - return -EINVAL; - len = sizeof(int); - val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0; - if (put_user(len, optlen) || put_user(val, optval)) - return -EFAULT; - err = 0; + flag = NETLINK_F_EXT_ACK; break; default: - err = -ENOPROTOOPT; + return -ENOPROTOOPT; } - return err; + + if (len < sizeof(int)) + return -EINVAL; + + len = sizeof(int); + val = nlk->flags & flag ? 1 : 0; + + if (put_user(len, optlen) || + copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; } static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) @@ -1878,8 +1860,9 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out; netlink_skb_flags |= NETLINK_SKB_DST; } else { - dst_portid = nlk->dst_portid; - dst_group = nlk->dst_group; + /* Paired with WRITE_ONCE() in netlink_connect() */ + dst_portid = READ_ONCE(nlk->dst_portid); + dst_group = READ_ONCE(nlk->dst_group); } /* Paired with WRITE_ONCE() in netlink_insert() */ @@ -2001,7 +1984,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); - if (nlk->cb_running && + if (READ_ONCE(nlk->cb_running) && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { ret = netlink_dump(sk); if (ret) { @@ -2283,7 +2266,7 @@ static int netlink_dump(struct sock *sk) if (cb->done) cb->done(cb); - nlk->cb_running = false; + WRITE_ONCE(nlk->cb_running, false); module = cb->module; skb = cb->skb; mutex_unlock(nlk->cb_mutex); @@ -2343,7 +2326,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, goto error_put; } - nlk->cb_running = true; + WRITE_ONCE(nlk->cb_running, true); nlk->dump_done_errno = INT_MAX; mutex_unlock(nlk->cb_mutex); @@ -2649,7 +2632,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) nlk->groups ? (u32)nlk->groups[0] : 0, sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), - nlk->cb_running, + READ_ONCE(nlk->cb_running), refcount_read(&s->sk_refcnt), atomic_read(&s->sk_drops), sock_i_ino(s) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 7dda33b9b784..85ee4891c2c7 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -93,6 +93,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; + unsigned long flags; struct sock *sk; int num = 2; int ret = 0; @@ -151,7 +152,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, num++; mc_list: - read_lock(&nl_table_lock); + read_lock_irqsave(&nl_table_lock, flags); sk_for_each_bound(sk, &tbl->mc_list) { if (sk_hashed(sk)) continue; @@ -166,13 +167,13 @@ mc_list: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - sock_i_ino(sk)) < 0) { + __sock_i_ino(sk)) < 0) { ret = 1; break; } num++; } - read_unlock(&nl_table_lock); + read_unlock_irqrestore(&nl_table_lock, flags); done: cb->args[0] = num; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index ede73ecfb1f5..f449be93b375 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -961,11 +961,46 @@ static struct genl_family genl_ctrl __ro_after_init = { .netnsok = true, }; +static int genl_bind(struct net *net, int group) +{ + const struct genl_family *family; + unsigned int id; + int ret = 0; + + genl_lock_all(); + + idr_for_each_entry(&genl_fam_idr, family, id) { + const struct genl_multicast_group *grp; + int i; + + if (family->n_mcgrps == 0) + continue; + + i = group - family->mcgrp_offset; + if (i < 0 || i >= family->n_mcgrps) + continue; + + grp = &family->mcgrps[i]; + if ((grp->flags & GENL_UNS_ADMIN_PERM) && + !ns_capable(net->user_ns, CAP_NET_ADMIN)) + ret = -EPERM; + if (grp->cap_sys_admin && + !ns_capable(net->user_ns, CAP_SYS_ADMIN)) + ret = -EPERM; + + break; + } + + genl_unlock_all(); + return ret; +} + static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, + .bind = genl_bind, }; /* we'll bump the group number right afterwards */ diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 43910e50752c..146550ce0ac6 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -403,6 +403,11 @@ static int nr_listen(struct socket *sock, int backlog) struct sock *sk = sock->sk; lock_sock(sk); + if (sock->state != SS_UNCONNECTED) { + release_sock(sk); + return -EINVAL; + } + if (sk->sk_state != TCP_LISTEN) { memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN); sk->sk_max_ack_backlog = backlog; @@ -658,6 +663,11 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, goto out_release; } + if (sock->state == SS_CONNECTING) { + err = -EALREADY; + goto out_release; + } + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 029c8bb90f4c..a7d3a265befb 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -126,7 +126,7 @@ void nr_write_internal(struct sock *sk, int frametype) unsigned char *dptr; int len, timeout; - len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; + len = NR_TRANSPORT_LEN; switch (frametype & 0x0F) { case NR_CONNREQ: @@ -144,7 +144,8 @@ void nr_write_internal(struct sock *sk, int frametype) return; } - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) + skb = alloc_skb(NR_NETWORK_LEN + len, GFP_ATOMIC); + if (!skb) return; /* @@ -152,7 +153,7 @@ void nr_write_internal(struct sock *sk, int frametype) */ skb_reserve(skb, NR_NETWORK_LEN); - dptr = skb_put(skb, skb_tailroom(skb)); + dptr = skb_put(skb, len); switch (frametype & 0x0F) { case NR_CONNREQ: diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 426d49609524..2bf99bd5be58 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -124,6 +124,7 @@ static void nr_heartbeat_expiry(struct timer_list *t) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { + sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); goto out; diff --git a/net/nfc/core.c b/net/nfc/core.c index a84f824da051..dd12ee46ac73 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -646,7 +646,7 @@ error: return rc; } -int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len) +int nfc_set_remote_general_bytes(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len); @@ -675,7 +675,7 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb) EXPORT_SYMBOL(nfc_tm_data_received); int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, - u8 *gb, size_t gb_len) + const u8 *gb, size_t gb_len) { int rc; diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index fe988936ad92..e6863c71f566 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -134,7 +134,7 @@ static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z) return ((y >= x) || (y < z)) ? true : false; } -static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc, +static struct sk_buff *llc_shdlc_alloc_skb(const struct llc_shdlc *shdlc, int payload_len) { struct sk_buff *skb; @@ -148,7 +148,7 @@ static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc, } /* immediately sends an S frame. */ -static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, +static int llc_shdlc_send_s_frame(const struct llc_shdlc *shdlc, enum sframe_type sframe_type, int nr) { int r; @@ -170,7 +170,7 @@ static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, } /* immediately sends an U frame. skb may contain optional payload */ -static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc, +static int llc_shdlc_send_u_frame(const struct llc_shdlc *shdlc, struct sk_buff *skb, enum uframe_modifier uframe_modifier) { @@ -372,7 +372,7 @@ static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r) wake_up(shdlc->connect_wq); } -static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) +static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc) { struct sk_buff *skb; @@ -388,7 +388,7 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET); } -static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc) +static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc) { struct sk_buff *skb; diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index 1f68724d44d3..a070a57fc151 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -233,15 +233,15 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock); /* TLV API */ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, - u8 *tlv_array, u16 tlv_array_len); + const u8 *tlv_array, u16 tlv_array_len); int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, - u8 *tlv_array, u16 tlv_array_len); + const u8 *tlv_array, u16 tlv_array_len); /* Commands API */ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); -u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length); +u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length); struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap); -struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri, size_t uri_len); void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head); diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index d1fc019e932e..737c7aa384f4 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -27,7 +27,7 @@ #include "nfc.h" #include "llcp.h" -static u8 llcp_tlv_length[LLCP_TLV_MAX] = { +static const u8 llcp_tlv_length[LLCP_TLV_MAX] = { 0, 1, /* VERSION */ 2, /* MIUX */ @@ -41,7 +41,7 @@ static u8 llcp_tlv_length[LLCP_TLV_MAX] = { }; -static u8 llcp_tlv8(u8 *tlv, u8 type) +static u8 llcp_tlv8(const u8 *tlv, u8 type) { if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) return 0; @@ -49,7 +49,7 @@ static u8 llcp_tlv8(u8 *tlv, u8 type) return tlv[2]; } -static u16 llcp_tlv16(u8 *tlv, u8 type) +static u16 llcp_tlv16(const u8 *tlv, u8 type) { if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) return 0; @@ -58,37 +58,37 @@ static u16 llcp_tlv16(u8 *tlv, u8 type) } -static u8 llcp_tlv_version(u8 *tlv) +static u8 llcp_tlv_version(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_VERSION); } -static u16 llcp_tlv_miux(u8 *tlv) +static u16 llcp_tlv_miux(const u8 *tlv) { return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7ff; } -static u16 llcp_tlv_wks(u8 *tlv) +static u16 llcp_tlv_wks(const u8 *tlv) { return llcp_tlv16(tlv, LLCP_TLV_WKS); } -static u16 llcp_tlv_lto(u8 *tlv) +static u16 llcp_tlv_lto(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_LTO); } -static u8 llcp_tlv_opt(u8 *tlv) +static u8 llcp_tlv_opt(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_OPT); } -static u8 llcp_tlv_rw(u8 *tlv) +static u8 llcp_tlv_rw(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf; } -u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length) +u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length) { u8 *tlv, length; @@ -142,7 +142,7 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap) return sdres; } -struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri, size_t uri_len) { struct nfc_llcp_sdp_tlv *sdreq; @@ -202,9 +202,10 @@ void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head) } int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, - u8 *tlv_array, u16 tlv_array_len) + const u8 *tlv_array, u16 tlv_array_len) { - u8 *tlv = tlv_array, type, length, offset = 0; + const u8 *tlv = tlv_array; + u8 type, length, offset = 0; pr_debug("TLV array length %d\n", tlv_array_len); @@ -251,9 +252,10 @@ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, } int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, - u8 *tlv_array, u16 tlv_array_len) + const u8 *tlv_array, u16 tlv_array_len) { - u8 *tlv = tlv_array, type, length, offset = 0; + const u8 *tlv = tlv_array; + u8 type, length, offset = 0; pr_debug("TLV array length %d\n", tlv_array_len); @@ -307,7 +309,7 @@ static struct sk_buff *llcp_add_header(struct sk_buff *pdu, return pdu; } -static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv, +static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, const u8 *tlv, u8 tlv_length) { /* XXX Add an skb length check */ @@ -401,9 +403,11 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local; struct sk_buff *skb; - u8 *service_name_tlv = NULL, service_name_tlv_length; - u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; + const u8 *service_name_tlv = NULL; + const u8 *miux_tlv = NULL; + const u8 *rw_tlv = NULL; + u8 service_name_tlv_length = 0; + u8 miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; @@ -477,8 +481,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local; struct sk_buff *skb; - u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; + const u8 *miux_tlv = NULL; + const u8 *rw_tlv = NULL; + u8 miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 4fa015208aab..a217830f0f34 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -157,6 +157,13 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device, struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) { + /* Since using nfc_llcp_local may result in usage of nfc_dev, whenever + * we hold a reference to local, we also need to hold a reference to + * the device to avoid UAF. + */ + if (!nfc_get_device(local->dev->idx)) + return NULL; + kref_get(&local->ref); return local; @@ -171,6 +178,7 @@ static void local_cleanup(struct nfc_llcp_local *local) cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); + local->rx_pending = NULL; del_timer_sync(&local->sdreq_timer); cancel_work_sync(&local->sdreq_timeout_work); nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs); @@ -189,10 +197,18 @@ static void local_release(struct kref *ref) int nfc_llcp_local_put(struct nfc_llcp_local *local) { + struct nfc_dev *dev; + int ret; + if (local == NULL) return 0; - return kref_put(&local->ref, local_release); + dev = local->dev; + + ret = kref_put(&local->ref, local_release); + nfc_put_device(dev); + + return ret; } static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, @@ -215,17 +231,13 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local, if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) { llcp_sock = tmp_sock; + sock_hold(&llcp_sock->sk); break; } } read_unlock(&local->sockets.lock); - if (llcp_sock == NULL) - return NULL; - - sock_hold(&llcp_sock->sk); - return llcp_sock; } @@ -313,7 +325,7 @@ static char *wks[] = { "urn:nfc:sn:snep", }; -static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len) +static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len) { int sap, num_wks; @@ -337,7 +349,8 @@ static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len) static struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, - u8 *sn, size_t sn_len) + const u8 *sn, size_t sn_len, + bool needref) { struct sock *sk; struct nfc_llcp_sock *llcp_sock, *tmp_sock; @@ -373,6 +386,8 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, if (memcmp(sn, tmp_sock->service_name, sn_len) == 0) { llcp_sock = tmp_sock; + if (needref) + sock_hold(&llcp_sock->sk); break; } } @@ -414,7 +429,8 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, * to this service name. */ if (nfc_llcp_sock_from_sn(local, sock->service_name, - sock->service_name_len) != NULL) { + sock->service_name_len, + false) != NULL) { mutex_unlock(&local->sdp_lock); return LLCP_SAP_MAX; @@ -534,7 +550,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { u8 *gb_cur, version, version_length; u8 lto_length, wks_length, miux_length; - u8 *version_tlv = NULL, *lto_tlv = NULL, + const u8 *version_tlv = NULL, *lto_tlv = NULL, *wks_tlv = NULL, *miux_tlv = NULL; __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; @@ -624,7 +640,7 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len) return local->gb; } -int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) +int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { struct nfc_llcp_local *local; @@ -651,27 +667,27 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) local->remote_gb_len - 3); } -static u8 nfc_llcp_dsap(struct sk_buff *pdu) +static u8 nfc_llcp_dsap(const struct sk_buff *pdu) { return (pdu->data[0] & 0xfc) >> 2; } -static u8 nfc_llcp_ptype(struct sk_buff *pdu) +static u8 nfc_llcp_ptype(const struct sk_buff *pdu) { return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6); } -static u8 nfc_llcp_ssap(struct sk_buff *pdu) +static u8 nfc_llcp_ssap(const struct sk_buff *pdu) { return pdu->data[1] & 0x3f; } -static u8 nfc_llcp_ns(struct sk_buff *pdu) +static u8 nfc_llcp_ns(const struct sk_buff *pdu) { return pdu->data[2] >> 4; } -static u8 nfc_llcp_nr(struct sk_buff *pdu) +static u8 nfc_llcp_nr(const struct sk_buff *pdu) { return pdu->data[2] & 0xf; } @@ -813,23 +829,15 @@ out: } static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local, - u8 *sn, size_t sn_len) + const u8 *sn, size_t sn_len) { - struct nfc_llcp_sock *llcp_sock; - - llcp_sock = nfc_llcp_sock_from_sn(local, sn, sn_len); - - if (llcp_sock == NULL) - return NULL; - - sock_hold(&llcp_sock->sk); - - return llcp_sock; + return nfc_llcp_sock_from_sn(local, sn, sn_len, true); } -static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len) +static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len) { - u8 *tlv = &skb->data[2], type, length; + u8 type, length; + const u8 *tlv = &skb->data[2]; size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0; while (offset < tlv_array_len) { @@ -887,7 +895,7 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, } static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct sock *new_sk, *parent; struct nfc_llcp_sock *sock, *new_sock; @@ -905,7 +913,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, goto fail; } } else { - u8 *sn; + const u8 *sn; size_t sn_len; sn = nfc_llcp_connect_sn(skb, &sn_len); @@ -958,8 +966,17 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, } new_sock = nfc_llcp_sock(new_sk); - new_sock->dev = local->dev; + new_sock->local = nfc_llcp_local_get(local); + if (!new_sock->local) { + reason = LLCP_DM_REJ; + sock_put(&new_sock->sk); + release_sock(&sock->sk); + sock_put(&sock->sk); + goto fail; + } + + new_sock->dev = local->dev; new_sock->rw = sock->rw; new_sock->miux = sock->miux; new_sock->nfc_protocol = sock->nfc_protocol; @@ -1124,7 +1141,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, } static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1167,7 +1184,8 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, nfc_llcp_sock_put(llcp_sock); } -static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb) +static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1200,7 +1218,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb) nfc_llcp_sock_put(llcp_sock); } -static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb) +static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1238,12 +1257,13 @@ static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb) } static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; - u8 dsap, ssap, *tlv, type, length, tid, sap; + u8 dsap, ssap, type, length, tid, sap; + const u8 *tlv; u16 tlv_len, offset; - char *service_name; + const char *service_name; size_t service_name_len; struct nfc_llcp_sdp_tlv *sdp; HLIST_HEAD(llc_sdres_list); @@ -1285,7 +1305,8 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, } llcp_sock = nfc_llcp_sock_from_sn(local, service_name, - service_name_len); + service_name_len, + true); if (!llcp_sock) { sap = 0; goto add_snl; @@ -1305,6 +1326,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, if (sap == LLCP_SAP_MAX) { sap = 0; + nfc_llcp_sock_put(llcp_sock); goto add_snl; } @@ -1322,6 +1344,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, pr_debug("%p %d\n", llcp_sock, sap); + nfc_llcp_sock_put(llcp_sock); add_snl: sdp = nfc_llcp_build_sdres_tlv(tid, sap); if (sdp == NULL) @@ -1585,7 +1608,16 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) if (local == NULL) return -ENOMEM; - local->dev = ndev; + /* As we are going to initialize local's refcount, we need to get the + * nfc_dev to avoid UAF, otherwise there is no point in continuing. + * See nfc_llcp_local_get(). + */ + local->dev = nfc_get_device(ndev->idx); + if (!local->dev) { + kfree(local); + return -ENODEV; + } + INIT_LIST_HEAD(&local->list); kref_init(&local->ref); mutex_init(&local->sdp_lock); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 66608e6c5b0e..33723d843e47 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -906,6 +906,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, return -EINVAL; } + if (protocol >= NFC_PROTO_MAX) { + pr_err("the requested nfc protocol is invalid\n"); + return -EINVAL; + } + if (!(nci_target->supported_protocols & (1 << protocol))) { pr_err("target does not support the requested protocol 0x%x\n", protocol); diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 1e8c1a12aaec..4f75453c07aa 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -230,6 +230,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, target->sens_res = nfca_poll->sens_res; target->sel_res = nfca_poll->sel_res; target->nfcid1_len = nfca_poll->nfcid1_len; + if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1)) + return -EPROTO; if (target->nfcid1_len > 0) { memcpy(target->nfcid1, nfca_poll->nfcid1, target->nfcid1_len); @@ -238,6 +240,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, nfcb_poll = (struct rf_tech_specific_params_nfcb_poll *)params; target->sensb_res_len = nfcb_poll->sensb_res_len; + if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res)) + return -EPROTO; if (target->sensb_res_len > 0) { memcpy(target->sensb_res, nfcb_poll->sensb_res, target->sensb_res_len); @@ -246,6 +250,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params; target->sensf_res_len = nfcf_poll->sensf_res_len; + if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res)) + return -EPROTO; if (target->sensf_res_len > 0) { memcpy(target->sensf_res, nfcf_poll->sensf_res, target->sensf_res_len); diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index 452f4c16b7a9..d2de7fc226f0 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -163,6 +163,8 @@ static int send_acknowledge(struct nci_spi *nspi, u8 acknowledge) int ret; skb = nci_skb_alloc(nspi->ndev, 0, GFP_KERNEL); + if (!skb) + return -ENOMEM; /* add the NCI SPI header to the start of the buffer */ hdr = skb_push(skb, NCI_SPI_HDR_LEN); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 39fb01ee9222..f705800b2248 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1460,8 +1460,12 @@ static int nfc_se_io(struct nfc_dev *dev, u32 se_idx, rc = dev->ops->se_io(dev, se_idx, apdu, apdu_length, cb, cb_context); + device_unlock(&dev->dev); + return rc; + error: device_unlock(&dev->dev); + kfree(cb_context); return rc; } @@ -1515,6 +1519,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) u32 dev_idx, se_idx; u8 *apdu; size_t apdu_len; + int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX] || @@ -1528,25 +1533,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) if (!dev) return -ENODEV; - if (!dev->ops || !dev->ops->se_io) - return -ENOTSUPP; + if (!dev->ops || !dev->ops->se_io) { + rc = -EOPNOTSUPP; + goto put_dev; + } apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]); - if (apdu_len == 0) - return -EINVAL; + if (apdu_len == 0) { + rc = -EINVAL; + goto put_dev; + } apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]); - if (!apdu) - return -EINVAL; + if (!apdu) { + rc = -EINVAL; + goto put_dev; + } ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; + if (!ctx) { + rc = -ENOMEM; + goto put_dev; + } ctx->dev_idx = dev_idx; ctx->se_idx = se_idx; - return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); + rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); + +put_dev: + nfc_put_device(dev); + return rc; } static int nfc_genl_vendor_cmd(struct sk_buff *skb, @@ -1569,14 +1586,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); dev = nfc_get_device(dev_idx); - if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds) + if (!dev) return -ENODEV; + if (!dev->vendor_cmds || !dev->n_vendor_cmds) { + err = -ENODEV; + goto put_dev; + } + if (info->attrs[NFC_ATTR_VENDOR_DATA]) { data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); - if (data_len == 0) - return -EINVAL; + if (data_len == 0) { + err = -EINVAL; + goto put_dev; + } } else { data = NULL; data_len = 0; @@ -1591,10 +1615,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, dev->cur_cmd_info = info; err = cmd->doit(dev, data, data_len); dev->cur_cmd_info = NULL; - return err; + goto put_dev; } - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + +put_dev: + nfc_put_device(dev); + return err; } /* message building helper */ diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 6c6f76b370b1..c792165f523f 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -60,7 +60,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_llcp_register_device(struct nfc_dev *dev); void nfc_llcp_unregister_device(struct nfc_dev *dev); -int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len); +int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index 1a30e165eeb4..a5fa25555d7e 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -80,13 +80,12 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; unsigned int nsh_len, mac_len; __be16 proto; - int nhoff; skb_reset_network_header(skb); - nhoff = skb->network_header - skb->mac_header; mac_len = skb->mac_len; if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) @@ -111,15 +110,14 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len, - skb->network_header - nhoff, - mac_len); + mac_offset, mac_len); goto out; } for (skb = segs; skb; skb = skb->next) { skb->protocol = htons(ETH_P_NSH); __skb_push(skb, nsh_len); - skb_set_mac_header(skb, -nhoff); + skb->mac_header = mac_offset; skb->network_header = skb->mac_header + mac_len; skb->mac_len = mac_len; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e9a10a66b4ca..0551915519d9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -903,6 +903,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; + struct sw_flow_key *key; struct sw_flow_actions *acts; struct sw_flow_match match; u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); @@ -930,30 +931,32 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &new_flow->key, false, &mask); + key = kzalloc(sizeof(*key), GFP_KERNEL); + if (!key) { + error = -ENOMEM; + goto err_kfree_flow; + } + + ovs_match_init(&match, key, false, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) - goto err_kfree_flow; + goto err_kfree_key; + + ovs_flow_mask_key(&new_flow->key, key, true, &mask); /* Extract flow identifier. */ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], - &new_flow->key, log); + key, log); if (error) - goto err_kfree_flow; - - /* unmasked key is needed to match when ufid is not used. */ - if (ovs_identifier_is_key(&new_flow->id)) - match.key = new_flow->id.unmasked_key; - - ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask); + goto err_kfree_key; /* Validate actions. */ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts, log); if (error) { OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); - goto err_kfree_flow; + goto err_kfree_key; } reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, @@ -974,7 +977,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) if (ovs_identifier_is_ufid(&new_flow->id)) flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); if (!flow) - flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key); + flow = ovs_flow_tbl_lookup(&dp->table, key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); @@ -1044,6 +1047,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) if (reply) ovs_notify(&dp_flow_genl_family, reply, info); + + kfree(key); return 0; err_unlock_ovs: @@ -1051,6 +1056,8 @@ err_unlock_ovs: kfree_skb(reply); err_kfree_acts: ovs_nla_free_flow_actions(acts); +err_kfree_key: + kfree(key); err_kfree_flow: ovs_flow_free(new_flow, false); error: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6fa0a9a453a8..377832981178 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -370,18 +370,20 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) { union tpacket_uhdr h; + /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */ + h.raw = frame; switch (po->tp_version) { case TPACKET_V1: - h.h1->tp_status = status; + WRITE_ONCE(h.h1->tp_status, status); flush_dcache_page(pgv_to_page(&h.h1->tp_status)); break; case TPACKET_V2: - h.h2->tp_status = status; + WRITE_ONCE(h.h2->tp_status, status); flush_dcache_page(pgv_to_page(&h.h2->tp_status)); break; case TPACKET_V3: - h.h3->tp_status = status; + WRITE_ONCE(h.h3->tp_status, status); flush_dcache_page(pgv_to_page(&h.h3->tp_status)); break; default: @@ -398,17 +400,19 @@ static int __packet_get_status(struct packet_sock *po, void *frame) smp_rmb(); + /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */ + h.raw = frame; switch (po->tp_version) { case TPACKET_V1: flush_dcache_page(pgv_to_page(&h.h1->tp_status)); - return h.h1->tp_status; + return READ_ONCE(h.h1->tp_status); case TPACKET_V2: flush_dcache_page(pgv_to_page(&h.h2->tp_status)); - return h.h2->tp_status; + return READ_ONCE(h.h2->tp_status); case TPACKET_V3: flush_dcache_page(pgv_to_page(&h.h3->tp_status)); - return h.h3->tp_status; + return READ_ONCE(h.h3->tp_status); default: WARN(1, "TPACKET version not supported.\n"); BUG(); @@ -1955,7 +1959,7 @@ retry: goto retry; } - if (!dev_validate_header(dev, skb->data, len)) { + if (!dev_validate_header(dev, skb->data, len) || !skb->len) { err = -EINVAL; goto out_unlock; } @@ -2105,7 +2109,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, sll = &PACKET_SKB_CB(skb)->sa.ll; sll->sll_hatype = dev->type; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev)) + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -2371,7 +2375,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sll->sll_hatype = dev->type; sll->sll_protocol = skb->protocol; sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev)) + if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) sll->sll_ifindex = orig_dev->ifindex; else sll->sll_ifindex = dev->ifindex; @@ -3117,6 +3121,9 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, lock_sock(sk); spin_lock(&po->bind_lock); + if (!proto) + proto = po->num; + rcu_read_lock(); if (po->fanout) { @@ -3219,7 +3226,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); name[sizeof(uaddr->sa_data)] = 0; - return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); + return packet_do_bind(sk, name, 0, 0); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) @@ -3236,8 +3243,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_family != AF_PACKET) return -EINVAL; - return packet_do_bind(sk, NULL, sll->sll_ifindex, - sll->sll_protocol ? : pkt_sk(sk)->num); + return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol); } static struct proto packet_proto = { @@ -3444,7 +3450,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } - if (pkt_sk(sk)->auxdata) { + if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { struct tpacket_auxdata aux; aux.tp_status = TP_STATUS_USER; @@ -3827,9 +3833,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - lock_sock(sk); - po->auxdata = !!val; - release_sock(sk); + packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); return 0; } case PACKET_ORIGDEV: @@ -3841,9 +3845,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - lock_sock(sk); - po->origdev = !!val; - release_sock(sk); + packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); return 0; } case PACKET_VNET_HDR: @@ -3973,10 +3975,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, break; case PACKET_AUXDATA: - val = po->auxdata; + val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); break; case PACKET_ORIGDEV: - val = po->origdev; + val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); break; case PACKET_VNET_HDR: val = po->has_vnet_hdr; @@ -4212,7 +4214,7 @@ static void packet_mm_open(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_inc(&pkt_sk(sk)->mapped); + atomic_long_inc(&pkt_sk(sk)->mapped); } static void packet_mm_close(struct vm_area_struct *vma) @@ -4222,7 +4224,7 @@ static void packet_mm_close(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_dec(&pkt_sk(sk)->mapped); + atomic_long_dec(&pkt_sk(sk)->mapped); } static const struct vm_operations_struct packet_mmap_ops = { @@ -4317,7 +4319,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; if (!closing) { - if (atomic_read(&po->mapped)) + if (atomic_long_read(&po->mapped)) goto out; if (packet_read_pending(rb)) goto out; @@ -4420,7 +4422,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; mutex_lock(&po->pg_vec_lock); - if (closing || atomic_read(&po->mapped) == 0) { + if (closing || atomic_long_read(&po->mapped) == 0) { err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); @@ -4438,9 +4440,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, po->prot_hook.func = (po->rx_ring.pg_vec) ? tpacket_rcv : packet_rcv; skb_queue_purge(rb_queue); - if (atomic_read(&po->mapped)) - pr_err("packet_mmap: vma is busy: %d\n", - atomic_read(&po->mapped)); + if (atomic_long_read(&po->mapped)) + pr_err("packet_mmap: vma is busy: %ld\n", + atomic_long_read(&po->mapped)); } mutex_unlock(&po->pg_vec_lock); @@ -4518,7 +4520,7 @@ static int packet_mmap(struct file *file, struct socket *sock, } } - atomic_inc(&po->mapped); + atomic_long_inc(&po->mapped); vma->vm_ops = &packet_mmap_ops; err = 0; diff --git a/net/packet/diag.c b/net/packet/diag.c index 7ef1c881ae74..ecabf78d29b8 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -22,9 +22,9 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) pinfo.pdi_flags = 0; if (po->running) pinfo.pdi_flags |= PDI_RUNNING; - if (po->auxdata) + if (packet_sock_flag(po, PACKET_SOCK_AUXDATA)) pinfo.pdi_flags |= PDI_AUXDATA; - if (po->origdev) + if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV)) pinfo.pdi_flags |= PDI_ORIGDEV; if (po->has_vnet_hdr) pinfo.pdi_flags |= PDI_VNETHDR; @@ -142,7 +142,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, rp = nlmsg_data(nlh); rp->pdiag_family = AF_PACKET; rp->pdiag_type = sk->sk_type; - rp->pdiag_num = ntohs(po->num); + rp->pdiag_num = ntohs(READ_ONCE(po->num)); rp->pdiag_ino = sk_ino; sock_diag_save_cookie(sk, rp->pdiag_cookie); diff --git a/net/packet/internal.h b/net/packet/internal.h index f10294800aaf..7f2d5eed5e00 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -115,10 +115,9 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; + unsigned long flags; unsigned int running; /* bind_lock must be held */ - unsigned int auxdata:1, /* writer must hold sock lock */ - origdev:1, - has_vnet_hdr:1, + unsigned int has_vnet_hdr:1, /* writer must hold sock lock */ tp_loss:1, tp_tx_has_off:1; int pressure; @@ -126,7 +125,7 @@ struct packet_sock { __be16 num; struct packet_rollover *rollover; struct packet_mclist *mclist; - atomic_t mapped; + atomic_long_t mapped; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; @@ -142,4 +141,25 @@ static struct packet_sock *pkt_sk(struct sock *sk) return (struct packet_sock *)sk; } +enum packet_sock_flags { + PACKET_SOCK_ORIGDEV, + PACKET_SOCK_AUXDATA, +}; + +static inline void packet_sock_flag_set(struct packet_sock *po, + enum packet_sock_flags flag, + bool val) +{ + if (val) + set_bit(flag, &po->flags); + else + clear_bit(flag, &po->flags); +} + +static inline bool packet_sock_flag(const struct packet_sock *po, + enum packet_sock_flags flag) +{ + return test_bit(flag, &po->flags); +} + #endif diff --git a/net/psample/psample.c b/net/psample/psample.c index 30e8239bd774..196fbf674dc1 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -31,7 +31,8 @@ enum psample_nl_multicast_groups { static const struct genl_multicast_group psample_nl_mcgrps[] = { [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME }, - [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME }, + [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME, + .flags = GENL_UNS_ADMIN_PERM }, }; static struct genl_family psample_nl_family __ro_after_init; diff --git a/net/rds/message.c b/net/rds/message.c index 4b00b1152a5f..29f67ad483ea 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, spin_lock_irqsave(&q->lock, flags); head = &q->zcookie_head; if (!list_empty(head)) { - info = list_entry(head, struct rds_msg_zcopy_info, - rs_zcookie_next); - if (info && rds_zcookie_add(info, cookie)) { + info = list_first_entry(head, struct rds_msg_zcopy_info, + rs_zcookie_next); + if (rds_zcookie_add(info, cookie)) { spin_unlock_irqrestore(&q->lock, flags); kfree(rds_info_from_znotifier(znotif)); /* caller invokes rds_wake_sk_sleep() */ @@ -118,7 +118,7 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, ck = &info->zcookies; memset(ck, 0, sizeof(*ck)); WARN_ON(!rds_zcookie_add(info, cookie)); - list_add_tail(&q->zcookie_head, &info->rs_zcookie_next); + list_add_tail(&info->rs_zcookie_next, &q->zcookie_head); spin_unlock_irqrestore(&q->lock, flags); /* caller invokes rds_wake_sk_sleep() */ diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 008f50fb25dd..63efe60fda1f 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -141,7 +141,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) addrlen = sizeof(sin); } - ret = sock->ops->bind(sock, addr, addrlen); + ret = kernel_bind(sock, addr, addrlen); if (ret) { rdsdebug("bind failed with %d at address %pI6c\n", ret, &conn->c_laddr); @@ -169,7 +169,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) * own the socket */ rds_tcp_set_callbacks(sock, cp); - ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK); + ret = kernel_connect(sock, addr, addrlen, O_NONBLOCK); rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 0d095d3f5fee..37f4a8ca3ac8 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -332,7 +332,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6) addr_len = sizeof(*sin); } - ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len); + ret = kernel_bind(sock, (struct sockaddr *)&ss, addr_len); if (ret < 0) { rdsdebug("could not bind %s listener socket: %d\n", isv6 ? "IPv6" : "IPv4", ret); diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 0f8465852254..7524544a965f 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -112,13 +112,13 @@ static int rfkill_gpio_probe(struct platform_device *pdev) rfkill->clk = devm_clk_get(&pdev->dev, NULL); - gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW); + gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS); if (IS_ERR(gpio)) return PTR_ERR(gpio); rfkill->reset_gpio = gpio; - gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW); + gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_ASIS); if (IS_ERR(gpio)) return PTR_ERR(gpio); @@ -130,6 +130,14 @@ static int rfkill_gpio_probe(struct platform_device *pdev) return -EINVAL; } + ret = gpiod_direction_output(rfkill->reset_gpio, true); + if (ret) + return ret; + + ret = gpiod_direction_output(rfkill->shutdown_gpio, true); + if (ret) + return ret; + rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev, rfkill->type, &rfkill_gpio_ops, rfkill); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 03a1ee221112..015e475f5554 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -184,21 +184,47 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) */ static void rose_kill_by_device(struct net_device *dev) { - struct sock *s; + struct sock *sk, *array[16]; + struct rose_sock *rose; + bool rescan; + int i, cnt; +start: + rescan = false; + cnt = 0; spin_lock_bh(&rose_list_lock); - sk_for_each(s, &rose_list) { - struct rose_sock *rose = rose_sk(s); + sk_for_each(sk, &rose_list) { + rose = rose_sk(sk); + if (rose->device == dev) { + if (cnt == ARRAY_SIZE(array)) { + rescan = true; + break; + } + sock_hold(sk); + array[cnt++] = sk; + } + } + spin_unlock_bh(&rose_list_lock); + for (i = 0; i < cnt; i++) { + sk = array[cnt]; + rose = rose_sk(sk); + lock_sock(sk); + spin_lock_bh(&rose_list_lock); if (rose->device == dev) { - rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); + rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); if (rose->neighbour) rose->neighbour->use--; dev_put(rose->device); rose->device = NULL; } + spin_unlock_bh(&rose_list_lock); + release_sock(sk); + sock_put(sk); + cond_resched(); } - spin_unlock_bh(&rose_list_lock); + if (rescan) + goto start; } /* @@ -490,6 +516,12 @@ static int rose_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; + lock_sock(sk); + if (sock->state != SS_UNCONNECTED) { + release_sock(sk); + return -EINVAL; + } + if (sk->sk_state != TCP_LISTEN) { struct rose_sock *rose = rose_sk(sk); @@ -499,8 +531,10 @@ static int rose_listen(struct socket *sock, int backlog) memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS); sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; + release_sock(sk); return 0; } + release_sock(sk); return -EOPNOTSUPP; } @@ -650,7 +684,10 @@ static int rose_release(struct socket *sock) break; } + spin_lock_bh(&rose_list_lock); dev_put(rose->device); + rose->device = NULL; + spin_unlock_bh(&rose_list_lock); sock->sk = NULL; release_sock(sk); sock_put(sk); @@ -1300,9 +1337,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case TIOCINQ: { struct sk_buff *skb; long amount = 0L; - /* These two are safe on a single CPU system as only user tasks fiddle here */ + + spin_lock_irq(&sk->sk_receive_queue.lock); if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; + spin_unlock_irq(&sk->sk_receive_queue.lock); return put_user(amount, (unsigned int __user *) argp); } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0220a2935002..eaa032c498c9 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -689,7 +689,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (call->tx_total_len != -1 || call->tx_pending || call->tx_top != 0) - goto error_put; + goto out_put_unlock; call->tx_total_len = p.call.tx_total_len; } } @@ -709,7 +709,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* Fall through */ case 1: if (p.call.timeouts.hard > 0) { - j = msecs_to_jiffies(p.call.timeouts.hard); + j = p.call.timeouts.hard * HZ; now = jiffies; j += now; WRITE_ONCE(call->expect_term_by, j); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e95741388311..7698a8974a47 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -458,17 +458,6 @@ config NET_CLS_BASIC To compile this code as a module, choose M here: the module will be called cls_basic. -config NET_CLS_TCINDEX - tristate "Traffic-Control Index (TCINDEX)" - select NET_CLS - ---help--- - Say Y here if you want to be able to classify packets based on - traffic control indices. You will want this feature if you want - to implement Differentiated Services together with DSMARK. - - To compile this code as a module, choose M here: the - module will be called cls_tcindex. - config NET_CLS_ROUTE4 tristate "Routing decision (ROUTE)" depends on INET @@ -514,34 +503,6 @@ config CLS_U32_MARK ---help--- Say Y here to be able to use netfilter marks as u32 key. -config NET_CLS_RSVP - tristate "IPv4 Resource Reservation Protocol (RSVP)" - select NET_CLS - ---help--- - The Resource Reservation Protocol (RSVP) permits end systems to - request a minimum and maximum data flow rate for a connection; this - is important for real time data such as streaming sound or video. - - Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests. - - To compile this code as a module, choose M here: the - module will be called cls_rsvp. - -config NET_CLS_RSVP6 - tristate "IPv6 Resource Reservation Protocol (RSVP6)" - select NET_CLS - ---help--- - The Resource Reservation Protocol (RSVP) permits end systems to - request a minimum and maximum data flow rate for a connection; this - is important for real time data such as streaming sound or video. - - Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests and you are using the IPv6 protocol. - - To compile this code as a module, choose M here: the - module will be called cls_rsvp6. - config NET_CLS_FLOW tristate "Flow classifier" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index f0403f49edcb..3139c32e1947 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -61,9 +61,6 @@ obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o -obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o -obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o -obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index a30c17a28281..9aad86e4a0fb 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -220,7 +220,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, goto out; } - if (unlikely(!(dev->flags & IFF_UP))) { + if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); goto out; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index aeb8f84cbd9e..255d4ecf6252 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -29,6 +29,7 @@ static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, + [TCA_PEDIT_PARMS_EX] = { .len = sizeof(struct tc_pedit) }, [TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED }, }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 435911dc9f16..6166bbad9753 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,8 +31,6 @@ #include <net/pkt_sched.h> #include <net/pkt_cls.h> -extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; - /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); @@ -1840,6 +1838,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); + module_put(ops->owner); return -EOPNOTSUPP; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 55bf75cb1f16..164049d20f4d 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -229,7 +229,7 @@ static u32 flow_get_skgid(const struct sk_buff *skb) static u32 flow_get_vlan_tag(const struct sk_buff *skb) { - u16 uninitialized_var(tag); + u16 tag; if (vlan_get_tag(skb, &tag) < 0) return 0; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 6163648145c1..7ffa28a98d74 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -640,6 +640,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, if (option_len > sizeof(struct geneve_opt)) data_len = option_len - sizeof(struct geneve_opt); + if (key->enc_opts.len > FLOW_DIS_TUN_OPTS_MAX - 4) + return -ERANGE; + opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len]; memset(opt, 0xff, option_len); opt->length = data_len / 4; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index cb2c62605fc7..f15089c24a32 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -221,11 +221,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, if (err < 0) return err; - if (tb[TCA_FW_CLASSID]) { - f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); - tcf_bind_filter(tp, &f->res, base); - } - #ifdef CONFIG_NET_CLS_IND if (tb[TCA_FW_INDEV]) { int ret; @@ -244,6 +239,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, } else if (head->mask != 0xFFFFFFFF) return err; + if (tb[TCA_FW_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); + tcf_bind_filter(tp, &f->res, base); + } + return 0; } @@ -277,7 +277,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return -ENOBUFS; fnew->id = f->id; - fnew->res = f->res; #ifdef CONFIG_NET_CLS_IND fnew->ifindex = f->ifindex; #endif /* CONFIG_NET_CLS_IND */ diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 4c7fa1cfd8e3..a924292623ec 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -513,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (fold) { f->id = fold->id; f->iif = fold->iif; - f->res = fold->res; f->handle = fold->handle; f->tp = fold->tp; diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c deleted file mode 100644 index cbb5e0d600f3..000000000000 --- a/net/sched/cls_rsvp.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/act_api.h> -#include <net/pkt_cls.h> - -#define RSVP_DST_LEN 1 -#define RSVP_ID "rsvp" -#define RSVP_OPS cls_rsvp_ops - -#include "cls_rsvp.h" -MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h deleted file mode 100644 index eb1dd2afc5a1..000000000000 --- a/net/sched/cls_rsvp.h +++ /dev/null @@ -1,775 +0,0 @@ -/* - * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - */ - -/* - Comparing to general packet classification problem, - RSVP needs only sevaral relatively simple rules: - - * (dst, protocol) are always specified, - so that we are able to hash them. - * src may be exact, or may be wildcard, so that - we can keep a hash table plus one wildcard entry. - * source port (or flow label) is important only if src is given. - - IMPLEMENTATION. - - We use a two level hash table: The top level is keyed by - destination address and protocol ID, every bucket contains a list - of "rsvp sessions", identified by destination address, protocol and - DPI(="Destination Port ID"): triple (key, mask, offset). - - Every bucket has a smaller hash table keyed by source address - (cf. RSVP flowspec) and one wildcard entry for wildcard reservations. - Every bucket is again a list of "RSVP flows", selected by - source address and SPI(="Source Port ID" here rather than - "security parameter index"): triple (key, mask, offset). - - - NOTE 1. All the packets with IPv6 extension headers (but AH and ESP) - and all fragmented packets go to the best-effort traffic class. - - - NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires - only one "Generalized Port Identifier". So that for classic - ah, esp (and udp,tcp) both *pi should coincide or one of them - should be wildcard. - - At first sight, this redundancy is just a waste of CPU - resources. But DPI and SPI add the possibility to assign different - priorities to GPIs. Look also at note 4 about tunnels below. - - - NOTE 3. One complication is the case of tunneled packets. - We implement it as following: if the first lookup - matches a special session with "tunnelhdr" value not zero, - flowid doesn't contain the true flow ID, but the tunnel ID (1...255). - In this case, we pull tunnelhdr bytes and restart lookup - with tunnel ID added to the list of keys. Simple and stupid 8)8) - It's enough for PIMREG and IPIP. - - - NOTE 4. Two GPIs make it possible to parse even GRE packets. - F.e. DPI can select ETH_P_IP (and necessary flags to make - tunnelhdr correct) in GRE protocol field and SPI matches - GRE key. Is it not nice? 8)8) - - - Well, as result, despite its simplicity, we get a pretty - powerful classification engine. */ - - -struct rsvp_head { - u32 tmap[256/32]; - u32 hgenerator; - u8 tgenerator; - struct rsvp_session __rcu *ht[256]; - struct rcu_head rcu; -}; - -struct rsvp_session { - struct rsvp_session __rcu *next; - __be32 dst[RSVP_DST_LEN]; - struct tc_rsvp_gpi dpi; - u8 protocol; - u8 tunnelid; - /* 16 (src,sport) hash slots, and one wildcard source slot */ - struct rsvp_filter __rcu *ht[16 + 1]; - struct rcu_head rcu; -}; - - -struct rsvp_filter { - struct rsvp_filter __rcu *next; - __be32 src[RSVP_DST_LEN]; - struct tc_rsvp_gpi spi; - u8 tunnelhdr; - - struct tcf_result res; - struct tcf_exts exts; - - u32 handle; - struct rsvp_session *sess; - struct rcu_work rwork; -}; - -static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) -{ - unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1]; - - h ^= h>>16; - h ^= h>>8; - return (h ^ protocol ^ tunnelid) & 0xFF; -} - -static inline unsigned int hash_src(__be32 *src) -{ - unsigned int h = (__force __u32)src[RSVP_DST_LEN-1]; - - h ^= h>>16; - h ^= h>>8; - h ^= h>>4; - return h & 0xF; -} - -#define RSVP_APPLY_RESULT() \ -{ \ - int r = tcf_exts_exec(skb, &f->exts, res); \ - if (r < 0) \ - continue; \ - else if (r > 0) \ - return r; \ -} - -static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) -{ - struct rsvp_head *head = rcu_dereference_bh(tp->root); - struct rsvp_session *s; - struct rsvp_filter *f; - unsigned int h1, h2; - __be32 *dst, *src; - u8 protocol; - u8 tunnelid = 0; - u8 *xprt; -#if RSVP_DST_LEN == 4 - struct ipv6hdr *nhptr; - - if (!pskb_network_may_pull(skb, sizeof(*nhptr))) - return -1; - nhptr = ipv6_hdr(skb); -#else - struct iphdr *nhptr; - - if (!pskb_network_may_pull(skb, sizeof(*nhptr))) - return -1; - nhptr = ip_hdr(skb); -#endif -restart: - -#if RSVP_DST_LEN == 4 - src = &nhptr->saddr.s6_addr32[0]; - dst = &nhptr->daddr.s6_addr32[0]; - protocol = nhptr->nexthdr; - xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr); -#else - src = &nhptr->saddr; - dst = &nhptr->daddr; - protocol = nhptr->protocol; - xprt = ((u8 *)nhptr) + (nhptr->ihl<<2); - if (ip_is_fragment(nhptr)) - return -1; -#endif - - h1 = hash_dst(dst, protocol, tunnelid); - h2 = hash_src(src); - - for (s = rcu_dereference_bh(head->ht[h1]); s; - s = rcu_dereference_bh(s->next)) { - if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] && - protocol == s->protocol && - !(s->dpi.mask & - (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) && -#if RSVP_DST_LEN == 4 - dst[0] == s->dst[0] && - dst[1] == s->dst[1] && - dst[2] == s->dst[2] && -#endif - tunnelid == s->tunnelid) { - - for (f = rcu_dereference_bh(s->ht[h2]); f; - f = rcu_dereference_bh(f->next)) { - if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] && - !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key)) -#if RSVP_DST_LEN == 4 - && - src[0] == f->src[0] && - src[1] == f->src[1] && - src[2] == f->src[2] -#endif - ) { - *res = f->res; - RSVP_APPLY_RESULT(); - -matched: - if (f->tunnelhdr == 0) - return 0; - - tunnelid = f->res.classid; - nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr)); - goto restart; - } - } - - /* And wildcard bucket... */ - for (f = rcu_dereference_bh(s->ht[16]); f; - f = rcu_dereference_bh(f->next)) { - *res = f->res; - RSVP_APPLY_RESULT(); - goto matched; - } - return -1; - } - } - return -1; -} - -static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_session *s; - struct rsvp_filter __rcu **ins; - struct rsvp_filter *pins; - unsigned int h1 = h & 0xFF; - unsigned int h2 = (h >> 8) & 0xFF; - - for (s = rtnl_dereference(head->ht[h1]); s; - s = rtnl_dereference(s->next)) { - for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ; - ins = &pins->next, pins = rtnl_dereference(*ins)) { - if (pins->handle == h) { - RCU_INIT_POINTER(n->next, pins->next); - rcu_assign_pointer(*ins, n); - return; - } - } - } - - /* Something went wrong if we are trying to replace a non-existant - * node. Mind as well halt instead of silently failing. - */ - BUG_ON(1); -} - -static void *rsvp_get(struct tcf_proto *tp, u32 handle) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_session *s; - struct rsvp_filter *f; - unsigned int h1 = handle & 0xFF; - unsigned int h2 = (handle >> 8) & 0xFF; - - if (h2 > 16) - return NULL; - - for (s = rtnl_dereference(head->ht[h1]); s; - s = rtnl_dereference(s->next)) { - for (f = rtnl_dereference(s->ht[h2]); f; - f = rtnl_dereference(f->next)) { - if (f->handle == handle) - return f; - } - } - return NULL; -} - -static int rsvp_init(struct tcf_proto *tp) -{ - struct rsvp_head *data; - - data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL); - if (data) { - rcu_assign_pointer(tp->root, data); - return 0; - } - return -ENOBUFS; -} - -static void __rsvp_delete_filter(struct rsvp_filter *f) -{ - tcf_exts_destroy(&f->exts); - tcf_exts_put_net(&f->exts); - kfree(f); -} - -static void rsvp_delete_filter_work(struct work_struct *work) -{ - struct rsvp_filter *f = container_of(to_rcu_work(work), - struct rsvp_filter, - rwork); - rtnl_lock(); - __rsvp_delete_filter(f); - rtnl_unlock(); -} - -static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) -{ - tcf_unbind_filter(tp, &f->res); - /* all classifiers are required to call tcf_exts_destroy() after rcu - * grace period, since converted-to-rcu actions are relying on that - * in cleanup() callback - */ - if (tcf_exts_get_net(&f->exts)) - tcf_queue_work(&f->rwork, rsvp_delete_filter_work); - else - __rsvp_delete_filter(f); -} - -static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - int h1, h2; - - if (data == NULL) - return; - - for (h1 = 0; h1 < 256; h1++) { - struct rsvp_session *s; - - while ((s = rtnl_dereference(data->ht[h1])) != NULL) { - RCU_INIT_POINTER(data->ht[h1], s->next); - - for (h2 = 0; h2 <= 16; h2++) { - struct rsvp_filter *f; - - while ((f = rtnl_dereference(s->ht[h2])) != NULL) { - rcu_assign_pointer(s->ht[h2], f->next); - rsvp_delete_filter(tp, f); - } - } - kfree_rcu(s, rcu); - } - } - kfree_rcu(data, rcu); -} - -static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_filter *nfp, *f = arg; - struct rsvp_filter __rcu **fp; - unsigned int h = f->handle; - struct rsvp_session __rcu **sp; - struct rsvp_session *nsp, *s = f->sess; - int i, h1; - - fp = &s->ht[(h >> 8) & 0xFF]; - for (nfp = rtnl_dereference(*fp); nfp; - fp = &nfp->next, nfp = rtnl_dereference(*fp)) { - if (nfp == f) { - RCU_INIT_POINTER(*fp, f->next); - rsvp_delete_filter(tp, f); - - /* Strip tree */ - - for (i = 0; i <= 16; i++) - if (s->ht[i]) - goto out; - - /* OK, session has no flows */ - sp = &head->ht[h & 0xFF]; - for (nsp = rtnl_dereference(*sp); nsp; - sp = &nsp->next, nsp = rtnl_dereference(*sp)) { - if (nsp == s) { - RCU_INIT_POINTER(*sp, s->next); - kfree_rcu(s, rcu); - goto out; - } - } - - break; - } - } - -out: - *last = true; - for (h1 = 0; h1 < 256; h1++) { - if (rcu_access_pointer(head->ht[h1])) { - *last = false; - break; - } - } - - return 0; -} - -static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - int i = 0xFFFF; - - while (i-- > 0) { - u32 h; - - if ((data->hgenerator += 0x10000) == 0) - data->hgenerator = 0x10000; - h = data->hgenerator|salt; - if (!rsvp_get(tp, h)) - return h; - } - return 0; -} - -static int tunnel_bts(struct rsvp_head *data) -{ - int n = data->tgenerator >> 5; - u32 b = 1 << (data->tgenerator & 0x1F); - - if (data->tmap[n] & b) - return 0; - data->tmap[n] |= b; - return 1; -} - -static void tunnel_recycle(struct rsvp_head *data) -{ - struct rsvp_session __rcu **sht = data->ht; - u32 tmap[256/32]; - int h1, h2; - - memset(tmap, 0, sizeof(tmap)); - - for (h1 = 0; h1 < 256; h1++) { - struct rsvp_session *s; - for (s = rtnl_dereference(sht[h1]); s; - s = rtnl_dereference(s->next)) { - for (h2 = 0; h2 <= 16; h2++) { - struct rsvp_filter *f; - - for (f = rtnl_dereference(s->ht[h2]); f; - f = rtnl_dereference(f->next)) { - if (f->tunnelhdr == 0) - continue; - data->tgenerator = f->res.classid; - tunnel_bts(data); - } - } - } - } - - memcpy(data->tmap, tmap, sizeof(tmap)); -} - -static u32 gen_tunnel(struct rsvp_head *data) -{ - int i, k; - - for (k = 0; k < 2; k++) { - for (i = 255; i > 0; i--) { - if (++data->tgenerator == 0) - data->tgenerator = 1; - if (tunnel_bts(data)) - return data->tgenerator; - } - tunnel_recycle(data); - } - return 0; -} - -static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { - [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, - [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, -}; - -static int rsvp_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - struct rsvp_filter *f, *nfp; - struct rsvp_filter __rcu **fp; - struct rsvp_session *nsp, *s; - struct rsvp_session __rcu **sp; - struct tc_rsvp_pinfo *pinfo = NULL; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_RSVP_MAX + 1]; - struct tcf_exts e; - unsigned int h1, h2; - __be32 *dst; - int err; - - if (opt == NULL) - return handle ? -EINVAL : 0; - - err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL); - if (err < 0) - return err; - - err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); - if (err < 0) - return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack); - if (err < 0) - goto errout2; - - f = *arg; - if (f) { - /* Node exists: adjust only classid */ - struct rsvp_filter *n; - - if (f->handle != handle && handle) - goto errout2; - - n = kmemdup(f, sizeof(*f), GFP_KERNEL); - if (!n) { - err = -ENOMEM; - goto errout2; - } - - err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); - if (err < 0) { - kfree(n); - goto errout2; - } - - if (tb[TCA_RSVP_CLASSID]) { - n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - tcf_bind_filter(tp, &n->res, base); - } - - tcf_exts_change(&n->exts, &e); - rsvp_replace(tp, n, handle); - return 0; - } - - /* Now more serious part... */ - err = -EINVAL; - if (handle) - goto errout2; - if (tb[TCA_RSVP_DST] == NULL) - goto errout2; - - err = -ENOBUFS; - f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL); - if (f == NULL) - goto errout2; - - err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); - if (err < 0) - goto errout; - h2 = 16; - if (tb[TCA_RSVP_SRC]) { - memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); - h2 = hash_src(f->src); - } - if (tb[TCA_RSVP_PINFO]) { - pinfo = nla_data(tb[TCA_RSVP_PINFO]); - f->spi = pinfo->spi; - f->tunnelhdr = pinfo->tunnelhdr; - } - if (tb[TCA_RSVP_CLASSID]) - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - - dst = nla_data(tb[TCA_RSVP_DST]); - h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0); - - err = -ENOMEM; - if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0) - goto errout; - - if (f->tunnelhdr) { - err = -EINVAL; - if (f->res.classid > 255) - goto errout; - - err = -ENOMEM; - if (f->res.classid == 0 && - (f->res.classid = gen_tunnel(data)) == 0) - goto errout; - } - - for (sp = &data->ht[h1]; - (s = rtnl_dereference(*sp)) != NULL; - sp = &s->next) { - if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && - pinfo && pinfo->protocol == s->protocol && - memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && -#if RSVP_DST_LEN == 4 - dst[0] == s->dst[0] && - dst[1] == s->dst[1] && - dst[2] == s->dst[2] && -#endif - pinfo->tunnelid == s->tunnelid) { - -insert: - /* OK, we found appropriate session */ - - fp = &s->ht[h2]; - - f->sess = s; - if (f->tunnelhdr == 0) - tcf_bind_filter(tp, &f->res, base); - - tcf_exts_change(&f->exts, &e); - - fp = &s->ht[h2]; - for (nfp = rtnl_dereference(*fp); nfp; - fp = &nfp->next, nfp = rtnl_dereference(*fp)) { - __u32 mask = nfp->spi.mask & f->spi.mask; - - if (mask != f->spi.mask) - break; - } - RCU_INIT_POINTER(f->next, nfp); - rcu_assign_pointer(*fp, f); - - *arg = f; - return 0; - } - } - - /* No session found. Create new one. */ - - err = -ENOBUFS; - s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL); - if (s == NULL) - goto errout; - memcpy(s->dst, dst, sizeof(s->dst)); - - if (pinfo) { - s->dpi = pinfo->dpi; - s->protocol = pinfo->protocol; - s->tunnelid = pinfo->tunnelid; - } - sp = &data->ht[h1]; - for (nsp = rtnl_dereference(*sp); nsp; - sp = &nsp->next, nsp = rtnl_dereference(*sp)) { - if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask) - break; - } - RCU_INIT_POINTER(s->next, nsp); - rcu_assign_pointer(*sp, s); - - goto insert; - -errout: - tcf_exts_destroy(&f->exts); - kfree(f); -errout2: - tcf_exts_destroy(&e); - return err; -} - -static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - unsigned int h, h1; - - if (arg->stop) - return; - - for (h = 0; h < 256; h++) { - struct rsvp_session *s; - - for (s = rtnl_dereference(head->ht[h]); s; - s = rtnl_dereference(s->next)) { - for (h1 = 0; h1 <= 16; h1++) { - struct rsvp_filter *f; - - for (f = rtnl_dereference(s->ht[h1]); f; - f = rtnl_dereference(f->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; - return; - } - arg->count++; - } - } - } - } -} - -static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) -{ - struct rsvp_filter *f = fh; - struct rsvp_session *s; - struct nlattr *nest; - struct tc_rsvp_pinfo pinfo; - - if (f == NULL) - return skb->len; - s = f->sess; - - t->tcm_handle = f->handle; - - nest = nla_nest_start(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst)) - goto nla_put_failure; - pinfo.dpi = s->dpi; - pinfo.spi = f->spi; - pinfo.protocol = s->protocol; - pinfo.tunnelid = s->tunnelid; - pinfo.tunnelhdr = f->tunnelhdr; - pinfo.pad = 0; - if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo)) - goto nla_put_failure; - if (f->res.classid && - nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid)) - goto nla_put_failure; - if (((f->handle >> 8) & 0xFF) != 16 && - nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src)) - goto nla_put_failure; - - if (tcf_exts_dump(skb, &f->exts) < 0) - goto nla_put_failure; - - nla_nest_end(skb, nest); - - if (tcf_exts_dump_stats(skb, &f->exts) < 0) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q, - unsigned long base) -{ - struct rsvp_filter *f = fh; - - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } -} - -static struct tcf_proto_ops RSVP_OPS __read_mostly = { - .kind = RSVP_ID, - .classify = rsvp_classify, - .init = rsvp_init, - .destroy = rsvp_destroy, - .get = rsvp_get, - .change = rsvp_change, - .delete = rsvp_delete, - .walk = rsvp_walk, - .dump = rsvp_dump, - .bind_class = rsvp_bind_class, - .owner = THIS_MODULE, -}; - -static int __init init_rsvp(void) -{ - return register_tcf_proto_ops(&RSVP_OPS); -} - -static void __exit exit_rsvp(void) -{ - unregister_tcf_proto_ops(&RSVP_OPS); -} - -module_init(init_rsvp) -module_exit(exit_rsvp) diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c deleted file mode 100644 index dd08aea2aee5..000000000000 --- a/net/sched/cls_rsvp6.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/ipv6.h> -#include <linux/skbuff.h> -#include <net/act_api.h> -#include <net/pkt_cls.h> -#include <net/netlink.h> - -#define RSVP_DST_LEN 4 -#define RSVP_ID "rsvp6" -#define RSVP_OPS cls_rsvp6_ops - -#include "cls_rsvp.h" -MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c deleted file mode 100644 index 4070197f9543..000000000000 --- a/net/sched/cls_tcindex.c +++ /dev/null @@ -1,698 +0,0 @@ -/* - * net/sched/cls_tcindex.c Packet classifier for skb->tc_index - * - * Written 1998,1999 by Werner Almesberger, EPFL ICA - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/skbuff.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <net/act_api.h> -#include <net/netlink.h> -#include <net/pkt_cls.h> -#include <net/sch_generic.h> - -/* - * Passing parameters to the root seems to be done more awkwardly than really - * necessary. At least, u32 doesn't seem to use such dirty hacks. To be - * verified. FIXME. - */ - -#define PERFECT_HASH_THRESHOLD 64 /* use perfect hash if not bigger */ -#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */ - - -struct tcindex_filter_result { - struct tcf_exts exts; - struct tcf_result res; - struct rcu_work rwork; -}; - -struct tcindex_filter { - u16 key; - struct tcindex_filter_result result; - struct tcindex_filter __rcu *next; - struct rcu_work rwork; -}; - - -struct tcindex_data { - struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */ - struct tcindex_filter __rcu **h; /* imperfect hash; */ - struct tcf_proto *tp; - u16 mask; /* AND key with mask */ - u32 shift; /* shift ANDed key to the right */ - u32 hash; /* hash table size; 0 if undefined */ - u32 alloc_hash; /* allocated size */ - u32 fall_through; /* 0: only classify if explicit match */ - struct rcu_work rwork; -}; - -static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) -{ - return tcf_exts_has_actions(&r->exts) || r->res.classid; -} - -static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, - u16 key) -{ - if (p->perfect) { - struct tcindex_filter_result *f = p->perfect + key; - - return tcindex_filter_is_set(f) ? f : NULL; - } else if (p->h) { - struct tcindex_filter __rcu **fp; - struct tcindex_filter *f; - - fp = &p->h[key % p->hash]; - for (f = rcu_dereference_bh_rtnl(*fp); - f; - fp = &f->next, f = rcu_dereference_bh_rtnl(*fp)) - if (f->key == key) - return &f->result; - } - - return NULL; -} - - -static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) -{ - struct tcindex_data *p = rcu_dereference_bh(tp->root); - struct tcindex_filter_result *f; - int key = (skb->tc_index & p->mask) >> p->shift; - - pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n", - skb, tp, res, p); - - f = tcindex_lookup(p, key); - if (!f) { - struct Qdisc *q = tcf_block_q(tp->chain->block); - - if (!p->fall_through) - return -1; - res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key); - res->class = 0; - pr_debug("alg 0x%x\n", res->classid); - return 0; - } - *res = f->res; - pr_debug("map 0x%x\n", res->classid); - - return tcf_exts_exec(skb, &f->exts, res); -} - - -static void *tcindex_get(struct tcf_proto *tp, u32 handle) -{ - struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r; - - pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); - if (p->perfect && handle >= p->alloc_hash) - return NULL; - r = tcindex_lookup(p, handle); - return r && tcindex_filter_is_set(r) ? r : NULL; -} - -static int tcindex_init(struct tcf_proto *tp) -{ - struct tcindex_data *p; - - pr_debug("tcindex_init(tp %p)\n", tp); - p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL); - if (!p) - return -ENOMEM; - - p->mask = 0xffff; - p->hash = DEFAULT_HASH_SIZE; - p->fall_through = 1; - - rcu_assign_pointer(tp->root, p); - return 0; -} - -static void __tcindex_destroy_rexts(struct tcindex_filter_result *r) -{ - tcf_exts_destroy(&r->exts); - tcf_exts_put_net(&r->exts); -} - -static void tcindex_destroy_rexts_work(struct work_struct *work) -{ - struct tcindex_filter_result *r; - - r = container_of(to_rcu_work(work), - struct tcindex_filter_result, - rwork); - rtnl_lock(); - __tcindex_destroy_rexts(r); - rtnl_unlock(); -} - -static void __tcindex_destroy_fexts(struct tcindex_filter *f) -{ - tcf_exts_destroy(&f->result.exts); - tcf_exts_put_net(&f->result.exts); - kfree(f); -} - -static void tcindex_destroy_fexts_work(struct work_struct *work) -{ - struct tcindex_filter *f = container_of(to_rcu_work(work), - struct tcindex_filter, - rwork); - - rtnl_lock(); - __tcindex_destroy_fexts(f); - rtnl_unlock(); -} - -static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) -{ - struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = arg; - struct tcindex_filter __rcu **walk; - struct tcindex_filter *f = NULL; - - pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p); - if (p->perfect) { - if (!r->res.class) - return -ENOENT; - } else { - int i; - - for (i = 0; i < p->hash; i++) { - walk = p->h + i; - for (f = rtnl_dereference(*walk); f; - walk = &f->next, f = rtnl_dereference(*walk)) { - if (&f->result == r) - goto found; - } - } - return -ENOENT; - -found: - rcu_assign_pointer(*walk, rtnl_dereference(f->next)); - } - tcf_unbind_filter(tp, &r->res); - /* all classifiers are required to call tcf_exts_destroy() after rcu - * grace period, since converted-to-rcu actions are relying on that - * in cleanup() callback - */ - if (f) { - if (tcf_exts_get_net(&f->result.exts)) - tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work); - else - __tcindex_destroy_fexts(f); - } else { - if (tcf_exts_get_net(&r->exts)) - tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work); - else - __tcindex_destroy_rexts(r); - } - - *last = false; - return 0; -} - -static void tcindex_destroy_work(struct work_struct *work) -{ - struct tcindex_data *p = container_of(to_rcu_work(work), - struct tcindex_data, - rwork); - - kfree(p->perfect); - kfree(p->h); - kfree(p); -} - -static inline int -valid_perfect_hash(struct tcindex_data *p) -{ - return p->hash > (p->mask >> p->shift); -} - -static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { - [TCA_TCINDEX_HASH] = { .type = NLA_U32 }, - [TCA_TCINDEX_MASK] = { .type = NLA_U16 }, - [TCA_TCINDEX_SHIFT] = { .type = NLA_U32 }, - [TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 }, - [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 }, -}; - -static int tcindex_filter_result_init(struct tcindex_filter_result *r) -{ - memset(r, 0, sizeof(*r)); - return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); -} - -static void tcindex_partial_destroy_work(struct work_struct *work) -{ - struct tcindex_data *p = container_of(to_rcu_work(work), - struct tcindex_data, - rwork); - - kfree(p->perfect); - kfree(p); -} - -static void tcindex_free_perfect_hash(struct tcindex_data *cp) -{ - int i; - - for (i = 0; i < cp->hash; i++) - tcf_exts_destroy(&cp->perfect[i].exts); - kfree(cp->perfect); -} - -static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp) -{ - int i, err = 0; - - cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result), - GFP_KERNEL | __GFP_NOWARN); - if (!cp->perfect) - return -ENOMEM; - - for (i = 0; i < cp->hash; i++) { - err = tcf_exts_init(&cp->perfect[i].exts, - TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - if (err < 0) - goto errout; -#ifdef CONFIG_NET_CLS_ACT - cp->perfect[i].exts.net = net; -#endif - } - - return 0; - -errout: - tcindex_free_perfect_hash(cp); - return err; -} - -static int -tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, - u32 handle, struct tcindex_data *p, - struct tcindex_filter_result *r, struct nlattr **tb, - struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) -{ - struct tcindex_filter_result new_filter_result, *old_r = r; - struct tcindex_data *cp = NULL, *oldp; - struct tcindex_filter *f = NULL; /* make gcc behave */ - struct tcf_result cr = {}; - int err, balloc = 0; - struct tcf_exts e; - - err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - if (err < 0) - return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack); - if (err < 0) - goto errout; - - err = -ENOMEM; - /* tcindex_data attributes must look atomic to classifier/lookup so - * allocate new tcindex data and RCU assign it onto root. Keeping - * perfect hash and hash pointers from old data. - */ - cp = kzalloc(sizeof(*cp), GFP_KERNEL); - if (!cp) - goto errout; - - cp->mask = p->mask; - cp->shift = p->shift; - cp->hash = p->hash; - cp->alloc_hash = p->alloc_hash; - cp->fall_through = p->fall_through; - cp->tp = tp; - - if (tb[TCA_TCINDEX_HASH]) - cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); - - if (tb[TCA_TCINDEX_MASK]) - cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); - - if (tb[TCA_TCINDEX_SHIFT]) { - cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); - if (cp->shift > 16) { - err = -EINVAL; - goto errout; - } - } - if (!cp->hash) { - /* Hash not specified, use perfect hash if the upper limit - * of the hashing index is below the threshold. - */ - if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) - cp->hash = (cp->mask >> cp->shift) + 1; - else - cp->hash = DEFAULT_HASH_SIZE; - } - - if (p->perfect) { - int i; - - if (tcindex_alloc_perfect_hash(net, cp) < 0) - goto errout; - cp->alloc_hash = cp->hash; - for (i = 0; i < min(cp->hash, p->hash); i++) - cp->perfect[i].res = p->perfect[i].res; - balloc = 1; - } - cp->h = p->h; - - err = tcindex_filter_result_init(&new_filter_result); - if (err < 0) - goto errout_alloc; - if (old_r) - cr = r->res; - - err = -EBUSY; - - /* Hash already allocated, make sure that we still meet the - * requirements for the allocated hash. - */ - if (cp->perfect) { - if (!valid_perfect_hash(cp) || - cp->hash > cp->alloc_hash) - goto errout_alloc; - } else if (cp->h && cp->hash != cp->alloc_hash) { - goto errout_alloc; - } - - err = -EINVAL; - if (tb[TCA_TCINDEX_FALL_THROUGH]) - cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - - if (!cp->perfect && !cp->h) - cp->alloc_hash = cp->hash; - - /* Note: this could be as restrictive as if (handle & ~(mask >> shift)) - * but then, we'd fail handles that may become valid after some future - * mask change. While this is extremely unlikely to ever matter, - * the check below is safer (and also more backwards-compatible). - */ - if (cp->perfect || valid_perfect_hash(cp)) - if (handle >= cp->alloc_hash) - goto errout_alloc; - - - err = -ENOMEM; - if (!cp->perfect && !cp->h) { - if (valid_perfect_hash(cp)) { - if (tcindex_alloc_perfect_hash(net, cp) < 0) - goto errout_alloc; - balloc = 1; - } else { - struct tcindex_filter __rcu **hash; - - hash = kcalloc(cp->hash, - sizeof(struct tcindex_filter *), - GFP_KERNEL); - - if (!hash) - goto errout_alloc; - - cp->h = hash; - balloc = 2; - } - } - - if (cp->perfect) - r = cp->perfect + handle; - else - r = tcindex_lookup(cp, handle) ? : &new_filter_result; - - if (r == &new_filter_result) { - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) - goto errout_alloc; - f->key = handle; - f->next = NULL; - err = tcindex_filter_result_init(&f->result); - if (err < 0) { - kfree(f); - goto errout_alloc; - } - } - - if (tb[TCA_TCINDEX_CLASSID]) { - cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); - tcf_bind_filter(tp, &cr, base); - } - - if (old_r && old_r != r) { - err = tcindex_filter_result_init(old_r); - if (err < 0) { - kfree(f); - goto errout_alloc; - } - } - - oldp = p; - r->res = cr; - tcf_exts_change(&r->exts, &e); - - rcu_assign_pointer(tp->root, cp); - - if (r == &new_filter_result) { - struct tcindex_filter *nfp; - struct tcindex_filter __rcu **fp; - - f->result.res = r->res; - tcf_exts_change(&f->result.exts, &r->exts); - - fp = cp->h + (handle % cp->hash); - for (nfp = rtnl_dereference(*fp); - nfp; - fp = &nfp->next, nfp = rtnl_dereference(*fp)) - ; /* nothing */ - - rcu_assign_pointer(*fp, f); - } else { - tcf_exts_destroy(&new_filter_result.exts); - } - - if (oldp) - tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work); - return 0; - -errout_alloc: - if (balloc == 1) - tcindex_free_perfect_hash(cp); - else if (balloc == 2) - kfree(cp->h); - tcf_exts_destroy(&new_filter_result.exts); -errout: - kfree(cp); - tcf_exts_destroy(&e); - return err; -} - -static int -tcindex_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) -{ - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_TCINDEX_MAX + 1]; - struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = *arg; - int err; - - pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p," - "p %p,r %p,*arg %p\n", - tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL); - - if (!opt) - return 0; - - err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL); - if (err < 0) - return err; - - return tcindex_set_parms(net, tp, base, handle, p, r, tb, - tca[TCA_RATE], ovr, extack); -} - -static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) -{ - struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter *f, *next; - int i; - - pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p); - if (p->perfect) { - for (i = 0; i < p->hash; i++) { - if (!p->perfect[i].res.class) - continue; - if (walker->count >= walker->skip) { - if (walker->fn(tp, p->perfect + i, walker) < 0) { - walker->stop = 1; - return; - } - } - walker->count++; - } - } - if (!p->h) - return; - for (i = 0; i < p->hash; i++) { - for (f = rtnl_dereference(p->h[i]); f; f = next) { - next = rtnl_dereference(f->next); - if (walker->count >= walker->skip) { - if (walker->fn(tp, &f->result, walker) < 0) { - walker->stop = 1; - return; - } - } - walker->count++; - } - } -} - -static void tcindex_destroy(struct tcf_proto *tp, - struct netlink_ext_ack *extack) -{ - struct tcindex_data *p = rtnl_dereference(tp->root); - int i; - - pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); - - if (p->perfect) { - for (i = 0; i < p->hash; i++) { - struct tcindex_filter_result *r = p->perfect + i; - - tcf_unbind_filter(tp, &r->res); - if (tcf_exts_get_net(&r->exts)) - tcf_queue_work(&r->rwork, - tcindex_destroy_rexts_work); - else - __tcindex_destroy_rexts(r); - } - } - - for (i = 0; p->h && i < p->hash; i++) { - struct tcindex_filter *f, *next; - bool last; - - for (f = rtnl_dereference(p->h[i]); f; f = next) { - next = rtnl_dereference(f->next); - tcindex_delete(tp, &f->result, &last, NULL); - } - } - - tcf_queue_work(&p->rwork, tcindex_destroy_work); -} - - -static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) -{ - struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = fh; - struct nlattr *nest; - - pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n", - tp, fh, skb, t, p, r); - pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h); - - nest = nla_nest_start(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (!fh) { - t->tcm_handle = ~0; /* whatever ... */ - if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) || - nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) || - nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) || - nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through)) - goto nla_put_failure; - nla_nest_end(skb, nest); - } else { - if (p->perfect) { - t->tcm_handle = r - p->perfect; - } else { - struct tcindex_filter *f; - struct tcindex_filter __rcu **fp; - int i; - - t->tcm_handle = 0; - for (i = 0; !t->tcm_handle && i < p->hash; i++) { - fp = &p->h[i]; - for (f = rtnl_dereference(*fp); - !t->tcm_handle && f; - fp = &f->next, f = rtnl_dereference(*fp)) { - if (&f->result == r) - t->tcm_handle = f->key; - } - } - } - pr_debug("handle = %d\n", t->tcm_handle); - if (r->res.class && - nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid)) - goto nla_put_failure; - - if (tcf_exts_dump(skb, &r->exts) < 0) - goto nla_put_failure; - nla_nest_end(skb, nest); - - if (tcf_exts_dump_stats(skb, &r->exts) < 0) - goto nla_put_failure; - } - - return skb->len; - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl, - void *q, unsigned long base) -{ - struct tcindex_filter_result *r = fh; - - if (r && r->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &r->res, base); - else - __tcf_unbind_filter(q, &r->res); - } -} - -static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { - .kind = "tcindex", - .classify = tcindex_classify, - .init = tcindex_init, - .destroy = tcindex_destroy, - .get = tcindex_get, - .change = tcindex_change, - .delete = tcindex_delete, - .walk = tcindex_walk, - .dump = tcindex_dump, - .bind_class = tcindex_bind_class, - .owner = THIS_MODULE, -}; - -static int __init init_tcindex(void) -{ - return register_tcf_proto_ops(&cls_tcindex_ops); -} - -static void __exit exit_tcindex(void) -{ - unregister_tcf_proto_ops(&cls_tcindex_ops); -} - -module_init(init_tcindex) -module_exit(exit_tcindex) -MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d30256ac3537..1e71ff093c91 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -778,11 +778,22 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct netlink_ext_ack *extack) { int err; +#ifdef CONFIG_NET_CLS_IND + int ifindex = -1; +#endif err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack); if (err < 0) return err; +#ifdef CONFIG_NET_CLS_IND + if (tb[TCA_U32_INDEV]) { + ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); + if (ifindex < 0) + return -EINVAL; + } +#endif + if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; @@ -814,13 +825,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, } #ifdef CONFIG_NET_CLS_IND - if (tb[TCA_U32_INDEV]) { - int ret; - ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); - if (ret < 0) - return -EINVAL; - n->ifindex = ret; - } + if (ifindex >= 0) + n->ifindex = ifindex; #endif return 0; } @@ -873,7 +879,6 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, new->ifindex = n->ifindex; #endif new->fshift = n->fshift; - new->res = n->res; new->flags = n->flags; RCU_INIT_POINTER(new->ht_down, ht); @@ -1061,18 +1066,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } + /* At this point, we need to derive the new handle that will be used to + * uniquely map the identity of this table match entry. The + * identity of the entry that we need to construct is 32 bits made of: + * htid(12b):bucketid(8b):node/entryid(12b) + * + * At this point _we have the table(ht)_ in which we will insert this + * entry. We carry the table's id in variable "htid". + * Note that earlier code picked the ht selection either by a) the user + * providing the htid specified via TCA_U32_HASH attribute or b) when + * no such attribute is passed then the root ht, is default to at ID + * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0. + * If OTOH the user passed us the htid, they may also pass a bucketid of + * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is + * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be + * passed via the htid, so even if it was non-zero it will be ignored. + * + * We may also have a handle, if the user passed one. The handle also + * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b). + * Rule: the bucketid on the handle is ignored even if one was passed; + * rather the value on "htid" is always assumed to be the bucketid. + */ if (handle) { + /* Rule: The htid from handle and tableid from htid must match */ if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); return -EINVAL; } - handle = htid | TC_U32_NODE(handle); - err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle, - GFP_KERNEL); - if (err) - return err; - } else + /* Ok, so far we have a valid htid(12b):bucketid(8b) but we + * need to finalize the table entry identification with the last + * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for + * entries. Rule: nodeid of 0 is reserved only for tables(see + * earlier code which processes TC_U32_DIVISOR attribute). + * Rule: The nodeid can only be derived from the handle (and not + * htid). + * Rule: if the handle specified zero for the node id example + * 0x60000000, then pick a new nodeid from the pool of IDs + * this hash table has been allocating from. + * If OTOH it is specified (i.e for example the user passed a + * handle such as 0x60000123), then we use it generate our final + * handle which is used to uniquely identify the match entry. + */ + if (!TC_U32_NODE(handle)) { + handle = gen_new_kid(ht, htid); + } else { + handle = htid | TC_U32_NODE(handle); + err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, + handle, GFP_KERNEL); + if (err) + return err; + } + } else { + /* The user did not give us a handle; lets just generate one + * from the table's pool of nodeids. + */ handle = gen_new_kid(ht, htid); + } if (tb[TCA_U32_SEL] == NULL) { NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 73e2ed576ceb..cbf44783024f 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -101,8 +101,10 @@ retry: static void em_text_destroy(struct tcf_ematch *m) { - if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) + if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) { textsearch_destroy(EM_TEXT_PRIV(m)->config); + kfree(EM_TEXT_PRIV(m)); + } } static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 113a133ee544..5ba3548d2eb7 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -259,6 +259,8 @@ static int tcf_em_validate(struct tcf_proto *tp, * the value carried. */ if (em_hdr->flags & TCF_EM_SIMPLE) { + if (em->ops->datalen > 0) + goto errout; if (data_len < sizeof(u32)) goto errout; em->data = *(u32 *) data; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 424e70907b96..ab57c0ee9923 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1004,12 +1004,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, skip: if (!ingress) { - notify_and_destroy(net, skb, n, classid, - dev->qdisc, new); + old = dev->qdisc; if (new && !new->ops->attach) qdisc_refcount_inc(new); dev->qdisc = new ? : &noop_qdisc; + notify_and_destroy(net, skb, n, classid, old, new); + if (new && new->ops->attach) new->ops->attach(new); } else { @@ -1031,8 +1032,12 @@ skip: unsigned long cl = cops->find(parent, classid); if (cl) { - err = cops->graft(parent, cl, new, &old, - extack); + if (new && new->ops == &noqueue_qdisc_ops) { + NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); + err = -EINVAL; + } else { + err = cops->graft(parent, cl, new, &old, extack); + } } else { NL_SET_ERR_MSG(extack, "Specified class not found"); err = -ENOENT; @@ -1144,7 +1149,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev, sch->parent = parent; if (handle == TC_H_INGRESS) { - sch->flags |= TCQ_F_INGRESS; + if (!(sch->flags & TCQ_F_INGRESS)) { + NL_SET_ERR_MSG(extack, + "Specified parent ID is reserved for ingress and clsact Qdiscs"); + err = -EINVAL; + goto err_out3; + } handle = TC_H_MAKE(TC_H_INGRESS, 0); lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock); } else { @@ -1429,10 +1439,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, return 0; } +static bool req_create_or_replace(struct nlmsghdr *n) +{ + return (n->nlmsg_flags & NLM_F_CREATE && + n->nlmsg_flags & NLM_F_REPLACE); +} + +static bool req_create_exclusive(struct nlmsghdr *n) +{ + return (n->nlmsg_flags & NLM_F_CREATE && + n->nlmsg_flags & NLM_F_EXCL); +} + +static bool req_change(struct nlmsghdr *n) +{ + return (!(n->nlmsg_flags & NLM_F_CREATE) && + !(n->nlmsg_flags & NLM_F_REPLACE) && + !(n->nlmsg_flags & NLM_F_EXCL)); +} + /* * Create/change qdisc. */ - static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -1505,11 +1533,20 @@ replay: NL_SET_ERR_MSG(extack, "Invalid qdisc name"); return -EINVAL; } + if (q->flags & TCQ_F_INGRESS) { + NL_SET_ERR_MSG(extack, + "Cannot regraft ingress or clsact Qdiscs"); + return -EINVAL; + } if (q == p || (p && check_loop(q, p, 0))) { NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected"); return -ELOOP; } + if (clid == TC_H_INGRESS) { + NL_SET_ERR_MSG(extack, "Ingress cannot graft directly"); + return -EINVAL; + } qdisc_refcount_inc(q); goto graft; } else { @@ -1520,27 +1557,35 @@ replay: * * We know, that some child q is already * attached to this parent and have choice: - * either to change it or to create/graft new one. + * 1) change it or 2) create/graft new one. + * If the requested qdisc kind is different + * than the existing one, then we choose graft. + * If they are the same then this is "change" + * operation - just let it fallthrough.. * * 1. We are allowed to create/graft only - * if CREATE and REPLACE flags are set. + * if the request is explicitly stating + * "please create if it doesn't exist". * - * 2. If EXCL is set, requestor wanted to say, - * that qdisc tcm_handle is not expected + * 2. If the request is to exclusive create + * then the qdisc tcm_handle is not expected * to exist, so that we choose create/graft too. * * 3. The last case is when no flags are set. + * This will happen when for example tc + * utility issues a "change" command. * Alas, it is sort of hole in API, we * cannot decide what to do unambiguously. - * For now we select create/graft, if - * user gave KIND, which does not match existing. + * For now we select create/graft. */ - if ((n->nlmsg_flags & NLM_F_CREATE) && - (n->nlmsg_flags & NLM_F_REPLACE) && - ((n->nlmsg_flags & NLM_F_EXCL) || - (tca[TCA_KIND] && - nla_strcmp(tca[TCA_KIND], q->ops->id)))) - goto create_n_graft; + if (tca[TCA_KIND] && + nla_strcmp(tca[TCA_KIND], q->ops->id)) { + if (req_create_or_replace(n) || + req_create_exclusive(n)) + goto create_n_graft; + else if (req_change(n)) + goto create_n_graft2; + } } } } else { @@ -1574,6 +1619,7 @@ create_n_graft: NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag"); return -ENOENT; } +create_n_graft2: if (clid == TC_H_INGRESS) { if (dev_ingress_queue(dev)) { q = qdisc_create(dev, dev_ingress_queue(dev), p, diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 9a1bfa13a6cd..ff825f40ea04 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -394,10 +394,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, result = tcf_classify(skb, fl, &res, true); if (result < 0) continue; + if (result == TC_ACT_SHOT) + goto done; + flow = (struct atm_flow_data *)res.class; if (!flow) flow = lookup_flow(sch, res.classid); - goto done; + goto drop; } } flow = NULL; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 01a177cfa533..d91665ea7b14 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1649,7 +1649,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct cake_sched_data *q = qdisc_priv(sch); int len = qdisc_pkt_len(skb); - int uninitialized_var(ret); + int ret; struct sk_buff *ack = NULL; ktime_t now = ktime_get(); struct cake_tin_data *b; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 0a76ad05e5ae..7f0a5d22deaf 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -236,6 +236,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) result = tcf_classify(skb, fl, &res, true); if (!fl || result < 0) goto fallback; + if (result == TC_ACT_SHOT) + return NULL; cl = (void *)res.class; if (!cl) { @@ -256,8 +258,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; /* fall through */ - case TC_ACT_SHOT: - return NULL; case TC_ACT_RECLASSIFY: return cbq_reclassify(skb, cl); } @@ -365,7 +365,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbq_sched_data *q = qdisc_priv(sch); - int uninitialized_var(ret); + int ret; struct cbq_class *cl = cbq_classify(skb, sch, &ret); #ifdef CONFIG_NET_CLS_ACT diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index e4f69c779b8c..7a4777ee0536 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -192,7 +192,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct fq_codel_sched_data *q = qdisc_priv(sch); unsigned int idx, prev_backlog, prev_qlen; struct fq_codel_flow *flow; - int uninitialized_var(ret); + int ret; unsigned int pkt_len; bool memory_limited; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b18ec1f6de60..e71443623d67 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -913,6 +913,14 @@ hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc, cl->cl_flags |= HFSC_USC; } +static void +hfsc_upgrade_rt(struct hfsc_class *cl) +{ + cl->cl_fsc = cl->cl_rsc; + rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); + cl->cl_flags |= HFSC_FSC; +} + static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = { [TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) }, [TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) }, @@ -1073,6 +1081,12 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cf_tree = RB_ROOT; sch_tree_lock(sch); + /* Check if the inner class is a misconfigured 'rt' */ + if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) { + NL_SET_ERR_MSG(extack, + "Forced curve change on parent 'rt' to 'sc'"); + hfsc_upgrade_rt(parent); + } qdisc_class_hash_insert(&q->clhash, &cl->cl_common); list_add_tail(&cl->siblings, &parent->children); if (parent->level == 0) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index ce3f55259d0d..834960cc755e 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -83,6 +83,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + if (sch->parent != TC_H_INGRESS) + return -EOPNOTSUPP; + net_inc_ingress_queue(); mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); @@ -98,6 +101,9 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); + if (sch->parent != TC_H_INGRESS) + return; + tcf_block_put_ext(q->block, sch, &q->block_info); net_dec_ingress_queue(); } @@ -130,7 +136,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", .priv_size = sizeof(struct ingress_sched_data), - .static_flags = TCQ_F_CPUSTATS, + .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS, .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, @@ -215,6 +221,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, struct net_device *dev = qdisc_dev(sch); int err; + if (sch->parent != TC_H_CLSACT) + return -EOPNOTSUPP; + net_inc_ingress_queue(); net_inc_egress_queue(); @@ -242,6 +251,9 @@ static void clsact_destroy(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); + if (sch->parent != TC_H_CLSACT) + return; + tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); @@ -262,7 +274,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { .cl_ops = &clsact_class_ops, .id = "clsact", .priv_size = sizeof(struct clsact_sched_data), - .static_flags = TCQ_F_CPUSTATS, + .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS, .init = clsact_init, .destroy = clsact_destroy, .dump = ingress_dump, diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 64d7f876d7de..c0ab1e38e80c 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -132,6 +132,97 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, return 0; } +static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt, + struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct mqprio_sched *priv = qdisc_priv(sch); + struct nlattr *tb[TCA_MQPRIO_MAX + 1]; + struct nlattr *attr; + int i, rem, err; + + err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy, + sizeof(*qopt)); + if (err < 0) + return err; + + if (!qopt->hw) { + NL_SET_ERR_MSG(extack, + "mqprio TCA_OPTIONS can only contain netlink attributes in hardware mode"); + return -EINVAL; + } + + if (tb[TCA_MQPRIO_MODE]) { + priv->flags |= TC_MQPRIO_F_MODE; + priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]); + } + + if (tb[TCA_MQPRIO_SHAPER]) { + priv->flags |= TC_MQPRIO_F_SHAPER; + priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]); + } + + if (tb[TCA_MQPRIO_MIN_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MIN_RATE64], + "min_rate accepted only when shaper is in bw_rlimit mode"); + return -EINVAL; + } + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute type expected to be TCA_MQPRIO_MIN_RATE64"); + return -EINVAL; + } + + if (nla_len(attr) != sizeof(u64)) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length"); + return -EINVAL; + } + + if (i >= qopt->num_tc) + break; + priv->min_rate[i] = *(u64 *)nla_data(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MIN_RATE; + } + + if (tb[TCA_MQPRIO_MAX_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MAX_RATE64], + "max_rate accepted only when shaper is in bw_rlimit mode"); + return -EINVAL; + } + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute type expected to be TCA_MQPRIO_MAX_RATE64"); + return -EINVAL; + } + + if (nla_len(attr) != sizeof(u64)) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length"); + return -EINVAL; + } + + if (i >= qopt->num_tc) + break; + priv->max_rate[i] = *(u64 *)nla_data(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MAX_RATE; + } + + return 0; +} + static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -141,9 +232,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, struct Qdisc *qdisc; int i, err = -EOPNOTSUPP; struct tc_mqprio_qopt *qopt = NULL; - struct nlattr *tb[TCA_MQPRIO_MAX + 1]; - struct nlattr *attr; - int rem; int len; BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); @@ -168,55 +256,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt, len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); if (len > 0) { - err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy, - sizeof(*qopt)); - if (err < 0) + err = mqprio_parse_nlattr(sch, qopt, opt, extack); + if (err) return err; - - if (!qopt->hw) - return -EINVAL; - - if (tb[TCA_MQPRIO_MODE]) { - priv->flags |= TC_MQPRIO_F_MODE; - priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]); - } - - if (tb[TCA_MQPRIO_SHAPER]) { - priv->flags |= TC_MQPRIO_F_SHAPER; - priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]); - } - - if (tb[TCA_MQPRIO_MIN_RATE64]) { - if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) - return -EINVAL; - i = 0; - nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], - rem) { - if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) - return -EINVAL; - if (i >= qopt->num_tc) - break; - priv->min_rate[i] = *(u64 *)nla_data(attr); - i++; - } - priv->flags |= TC_MQPRIO_F_MIN_RATE; - } - - if (tb[TCA_MQPRIO_MAX_RATE64]) { - if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) - return -EINVAL; - i = 0; - nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], - rem) { - if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) - return -EINVAL; - if (i >= qopt->num_tc) - break; - priv->max_rate[i] = *(u64 *)nla_data(attr); - i++; - } - priv->flags |= TC_MQPRIO_F_MAX_RATE; - } } /* pre-allocate qdisc, attachment can't fail */ diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 31793af1a77b..cf93dbe3d040 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -748,12 +748,10 @@ static void dist_free(struct disttable *d) * signed 16 bit values. */ -static int get_dist_table(struct Qdisc *sch, struct disttable **tbl, - const struct nlattr *attr) +static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) { size_t n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); - spinlock_t *root_lock; struct disttable *d; int i; @@ -768,13 +766,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl, for (i = 0; i < n; i++) d->table[i] = data[i]; - root_lock = qdisc_root_sleeping_lock(sch); - - spin_lock_bh(root_lock); - swap(*tbl, d); - spin_unlock_bh(root_lock); - - dist_free(d); + *tbl = d; return 0; } @@ -930,6 +922,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, { struct netem_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_NETEM_MAX + 1]; + struct disttable *delay_dist = NULL; + struct disttable *slot_dist = NULL; struct tc_netem_qopt *qopt; struct clgstate old_clg; int old_loss_model = CLG_RANDOM; @@ -943,6 +937,19 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, if (ret < 0) return ret; + if (tb[TCA_NETEM_DELAY_DIST]) { + ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]); + if (ret) + goto table_free; + } + + if (tb[TCA_NETEM_SLOT_DIST]) { + ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]); + if (ret) + goto table_free; + } + + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; old_loss_model = q->loss_model; @@ -951,26 +958,17 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]); if (ret) { q->loss_model = old_loss_model; - return ret; + q->clg = old_clg; + goto unlock; } } else { q->loss_model = CLG_RANDOM; } - if (tb[TCA_NETEM_DELAY_DIST]) { - ret = get_dist_table(sch, &q->delay_dist, - tb[TCA_NETEM_DELAY_DIST]); - if (ret) - goto get_table_failure; - } - - if (tb[TCA_NETEM_SLOT_DIST]) { - ret = get_dist_table(sch, &q->slot_dist, - tb[TCA_NETEM_SLOT_DIST]); - if (ret) - goto get_table_failure; - } - + if (delay_dist) + swap(q->delay_dist, delay_dist); + if (slot_dist) + swap(q->slot_dist, slot_dist); sch->limit = qopt->limit; q->latency = PSCHED_TICKS2NS(qopt->latency); @@ -1018,15 +1016,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); - return ret; +unlock: + sch_tree_unlock(sch); -get_table_failure: - /* recover clg and loss_model, in case of - * q->clg and q->loss_model were modified - * in get_loss_clg() - */ - q->clg = old_clg; - q->loss_model = old_loss_model; +table_free: + dist_free(delay_dist); + dist_free(slot_dist); return ret; } diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 5619d2eb17b6..4ddb4af61d10 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -214,7 +214,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = { .priv_size = sizeof(struct plug_sched_data), .enqueue = plug_enqueue, .dequeue = plug_dequeue, - .peek = qdisc_peek_head, + .peek = qdisc_peek_dequeued, .init = plug_init, .change = plug_change, .reset = qdisc_reset_queue, diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 20dc1851d4ff..4f246599734e 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -387,8 +387,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight, u32 lmax) { struct qfq_sched *q = qdisc_priv(sch); - struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight); + struct qfq_aggregate *new_agg; + /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */ + if (lmax > (1UL << QFQ_MTU_SHIFT)) + return -EINVAL; + + new_agg = qfq_find_agg(q, lmax, weight); if (new_agg == NULL) { /* create new aggregate */ new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC); if (new_agg == NULL) @@ -433,15 +438,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } else weight = 1; - if (tb[TCA_QFQ_LMAX]) { + if (tb[TCA_QFQ_LMAX]) lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); - if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { - pr_notice("qfq: invalid max length %u\n", lmax); - return -EINVAL; - } - } else + else lmax = psched_mtu(qdisc_dev(sch)); + if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { + pr_notice("qfq: invalid max length %u\n", lmax); + return -EINVAL; + } + inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; @@ -982,10 +988,13 @@ static void qfq_update_eligible(struct qfq_sched *q) } /* Dequeue head packet of the head class in the DRR queue of the aggregate. */ -static void agg_dequeue(struct qfq_aggregate *agg, - struct qfq_class *cl, unsigned int len) +static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, + struct qfq_class *cl, unsigned int len) { - qdisc_dequeue_peeked(cl->qdisc); + struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc); + + if (!skb) + return NULL; cl->deficit -= (int) len; @@ -995,6 +1004,8 @@ static void agg_dequeue(struct qfq_aggregate *agg, cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); } + + return skb; } static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, @@ -1140,11 +1151,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) if (!skb) return NULL; - qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; + + skb = agg_dequeue(in_serv_agg, cl, len); + + if (!skb) { + sch->q.qlen++; + return NULL; + } + + qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); - agg_dequeue(in_serv_agg, cl, len); /* If lmax is lowered, through qfq_change_class, for a class * owning pending packets with larger size than the new value * of lmax, then the following condition may hold. diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 1bfdf90fa0cc..07721a1e98d8 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -353,7 +353,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) unsigned int hash, dropped; sfq_index x, qlen; struct sfq_slot *slot; - int uninitialized_var(ret); + int ret; struct sk_buff *head; int delta; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index d17708800652..78c1429d1301 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1181,8 +1181,7 @@ int sctp_assoc_update(struct sctp_association *asoc, /* Add any peer addresses from the new association. */ list_for_each_entry(trans, &new->peer.transport_addr_list, transports) - if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr) && - !sctp_assoc_add_peer(asoc, &trans->ipaddr, + if (!sctp_assoc_add_peer(asoc, &trans->ipaddr, GFP_ATOMIC, trans->state)) return -ENOMEM; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index f8a283245672..d723942e5e65 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -88,6 +88,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, } } + /* If somehow no addresses were found that can be used with this + * scope, it's an error. + */ + if (list_empty(&dest->address_list)) + error = -ENETUNREACH; + out: if (error) sctp_bind_addr_clean(dest); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 82d96441e64d..c4a2d647e6cc 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1255,7 +1255,10 @@ static int sctp_side_effects(enum sctp_event event_type, default: pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n", status, state, event_type, subtype.chunk); - BUG(); + error = status; + if (error >= 0) + error = -EINVAL; + WARN_ON_ONCE(1); break; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3d52431dea9b..8298f27e8de0 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4392,7 +4392,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net, SCTP_AUTH_NEW_KEY, GFP_ATOMIC); if (!ev) - return -ENOMEM; + return SCTP_DISPOSITION_NOMEM; sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 8901bb7afa2b..f954d3c8876d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -112,7 +112,7 @@ struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) { - sctp_memory_pressure = 1; + WRITE_ONCE(sctp_memory_pressure, 1); } @@ -380,9 +380,9 @@ static void sctp_auto_asconf_init(struct sctp_sock *sp) struct net *net = sock_net(&sp->inet.sk); if (net->sctp.default_auto_asconf) { - spin_lock(&net->sctp.addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist); - spin_unlock(&net->sctp.addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); sp->do_auto_asconf = 1; } } @@ -1953,6 +1953,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) goto err; + if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) { + err = -EINVAL; + goto err; + } } if (sctp_state(asoc, CLOSED)) { @@ -2574,6 +2578,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if (trans) { trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval); + sctp_transport_reset_hb_timer(trans); } else if (asoc) { asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval); @@ -4824,13 +4829,17 @@ static void sctp_destroy_sock(struct sock *sk) } /* Triggered when there are no references on the socket anymore */ -static void sctp_destruct_sock(struct sock *sk) +static void sctp_destruct_common(struct sock *sk) { struct sctp_sock *sp = sctp_sk(sk); /* Free up the HMAC transform. */ crypto_free_shash(sp->hmac); +} +static void sctp_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); inet_sock_destruct(sk); } @@ -8760,7 +8769,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, sctp_sk(newsk)->reuse = sp->reuse; newsk->sk_shutdown = sk->sk_shutdown; - newsk->sk_destruct = sctp_destruct_sock; + newsk->sk_destruct = sk->sk_destruct; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; @@ -8980,11 +8989,20 @@ struct proto sctp_prot = { #if IS_ENABLED(CONFIG_IPV6) -#include <net/transp_v6.h> -static void sctp_v6_destroy_sock(struct sock *sk) +static void sctp_v6_destruct_sock(struct sock *sk) { - sctp_destroy_sock(sk); - inet6_destroy_sock(sk); + sctp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +static int sctp_v6_init_sock(struct sock *sk) +{ + int ret = sctp_init_sock(sk); + + if (!ret) + sk->sk_destruct = sctp_v6_destruct_sock; + + return ret; } struct proto sctpv6_prot = { @@ -8994,8 +9012,8 @@ struct proto sctpv6_prot = { .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, - .init = sctp_init_sock, - .destroy = sctp_v6_destroy_sock, + .init = sctp_v6_init_sock, + .destroy = sctp_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index 0a78cdf86463..3290e6f5b6c6 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -1151,7 +1151,8 @@ static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn) #define _sctp_walk_ifwdtsn(pos, chunk, end) \ for (pos = chunk->subh.ifwdtsn_hdr->skip; \ - (void *)pos < (void *)chunk->subh.ifwdtsn_hdr->skip + (end); pos++) + (void *)pos <= (void *)chunk->subh.ifwdtsn_hdr->skip + (end) - \ + sizeof(struct sctp_ifwdtsn_skip); pos++) #define sctp_walk_ifwdtsn(pos, ch) \ _sctp_walk_ifwdtsn((pos), (ch), ntohs((ch)->chunk_hdr->length) - \ diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index dcd00b514c3f..ad0ac657fe12 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -141,34 +141,34 @@ static int smc_release(struct socket *sock) if (!smc->use_fallback) { rc = smc_close_active(smc); - sock_set_flag(sk, SOCK_DEAD); + smc_sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; - } - - sk->sk_prot->unhash(sk); - - if (smc->clcsock) { - if (smc->use_fallback && sk->sk_state == SMC_LISTEN) { + } else { + if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) + sock_put(sk); /* passive closing */ + if (sk->sk_state == SMC_LISTEN) { /* wake up clcsock accept */ rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); } - mutex_lock(&smc->clcsock_release_lock); - sock_release(smc->clcsock); - smc->clcsock = NULL; - mutex_unlock(&smc->clcsock_release_lock); - } - if (smc->use_fallback) { - if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) - sock_put(sk); /* passive closing */ sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); } + sk->sk_prot->unhash(sk); + + if (sk->sk_state == SMC_CLOSED) { + if (smc->clcsock) { + release_sock(sk); + smc_clcsock_release(smc); + lock_sock(sk); + } + if (!smc->use_fallback) + smc_conn_free(&smc->conn); + } + /* detach socket */ sock_orphan(sk); sock->sk = NULL; - if (!smc->use_fallback && sk->sk_state == SMC_CLOSED) - smc_conn_free(&smc->conn); release_sock(sk); sock_put(sk); /* final sock_put */ @@ -852,7 +852,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; - sock_set_flag(new_sk, SOCK_DEAD); + smc_sock_set_flag(new_sk, SOCK_DEAD); sock_put(new_sk); /* final */ *new_smc = NULL; goto out; @@ -1013,13 +1013,13 @@ static void smc_listen_out(struct smc_sock *new_smc) struct smc_sock *lsmc = new_smc->listen_smc; struct sock *newsmcsk = &new_smc->sk; - lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); if (lsmc->sk.sk_state == SMC_LISTEN) { + lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); smc_accept_enqueue(&lsmc->sk, newsmcsk); + release_sock(&lsmc->sk); } else { /* no longer listening */ smc_close_non_accepted(newsmcsk); } - release_sock(&lsmc->sk); /* Wake up accept */ lsmc->sk.sk_data_ready(&lsmc->sk); @@ -1215,6 +1215,9 @@ static void smc_listen_work(struct work_struct *work) int rc = 0; u8 ibport; + if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) + return smc_listen_out_err(new_smc); + if (new_smc->use_fallback) { smc_listen_out_connected(new_smc); return; diff --git a/net/smc/smc.h b/net/smc/smc.h index adbdf195eb08..c3b0e1e3f505 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -268,4 +268,9 @@ static inline bool using_ipsec(struct smc_sock *smc) struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); void smc_close_non_accepted(struct sock *sk); +static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag) +{ + set_bit(flag, &sk->sk_flags); +} + #endif /* __SMC_H */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 333e4353498f..c657fd29ff5d 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -304,7 +304,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc->sk.sk_shutdown |= RCV_SHUTDOWN; if (smc->clcsock && smc->clcsock->sk) smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; - sock_set_flag(&smc->sk, SOCK_DONE); + smc_sock_set_flag(&smc->sk, SOCK_DONE); sock_hold(&smc->sk); /* sock_put in close_work */ if (!schedule_work(&conn->close_work)) sock_put(&smc->sk); diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 092696d738c0..4ea28ec7ad13 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -21,6 +21,22 @@ #define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ) +/* release the clcsock that is assigned to the smc_sock */ +void smc_clcsock_release(struct smc_sock *smc) +{ + struct socket *tcp; + + if (smc->listen_smc && current_work() != &smc->smc_listen_work) + cancel_work_sync(&smc->smc_listen_work); + mutex_lock(&smc->clcsock_release_lock); + if (smc->clcsock) { + tcp = smc->clcsock; + smc->clcsock = NULL; + sock_release(tcp); + } + mutex_unlock(&smc->clcsock_release_lock); +} + static void smc_close_cleanup_listen(struct sock *parent) { struct sock *sk; @@ -148,7 +164,7 @@ static void smc_close_active_abort(struct smc_sock *smc) break; } - sock_set_flag(sk, SOCK_DEAD); + smc_sock_set_flag(sk, SOCK_DEAD); sk->sk_state_change(sk); } @@ -331,6 +347,7 @@ static void smc_close_passive_work(struct work_struct *work) close_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); struct smc_cdc_conn_state_flags *rxflags; + bool release_clcsock = false; struct sock *sk = &smc->sk; int old_state; @@ -415,10 +432,15 @@ wakeup: if (old_state != sk->sk_state) { sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { smc_conn_free(conn); + if (smc->clcsock) + release_clcsock = true; + } } release_sock(sk); + if (release_clcsock) + smc_clcsock_release(smc); sock_put(sk); /* sock_hold done by schedulers of close_work */ } diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index 19eb6a211c23..e0e3b5df25d2 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -23,5 +23,6 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); int smc_close_shutdown_write(struct smc_sock *smc); void smc_close_init(struct smc_sock *smc); +void smc_clcsock_release(struct smc_sock *smc); #endif /* SMC_CLOSE_H */ diff --git a/net/socket.c b/net/socket.c index a5167f03c31d..49ac98cfda42 100644 --- a/net/socket.c +++ b/net/socket.c @@ -90,6 +90,7 @@ #include <linux/slab.h> #include <linux/xattr.h> #include <linux/nospec.h> +#include <linux/indirect_call_wrapper.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -108,6 +109,13 @@ #include <net/busy_poll.h> #include <linux/errqueue.h> +/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */ +#if IS_ENABLED(CONFIG_INET) +#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__) +#endif + #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; @@ -637,29 +645,50 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); -/** - * sock_sendmsg - send a message through @sock - * @sock: socket - * @msg: message to send - * - * Sends @msg through @sock, passing through LSM. - * Returns the number of bytes sent, or an error code. - */ - +INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *, + size_t)); static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { - int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); + int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock, + msg, msg_data_left(msg)); BUG_ON(ret == -EIOCBQUEUED); return ret; } -int sock_sendmsg(struct socket *sock, struct msghdr *msg) +static int __sock_sendmsg(struct socket *sock, struct msghdr *msg) { int err = security_socket_sendmsg(sock, msg, msg_data_left(msg)); return err ?: sock_sendmsg_nosec(sock, msg); } + +/** + * sock_sendmsg - send a message through @sock + * @sock: socket + * @msg: message to send + * + * Sends @msg through @sock, passing through LSM. + * Returns the number of bytes sent, or an error code. + */ +int sock_sendmsg(struct socket *sock, struct msghdr *msg) +{ + struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name; + struct sockaddr_storage address; + int save_len = msg->msg_namelen; + int ret; + + if (msg->msg_name) { + memcpy(&address, msg->msg_name, msg->msg_namelen); + msg->msg_name = &address; + } + + ret = __sock_sendmsg(sock, msg); + msg->msg_name = save_addr; + msg->msg_namelen = save_len; + + return ret; +} EXPORT_SYMBOL(sock_sendmsg); /** @@ -843,6 +872,15 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); +INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *, + size_t , int )); +static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, + int flags) +{ + return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg, + msg_data_left(msg), flags); +} + /** * sock_recvmsg - receive a message from @sock * @sock: socket @@ -852,13 +890,6 @@ EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); * Receives @msg from @sock, passing through LSM. Returns the total number * of bytes received, or an error. */ - -static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, - int flags) -{ - return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags); -} - int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) { int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags); @@ -963,7 +994,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - res = sock_sendmsg(sock, &msg); + res = __sock_sendmsg(sock, &msg); *from = msg.msg_iter; return res; } @@ -1896,7 +1927,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; - err = sock_sendmsg(sock, &msg); + err = __sock_sendmsg(sock, &msg); out_put: fput_light(sock->file, fput_needed); @@ -2224,7 +2255,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, err = sock_sendmsg_nosec(sock, msg_sys); goto out_freectl; } - err = sock_sendmsg(sock, msg_sys); + err = __sock_sendmsg(sock, msg_sys); /* * If this is sendmmsg() and sending to current destination address was * successful, remember it. @@ -2555,7 +2586,7 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, * error to return on the next call or if the * app asks about it using getsockopt(SO_ERROR). */ - sock->sk->sk_err = -err; + WRITE_ONCE(sock->sk->sk_err, -err); } out_put: fput_light(sock->file, fput_needed); @@ -3398,7 +3429,11 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { - return sock->ops->bind(sock, addr, addrlen); + struct sockaddr_storage address; + + memcpy(&address, addr, addrlen); + + return sock->ops->bind(sock, (struct sockaddr *)&address, addrlen); } EXPORT_SYMBOL(kernel_bind); @@ -3468,7 +3503,11 @@ EXPORT_SYMBOL(kernel_accept); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { - return sock->ops->connect(sock, addr, addrlen, flags); + struct sockaddr_storage address; + + memcpy(&address, addr, addrlen); + + return sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, flags); } EXPORT_SYMBOL(kernel_connect); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index e61c48c1b37d..c11e68539602 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -323,7 +323,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth list_for_each_entry(pos, &pipe->in_downcall, list) { if (!uid_eq(pos->uid, uid)) continue; - if (auth && pos->auth->service != auth->service) + if (pos->auth->service != auth->service) continue; refcount_inc(&pos->count); dprintk("RPC: %s found msg %p\n", __func__, pos); @@ -677,6 +677,21 @@ out: return err; } +static struct gss_upcall_msg * +gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid) +{ + struct gss_upcall_msg *pos; + list_for_each_entry(pos, &pipe->in_downcall, list) { + if (!uid_eq(pos->uid, uid)) + continue; + if (!rpc_msg_is_inflight(&pos->msg)) + continue; + refcount_inc(&pos->count); + return pos; + } + return NULL; +} + #define MSG_BUF_MAXSIZE 1024 static ssize_t @@ -723,7 +738,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ spin_lock(&pipe->lock); - gss_msg = __gss_find_upcall(pipe, uid, NULL); + gss_msg = gss_find_downcall(pipe, uid); if (gss_msg == NULL) { spin_unlock(&pipe->lock); goto err_put_ctx; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d9d03881e4de..ed6b2a155f44 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1088,18 +1088,23 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp, return res; inlen = svc_getnl(argv); - if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) + if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) { + kfree(in_handle->data); return SVC_DENIED; + } pages = DIV_ROUND_UP(inlen, PAGE_SIZE); in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL); - if (!in_token->pages) + if (!in_token->pages) { + kfree(in_handle->data); return SVC_DENIED; + } in_token->page_base = 0; in_token->page_len = inlen; for (i = 0; i < pages; i++) { in_token->pages[i] = alloc_page(GFP_KERNEL); if (!in_token->pages[i]) { + kfree(in_handle->data); gss_free_in_token_pages(in_token); return SVC_DENIED; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0d7d149b1b1b..e5498253ad93 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1267,7 +1267,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, break; default: err = -EAFNOSUPPORT; - goto out; + goto out_release; } if (err < 0) { dprintk("RPC: can't bind UDP socket (%d)\n", err); @@ -1814,9 +1814,6 @@ call_bind_status(struct rpc_task *task) status = -EOPNOTSUPP; break; } - if (task->tk_rebind_retry == 0) - break; - task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index e36ae4d4b540..9af919364a00 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -709,7 +709,6 @@ rpc_init_task_statistics(struct rpc_task *task) /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; - task->tk_rebind_retry = 2; /* starting timestamp */ task->tk_start = ktime_get(); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d0b5a1c47a32..b5ee21d5d1f3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -757,12 +757,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk) dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); - if (svsk) { - /* Refer to svc_setup_socket() for details. */ - rmb(); - svsk->sk_odata(sk); - } - /* * This callback may called twice when a new connection * is established as a child socket inherits everything @@ -771,15 +765,20 @@ static void svc_tcp_listen_data_ready(struct sock *sk) * when one of child sockets become ESTABLISHED. * 2) data_ready method of the child socket may be called * when it receives data before the socket is accepted. - * In case of 2, we should ignore it silently. + * In case of 2, we should ignore it silently and DO NOT + * dereference svsk. */ - if (sk->sk_state == TCP_LISTEN) { - if (svsk) { - set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); - svc_xprt_enqueue(&svsk->sk_xprt); - } else - printk("svc: socket %p: no user data\n", sk); - } + if (sk->sk_state != TCP_LISTEN) + return; + + if (svsk) { + /* Refer to svc_setup_socket() for details. */ + rmb(); + svsk->sk_odata(sk); + set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); + svc_xprt_enqueue(&svsk->sk_xprt); + } else + printk("svc: socket %p: no user data\n", sk); } /* diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 0f970259d0d5..4353968bc5a5 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1128,7 +1128,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) struct tipc_nl_msg msg; struct tipc_media *media; struct sk_buff *rep; - struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; @@ -1177,7 +1177,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) int err; char *name; struct tipc_media *m; - struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; diff --git a/net/tipc/link.c b/net/tipc/link.c index 0d2ee4eb131f..ee4aca974622 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1595,7 +1595,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); - skb_linearize(skb); + if (skb_linearize(skb)) + goto exit; + hdr = buf_msg(skb); data = msg_data(hdr); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 9b36163d951e..bf11d57ef3ae 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -87,7 +87,7 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING, + [TIPC_NLA_LINK_NAME] = { .type = NLA_NUL_STRING, .len = TIPC_MAX_LINK_NAME }, [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, @@ -116,7 +116,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC }, - [TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING, + [TIPC_NLA_BEARER_NAME] = { .type = NLA_NUL_STRING, .len = TIPC_MAX_BEARER_NAME }, [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED }, [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 59e8e17d8da9..2276a0704a63 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -101,6 +101,7 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) return -EMSGSIZE; skb_put(skb, TLV_SPACE(len)); + memset(tlv, 0, TLV_SPACE(len)); tlv->tlv_type = htons(type); tlv->tlv_len = htons(TLV_LENGTH(len)); if (len && data) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 8266452c143b..c83eaa718369 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -388,7 +388,7 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout) rc_ = tipc_sk_sock_err((sock_), timeo_); \ if (rc_) \ break; \ - prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \ + add_wait_queue(sk_sleep(sk_), &wait_); \ release_sock(sk_); \ *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \ sched_annotate_sleep(); \ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e79c32942796..0632b494d329 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -536,7 +536,7 @@ static void unix_release_sock(struct sock *sk, int embrion) /* Clear state */ unix_state_lock(sk); sock_orphan(sk); - sk->sk_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); path = u->path; u->path.dentry = NULL; u->path.mnt = NULL; @@ -554,7 +554,7 @@ static void unix_release_sock(struct sock *sk, int embrion) if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { unix_state_lock(skpair); /* No more writes */ - skpair->sk_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) skpair->sk_err = ECONNRESET; unix_state_unlock(skpair); @@ -594,7 +594,7 @@ static void unix_release_sock(struct sock *sk, int embrion) * What the above comment does talk about? --ANK(980817) */ - if (unix_tot_inflight) + if (READ_ONCE(unix_tot_inflight)) unix_gc(); /* Garbage collect fds */ } @@ -706,7 +706,7 @@ static int unix_set_peek_off(struct sock *sk, int val) if (mutex_lock_interruptible(&u->iolock)) return -EINTR; - sk->sk_peek_off = val; + WRITE_ONCE(sk->sk_peek_off, val); mutex_unlock(&u->iolock); return 0; @@ -1232,7 +1232,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo) sched = !sock_flag(other, SOCK_DEAD) && !(other->sk_shutdown & RCV_SHUTDOWN) && - unix_recvq_full(other); + unix_recvq_full_lockless(other); unix_state_unlock(other); @@ -1984,6 +1984,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, if (false) { alloc_skb: + spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); mutex_unlock(&unix_sk(other)->iolock); newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, @@ -2023,6 +2024,7 @@ alloc_skb: init_scm = false; } + spin_lock(&other->sk_receive_queue.lock); skb = skb_peek_tail(&other->sk_receive_queue); if (tail && tail == skb) { skb = newskb; @@ -2053,14 +2055,11 @@ alloc_skb: refcount_add(size, &sk->sk_wmem_alloc); if (newskb) { - err = unix_scm_to_skb(&scm, skb, false); - if (err) - goto err_state_unlock; - spin_lock(&other->sk_receive_queue.lock); + unix_scm_to_skb(&scm, skb, false); __skb_queue_tail(&other->sk_receive_queue, newskb); - spin_unlock(&other->sk_receive_queue.lock); } + spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); mutex_unlock(&unix_sk(other)->iolock); @@ -2551,7 +2550,7 @@ static int unix_shutdown(struct socket *sock, int mode) ++mode; unix_state_lock(sk); - sk->sk_shutdown |= mode; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode); other = unix_peer(sk); if (other) sock_hold(other); @@ -2568,7 +2567,7 @@ static int unix_shutdown(struct socket *sock, int mode) if (mode&SEND_SHUTDOWN) peer_mode |= RCV_SHUTDOWN; unix_state_lock(other); - other->sk_shutdown |= peer_mode; + WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode); unix_state_unlock(other); other->sk_state_change(other); if (peer_mode == SHUTDOWN_MASK) @@ -2687,16 +2686,18 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa { struct sock *sk = sock->sk; __poll_t mask; + u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; + shutdown = READ_ONCE(sk->sk_shutdown); /* exceptional events? */ if (sk->sk_err) mask |= EPOLLERR; - if (sk->sk_shutdown == SHUTDOWN_MASK) + if (shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; /* readable? */ @@ -2724,18 +2725,20 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk, *other; unsigned int writable; __poll_t mask; + u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; + shutdown = READ_ONCE(sk->sk_shutdown); /* exceptional events? */ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; - if (sk->sk_shutdown == SHUTDOWN_MASK) + if (shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; /* readable? */ diff --git a/net/unix/scm.c b/net/unix/scm.c index a07b2efbf8b5..ac206bfdbbe3 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -59,7 +59,7 @@ void unix_inflight(struct user_struct *user, struct file *fp) /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } - user->unix_inflight++; + WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1); spin_unlock(&unix_gc_lock); } @@ -80,7 +80,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp) /* Paired with READ_ONCE() in wait_for_unix_gc() */ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); } - user->unix_inflight--; + WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1); spin_unlock(&unix_gc_lock); } @@ -94,7 +94,7 @@ static inline bool too_many_unix_fds(struct task_struct *p) { struct user_struct *user = current_user(); - if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE))) return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); return false; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d55a47858d6d..0dfa2dfcb4bc 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1240,7 +1240,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, vsock_transport_cancel_pkt(vsk); vsock_remove_connected(vsk); goto out_wait; - } else if (timeout == 0) { + } else if ((sk->sk_state != TCP_ESTABLISHED) && (timeout == 0)) { err = -ETIMEDOUT; sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 449b5261e661..187d0f7253a6 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -348,7 +348,7 @@ static s64 virtio_transport_has_space(struct vsock_sock *vsk) struct virtio_vsock_sock *vvs = vsk->trans; s64 bytes; - bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); + bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); if (bytes < 0) bytes = 0; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 42ab3e2ac060..2a8127f245e8 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1733,7 +1733,11 @@ static int vmci_transport_dgram_enqueue( if (!dg) return -ENOMEM; - memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len); + err = memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len); + if (err) { + kfree(dg); + return err; + } dg->dst = vmci_make_handle(remote_addr->svm_cid, remote_addr->svm_port); diff --git a/net/wireless/certs/wens.hex b/net/wireless/certs/wens.hex new file mode 100644 index 000000000000..0d50369bede9 --- /dev/null +++ b/net/wireless/certs/wens.hex @@ -0,0 +1,87 @@ +/* Chen-Yu Tsai's regdb certificate */ +0x30, 0x82, 0x02, 0xa7, 0x30, 0x82, 0x01, 0x8f, +0x02, 0x14, 0x61, 0xc0, 0x38, 0x65, 0x1a, 0xab, +0xdc, 0xf9, 0x4b, 0xd0, 0xac, 0x7f, 0xf0, 0x6c, +0x72, 0x48, 0xdb, 0x18, 0xc6, 0x00, 0x30, 0x0d, +0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, +0x01, 0x01, 0x0b, 0x05, 0x00, 0x30, 0x0f, 0x31, +0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x03, +0x0c, 0x04, 0x77, 0x65, 0x6e, 0x73, 0x30, 0x20, +0x17, 0x0d, 0x32, 0x33, 0x31, 0x32, 0x30, 0x31, +0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, 0x18, +0x0f, 0x32, 0x31, 0x32, 0x33, 0x31, 0x31, 0x30, +0x37, 0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, +0x30, 0x0f, 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03, +0x55, 0x04, 0x03, 0x0c, 0x04, 0x77, 0x65, 0x6e, +0x73, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0d, 0x06, +0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, +0x01, 0x01, 0x05, 0x00, 0x03, 0x82, 0x01, 0x0f, +0x00, 0x30, 0x82, 0x01, 0x0a, 0x02, 0x82, 0x01, +0x01, 0x00, 0xa9, 0x7a, 0x2c, 0x78, 0x4d, 0xa7, +0x19, 0x2d, 0x32, 0x52, 0xa0, 0x2e, 0x6c, 0xef, +0x88, 0x7f, 0x15, 0xc5, 0xb6, 0x69, 0x54, 0x16, +0x43, 0x14, 0x79, 0x53, 0xb7, 0xae, 0x88, 0xfe, +0xc0, 0xb7, 0x5d, 0x47, 0x8e, 0x1a, 0xe1, 0xef, +0xb3, 0x90, 0x86, 0xda, 0xd3, 0x64, 0x81, 0x1f, +0xce, 0x5d, 0x9e, 0x4b, 0x6e, 0x58, 0x02, 0x3e, +0xb2, 0x6f, 0x5e, 0x42, 0x47, 0x41, 0xf4, 0x2c, +0xb8, 0xa8, 0xd4, 0xaa, 0xc0, 0x0e, 0xe6, 0x48, +0xf0, 0xa8, 0xce, 0xcb, 0x08, 0xae, 0x37, 0xaf, +0xf6, 0x40, 0x39, 0xcb, 0x55, 0x6f, 0x5b, 0x4f, +0x85, 0x34, 0xe6, 0x69, 0x10, 0x50, 0x72, 0x5e, +0x4e, 0x9d, 0x4c, 0xba, 0x38, 0x36, 0x0d, 0xce, +0x73, 0x38, 0xd7, 0x27, 0x02, 0x2a, 0x79, 0x03, +0xe1, 0xac, 0xcf, 0xb0, 0x27, 0x85, 0x86, 0x93, +0x17, 0xab, 0xec, 0x42, 0x77, 0x37, 0x65, 0x8a, +0x44, 0xcb, 0xd6, 0x42, 0x93, 0x92, 0x13, 0xe3, +0x39, 0x45, 0xc5, 0x6e, 0x00, 0x4a, 0x7f, 0xcb, +0x42, 0x17, 0x2b, 0x25, 0x8c, 0xb8, 0x17, 0x3b, +0x15, 0x36, 0x59, 0xde, 0x42, 0xce, 0x21, 0xe6, +0xb6, 0xc7, 0x6e, 0x5e, 0x26, 0x1f, 0xf7, 0x8a, +0x57, 0x9e, 0xa5, 0x96, 0x72, 0xb7, 0x02, 0x32, +0xeb, 0x07, 0x2b, 0x73, 0xe2, 0x4f, 0x66, 0x58, +0x9a, 0xeb, 0x0f, 0x07, 0xb6, 0xab, 0x50, 0x8b, +0xc3, 0x8f, 0x17, 0xfa, 0x0a, 0x99, 0xc2, 0x16, +0x25, 0xbf, 0x2d, 0x6b, 0x1a, 0xaa, 0xe6, 0x3e, +0x5f, 0xeb, 0x6d, 0x9b, 0x5d, 0x4d, 0x42, 0x83, +0x2d, 0x39, 0xb8, 0xc9, 0xac, 0xdb, 0x3a, 0x91, +0x50, 0xdf, 0xbb, 0xb1, 0x76, 0x6d, 0x15, 0x73, +0xfd, 0xc6, 0xe6, 0x6b, 0x71, 0x9e, 0x67, 0x36, +0x22, 0x83, 0x79, 0xb1, 0xd6, 0xb8, 0x84, 0x52, +0xaf, 0x96, 0x5b, 0xc3, 0x63, 0x02, 0x4e, 0x78, +0x70, 0x57, 0x02, 0x03, 0x01, 0x00, 0x01, 0x30, +0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, +0x0d, 0x01, 0x01, 0x0b, 0x05, 0x00, 0x03, 0x82, +0x01, 0x01, 0x00, 0x24, 0x28, 0xee, 0x22, 0x74, +0x7f, 0x7c, 0xfa, 0x6c, 0x1f, 0xb3, 0x18, 0xd1, +0xc2, 0x3d, 0x7d, 0x29, 0x42, 0x88, 0xad, 0x82, +0xa5, 0xb1, 0x8a, 0x05, 0xd0, 0xec, 0x5c, 0x91, +0x20, 0xf6, 0x82, 0xfd, 0xd5, 0x67, 0x60, 0x5f, +0x31, 0xf5, 0xbd, 0x88, 0x91, 0x70, 0xbd, 0xb8, +0xb9, 0x8c, 0x88, 0xfe, 0x53, 0xc9, 0x54, 0x9b, +0x43, 0xc4, 0x7a, 0x43, 0x74, 0x6b, 0xdd, 0xb0, +0xb1, 0x3b, 0x33, 0x45, 0x46, 0x78, 0xa3, 0x1c, +0xef, 0x54, 0x68, 0xf7, 0x85, 0x9c, 0xe4, 0x51, +0x6f, 0x06, 0xaf, 0x81, 0xdb, 0x2a, 0x7b, 0x7b, +0x6f, 0xa8, 0x9c, 0x67, 0xd8, 0xcb, 0xc9, 0x91, +0x40, 0x00, 0xae, 0xd9, 0xa1, 0x9f, 0xdd, 0xa6, +0x43, 0x0e, 0x28, 0x7b, 0xaa, 0x1b, 0xe9, 0x84, +0xdb, 0x76, 0x64, 0x42, 0x70, 0xc9, 0xc0, 0xeb, +0xae, 0x84, 0x11, 0x16, 0x68, 0x4e, 0x84, 0x9e, +0x7e, 0x92, 0x36, 0xee, 0x1c, 0x3b, 0x08, 0x63, +0xeb, 0x79, 0x84, 0x15, 0x08, 0x9d, 0xaf, 0xc8, +0x9a, 0xc7, 0x34, 0xd3, 0x94, 0x4b, 0xd1, 0x28, +0x97, 0xbe, 0xd1, 0x45, 0x75, 0xdc, 0x35, 0x62, +0xac, 0x1d, 0x1f, 0xb7, 0xb7, 0x15, 0x87, 0xc8, +0x98, 0xc0, 0x24, 0x31, 0x56, 0x8d, 0xed, 0xdb, +0x06, 0xc6, 0x46, 0xbf, 0x4b, 0x6d, 0xa6, 0xd5, +0xab, 0xcc, 0x60, 0xfc, 0xe5, 0x37, 0xb6, 0x53, +0x7d, 0x58, 0x95, 0xa9, 0x56, 0xc7, 0xf7, 0xee, +0xc3, 0xa0, 0x76, 0xf7, 0x65, 0x4d, 0x53, 0xfa, +0xff, 0x5f, 0x76, 0x33, 0x5a, 0x08, 0xfa, 0x86, +0x92, 0x5a, 0x13, 0xfa, 0x1a, 0xfc, 0xf2, 0x1b, +0x8c, 0x7f, 0x42, 0x6d, 0xb7, 0x7e, 0xb7, 0xb4, +0xf0, 0xc7, 0x83, 0xbb, 0xa2, 0x81, 0x03, 0x2d, +0xd4, 0x2a, 0x63, 0x3f, 0xf7, 0x31, 0x2e, 0x40, +0x33, 0x5c, 0x46, 0xbc, 0x9b, 0xc1, 0x05, 0xa5, +0x45, 0x4e, 0xc3, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 534f57363f4a..e33c1175b158 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6504,7 +6504,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct mesh_config cfg; + struct mesh_config cfg = {}; u32 mask; int err; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 07d053603e3a..beba41f8a178 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3918,8 +3918,10 @@ static int __init regulatory_init_db(void) return -EINVAL; err = load_builtin_regdb_keys(); - if (err) + if (err) { + platform_device_unregister(reg_pdev); return err; + } /* We always try to get an update for the static regdomain */ err = regulatory_hint_core(cfg80211_world_regdom->alpha2); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 9d8b106deb0b..ebc73faa8fb1 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -269,6 +269,15 @@ void cfg80211_conn_work(struct work_struct *work) rtnl_unlock(); } +static void cfg80211_step_auth_next(struct cfg80211_conn *conn, + struct cfg80211_bss *bss) +{ + memcpy(conn->bssid, bss->bssid, ETH_ALEN); + conn->params.bssid = conn->bssid; + conn->params.channel = bss->channel; + conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; +} + /* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) { @@ -286,10 +295,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) if (!bss) return NULL; - memcpy(wdev->conn->bssid, bss->bssid, ETH_ALEN); - wdev->conn->params.bssid = wdev->conn->bssid; - wdev->conn->params.channel = bss->channel; - wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; + cfg80211_step_auth_next(wdev->conn, bss); schedule_work(&rdev->conn_work); return bss; @@ -568,7 +574,12 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, wdev->conn->params.ssid_len = wdev->ssid_len; /* see if we have the bss already */ - bss = cfg80211_get_conn_bss(wdev); + bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel, + wdev->conn->params.bssid, + wdev->conn->params.ssid, + wdev->conn->params.ssid_len, + wdev->conn_bss_type, + IEEE80211_PRIVACY(wdev->conn->params.privacy)); if (prev_bssid) { memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN); @@ -579,6 +590,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, if (bss) { enum nl80211_timeout_reason treason; + cfg80211_step_auth_next(wdev->conn, bss); err = cfg80211_conn_do_work(wdev, &treason); cfg80211_put_bss(wdev->wiphy, bss); } else { @@ -1207,6 +1219,13 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, } else { if (WARN_ON(connkeys)) return -EINVAL; + + /* connect can point to wdev->wext.connect which + * can hold key data from a previous connection + */ + connect->key = NULL; + connect->key_len = 0; + connect->key_idx = 0; } wdev->connect_keys = connkeys; diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 76a80a41615b..a57f54bc0e1a 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -796,6 +796,12 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, } } + /* Sanity-check to ensure we never end up _allocating_ zero + * bytes of data for extra. + */ + if (extra_size <= 0) + return -EFAULT; + /* kzalloc() ensures NULL-termination for essid_compat. */ extra = kzalloc(extra_size, GFP_KERNEL); if (!extra) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 77d8adb27ec7..9d0328bb30ca 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -497,6 +497,12 @@ static int x25_listen(struct socket *sock, int backlog) int rc = -EOPNOTSUPP; lock_sock(sk); + if (sock->state != SS_UNCONNECTED) { + rc = -EINVAL; + release_sock(sk); + return rc; + } + if (sk->sk_state != TCP_LISTEN) { memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN); sk->sk_max_ack_backlog = backlog; diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index fbc4552d17b8..6e5e307f985e 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -3,6 +3,8 @@ # Makefile for the XFRM subsystem. # +xfrm_interface-$(CONFIG_XFRM_INTERFACE) += xfrm_interface_core.o + obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ xfrm_sysctl.o xfrm_replay.o xfrm_device.o diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface_core.c index 3c642328a117..10fa26103bdf 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface_core.c @@ -219,8 +219,8 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) skb->dev = dev; if (err) { - dev->stats.rx_errors++; - dev->stats.rx_dropped++; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_dropped); return 0; } @@ -260,7 +260,6 @@ static int xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device_stats *stats = &xi->dev->stats; struct dst_entry *dst = skb_dst(skb); unsigned int length = skb->len; struct net_device *tdev; @@ -286,7 +285,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) tdev = dst->dev; if (tdev == dev) { - stats->collisions++; + DEV_STATS_INC(dev, collisions); net_warn_ratelimited("%s: Local routing loop detected!\n", dev->name); goto tx_err_dst_release; @@ -329,13 +328,13 @@ xmit: tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); } else { - stats->tx_errors++; - stats->tx_aborted_errors++; + DEV_STATS_INC(dev, tx_errors); + DEV_STATS_INC(dev, tx_aborted_errors); } return 0; tx_err_link_failure: - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); dst_link_failure(skb); tx_err_dst_release: dst_release(dst); @@ -345,7 +344,6 @@ tx_err_dst_release: static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device_stats *stats = &xi->dev->stats; struct dst_entry *dst = skb_dst(skb); struct flowi fl; int ret; @@ -354,23 +352,23 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IPV6): - xfrm_decode_session(skb, &fl, AF_INET6); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + xfrm_decode_session(skb, &fl, AF_INET6); if (!dst) { fl.u.ip6.flowi6_oif = dev->ifindex; fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6); if (dst->error) { dst_release(dst); - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); goto tx_err; } skb_dst_set(skb, dst); } break; case htons(ETH_P_IP): - xfrm_decode_session(skb, &fl, AF_INET); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + xfrm_decode_session(skb, &fl, AF_INET); if (!dst) { struct rtable *rt; @@ -378,7 +376,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4); if (IS_ERR(rt)) { - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); goto tx_err; } skb_dst_set(skb, &rt->dst); @@ -397,8 +395,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; tx_err: - stats->tx_errors++; - stats->tx_dropped++; + DEV_STATS_INC(dev, tx_errors); + DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1cd21a8c4dea..c8a7a5739425 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -654,8 +654,6 @@ EXPORT_SYMBOL(xfrm_policy_hash_rebuild); * of an absolute inpredictability of ordering of rules. This will not pass. */ static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { - static u32 idx_generator; - for (;;) { struct hlist_head *list; struct xfrm_policy *p; @@ -663,8 +661,8 @@ static u32 xfrm_gen_index(struct net *net, int dir, u32 index) int found; if (!index) { - idx = (idx_generator | dir); - idx_generator += 8; + idx = (net->xfrm.idx_generator | dir); + net->xfrm.idx_generator += 8; } else { idx = index; index = 0; @@ -2240,7 +2238,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) static inline int xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, - unsigned short family) + unsigned short family, u32 if_id) { if (xfrm_state_kern(x)) return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family); @@ -2251,7 +2249,8 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) || !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && !(x->props.mode != XFRM_MODE_TRANSPORT && - xfrm_state_addr_cmp(tmpl, x, family)); + xfrm_state_addr_cmp(tmpl, x, family)) && + (if_id == 0 || if_id == x->if_id); } /* @@ -2263,7 +2262,7 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, */ static inline int xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start, - unsigned short family) + unsigned short family, u32 if_id) { int idx = start; @@ -2273,7 +2272,7 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star } else start = -1; for (; idx < sp->len; idx++) { - if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) + if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id)) return ++idx; if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { if (start == -1) @@ -2450,7 +2449,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, * are implied between each two transformations. */ for (i = xfrm_nr-1, k = 0; i >= 0; i--) { - k = xfrm_policy_ok(tpp[i], sp, k, family); + k = xfrm_policy_ok(tpp[i], sp, k, family, if_id); if (k < 0) { if (k < -1) /* "-2 - errored_index" returned */ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 94c7ebc26c48..699e544b4bfd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -521,7 +521,7 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; - if (re) { + if (re && x->replay_esn && x->preplay_esn) { struct xfrm_replay_state_esn *replay_esn; replay_esn = nla_data(re); memcpy(x->replay_esn, replay_esn, @@ -1036,6 +1036,15 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) sizeof(*filter), GFP_KERNEL); if (filter == NULL) return -ENOMEM; + + /* see addr_match(), (prefix length >> 5) << 2 + * will be used to compare xfrm_address_t + */ + if (filter->splen > (sizeof(xfrm_address_t) << 3) || + filter->dplen > (sizeof(xfrm_address_t) << 3)) { + kfree(filter); + return -EINVAL; + } } if (attrs[XFRMA_PROTO]) @@ -2573,7 +2582,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) }, [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) }, - [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) }, + [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) }, [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) }, [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) }, [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 }, |